diff options
Diffstat (limited to 'net')
647 files changed, 43802 insertions, 24805 deletions
diff --git a/net/802/Kconfig b/net/802/Kconfig new file mode 100644 index 000000000000..be33d27c8e69 --- /dev/null +++ b/net/802/Kconfig @@ -0,0 +1,7 @@ +config STP + tristate + select LLC + +config GARP + tristate + select STP diff --git a/net/802/Makefile b/net/802/Makefile index 68569ffddea1..7893d679910c 100644 --- a/net/802/Makefile +++ b/net/802/Makefile @@ -10,3 +10,5 @@ obj-$(CONFIG_FDDI) += fddi.o obj-$(CONFIG_HIPPI) += hippi.o obj-$(CONFIG_IPX) += p8022.o psnap.o p8023.o obj-$(CONFIG_ATALK) += p8022.o psnap.o +obj-$(CONFIG_STP) += stp.o +obj-$(CONFIG_GARP) += garp.o diff --git a/net/802/garp.c b/net/802/garp.c new file mode 100644 index 000000000000..1dcb0660c49d --- /dev/null +++ b/net/802/garp.c @@ -0,0 +1,636 @@ +/* + * IEEE 802.1D Generic Attribute Registration Protocol (GARP) + * + * Copyright (c) 2008 Patrick McHardy <kaber@trash.net> + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * version 2 as published by the Free Software Foundation. + */ +#include <linux/kernel.h> +#include <linux/timer.h> +#include <linux/skbuff.h> +#include <linux/netdevice.h> +#include <linux/etherdevice.h> +#include <linux/rtnetlink.h> +#include <linux/llc.h> +#include <net/llc.h> +#include <net/llc_pdu.h> +#include <net/garp.h> +#include <asm/unaligned.h> + +static unsigned int garp_join_time __read_mostly = 200; +module_param(garp_join_time, uint, 0644); +MODULE_PARM_DESC(garp_join_time, "Join time in ms (default 200ms)"); +MODULE_LICENSE("GPL"); + +static const struct garp_state_trans { + u8 state; + u8 action; +} garp_applicant_state_table[GARP_APPLICANT_MAX + 1][GARP_EVENT_MAX + 1] = { + [GARP_APPLICANT_VA] = { + [GARP_EVENT_TRANSMIT_PDU] = { .state = GARP_APPLICANT_AA, + .action = GARP_ACTION_S_JOIN_IN }, + [GARP_EVENT_R_JOIN_IN] = { .state = GARP_APPLICANT_AA }, + [GARP_EVENT_R_JOIN_EMPTY] = { .state = GARP_APPLICANT_VA }, + [GARP_EVENT_R_EMPTY] = { .state = GARP_APPLICANT_VA }, + [GARP_EVENT_R_LEAVE_IN] = { .state = GARP_APPLICANT_VA }, + [GARP_EVENT_R_LEAVE_EMPTY] = { .state = GARP_APPLICANT_VP }, + [GARP_EVENT_REQ_JOIN] = { .state = GARP_APPLICANT_INVALID }, + [GARP_EVENT_REQ_LEAVE] = { .state = GARP_APPLICANT_LA }, + }, + [GARP_APPLICANT_AA] = { + [GARP_EVENT_TRANSMIT_PDU] = { .state = GARP_APPLICANT_QA, + .action = GARP_ACTION_S_JOIN_IN }, + [GARP_EVENT_R_JOIN_IN] = { .state = GARP_APPLICANT_QA }, + [GARP_EVENT_R_JOIN_EMPTY] = { .state = GARP_APPLICANT_VA }, + [GARP_EVENT_R_EMPTY] = { .state = GARP_APPLICANT_VA }, + [GARP_EVENT_R_LEAVE_IN] = { .state = GARP_APPLICANT_VA }, + [GARP_EVENT_R_LEAVE_EMPTY] = { .state = GARP_APPLICANT_VP }, + [GARP_EVENT_REQ_JOIN] = { .state = GARP_APPLICANT_INVALID }, + [GARP_EVENT_REQ_LEAVE] = { .state = GARP_APPLICANT_LA }, + }, + [GARP_APPLICANT_QA] = { + [GARP_EVENT_TRANSMIT_PDU] = { .state = GARP_APPLICANT_INVALID }, + [GARP_EVENT_R_JOIN_IN] = { .state = GARP_APPLICANT_QA }, + [GARP_EVENT_R_JOIN_EMPTY] = { .state = GARP_APPLICANT_VA }, + [GARP_EVENT_R_EMPTY] = { .state = GARP_APPLICANT_VA }, + [GARP_EVENT_R_LEAVE_IN] = { .state = GARP_APPLICANT_VP }, + [GARP_EVENT_R_LEAVE_EMPTY] = { .state = GARP_APPLICANT_VP }, + [GARP_EVENT_REQ_JOIN] = { .state = GARP_APPLICANT_INVALID }, + [GARP_EVENT_REQ_LEAVE] = { .state = GARP_APPLICANT_LA }, + }, + [GARP_APPLICANT_LA] = { + [GARP_EVENT_TRANSMIT_PDU] = { .state = GARP_APPLICANT_VO, + .action = GARP_ACTION_S_LEAVE_EMPTY }, + [GARP_EVENT_R_JOIN_IN] = { .state = GARP_APPLICANT_LA }, + [GARP_EVENT_R_JOIN_EMPTY] = { .state = GARP_APPLICANT_VO }, + [GARP_EVENT_R_EMPTY] = { .state = GARP_APPLICANT_LA }, + [GARP_EVENT_R_LEAVE_IN] = { .state = GARP_APPLICANT_LA }, + [GARP_EVENT_R_LEAVE_EMPTY] = { .state = GARP_APPLICANT_VO }, + [GARP_EVENT_REQ_JOIN] = { .state = GARP_APPLICANT_VA }, + [GARP_EVENT_REQ_LEAVE] = { .state = GARP_APPLICANT_INVALID }, + }, + [GARP_APPLICANT_VP] = { + [GARP_EVENT_TRANSMIT_PDU] = { .state = GARP_APPLICANT_AA, + .action = GARP_ACTION_S_JOIN_IN }, + [GARP_EVENT_R_JOIN_IN] = { .state = GARP_APPLICANT_AP }, + [GARP_EVENT_R_JOIN_EMPTY] = { .state = GARP_APPLICANT_VP }, + [GARP_EVENT_R_EMPTY] = { .state = GARP_APPLICANT_VP }, + [GARP_EVENT_R_LEAVE_IN] = { .state = GARP_APPLICANT_VP }, + [GARP_EVENT_R_LEAVE_EMPTY] = { .state = GARP_APPLICANT_VP }, + [GARP_EVENT_REQ_JOIN] = { .state = GARP_APPLICANT_INVALID }, + [GARP_EVENT_REQ_LEAVE] = { .state = GARP_APPLICANT_VO }, + }, + [GARP_APPLICANT_AP] = { + [GARP_EVENT_TRANSMIT_PDU] = { .state = GARP_APPLICANT_QA, + .action = GARP_ACTION_S_JOIN_IN }, + [GARP_EVENT_R_JOIN_IN] = { .state = GARP_APPLICANT_QP }, + [GARP_EVENT_R_JOIN_EMPTY] = { .state = GARP_APPLICANT_VP }, + [GARP_EVENT_R_EMPTY] = { .state = GARP_APPLICANT_VP }, + [GARP_EVENT_R_LEAVE_IN] = { .state = GARP_APPLICANT_VP }, + [GARP_EVENT_R_LEAVE_EMPTY] = { .state = GARP_APPLICANT_VP }, + [GARP_EVENT_REQ_JOIN] = { .state = GARP_APPLICANT_INVALID }, + [GARP_EVENT_REQ_LEAVE] = { .state = GARP_APPLICANT_AO }, + }, + [GARP_APPLICANT_QP] = { + [GARP_EVENT_TRANSMIT_PDU] = { .state = GARP_APPLICANT_INVALID }, + [GARP_EVENT_R_JOIN_IN] = { .state = GARP_APPLICANT_QP }, + [GARP_EVENT_R_JOIN_EMPTY] = { .state = GARP_APPLICANT_VP }, + [GARP_EVENT_R_EMPTY] = { .state = GARP_APPLICANT_VP }, + [GARP_EVENT_R_LEAVE_IN] = { .state = GARP_APPLICANT_VP }, + [GARP_EVENT_R_LEAVE_EMPTY] = { .state = GARP_APPLICANT_VP }, + [GARP_EVENT_REQ_JOIN] = { .state = GARP_APPLICANT_INVALID }, + [GARP_EVENT_REQ_LEAVE] = { .state = GARP_APPLICANT_QO }, + }, + [GARP_APPLICANT_VO] = { + [GARP_EVENT_TRANSMIT_PDU] = { .state = GARP_APPLICANT_INVALID }, + [GARP_EVENT_R_JOIN_IN] = { .state = GARP_APPLICANT_AO }, + [GARP_EVENT_R_JOIN_EMPTY] = { .state = GARP_APPLICANT_VO }, + [GARP_EVENT_R_EMPTY] = { .state = GARP_APPLICANT_VO }, + [GARP_EVENT_R_LEAVE_IN] = { .state = GARP_APPLICANT_VO }, + [GARP_EVENT_R_LEAVE_EMPTY] = { .state = GARP_APPLICANT_VO }, + [GARP_EVENT_REQ_JOIN] = { .state = GARP_APPLICANT_VP }, + [GARP_EVENT_REQ_LEAVE] = { .state = GARP_APPLICANT_INVALID }, + }, + [GARP_APPLICANT_AO] = { + [GARP_EVENT_TRANSMIT_PDU] = { .state = GARP_APPLICANT_INVALID }, + [GARP_EVENT_R_JOIN_IN] = { .state = GARP_APPLICANT_QO }, + [GARP_EVENT_R_JOIN_EMPTY] = { .state = GARP_APPLICANT_VO }, + [GARP_EVENT_R_EMPTY] = { .state = GARP_APPLICANT_VO }, + [GARP_EVENT_R_LEAVE_IN] = { .state = GARP_APPLICANT_VO }, + [GARP_EVENT_R_LEAVE_EMPTY] = { .state = GARP_APPLICANT_VO }, + [GARP_EVENT_REQ_JOIN] = { .state = GARP_APPLICANT_AP }, + [GARP_EVENT_REQ_LEAVE] = { .state = GARP_APPLICANT_INVALID }, + }, + [GARP_APPLICANT_QO] = { + [GARP_EVENT_TRANSMIT_PDU] = { .state = GARP_APPLICANT_INVALID }, + [GARP_EVENT_R_JOIN_IN] = { .state = GARP_APPLICANT_QO }, + [GARP_EVENT_R_JOIN_EMPTY] = { .state = GARP_APPLICANT_VO }, + [GARP_EVENT_R_EMPTY] = { .state = GARP_APPLICANT_VO }, + [GARP_EVENT_R_LEAVE_IN] = { .state = GARP_APPLICANT_VO }, + [GARP_EVENT_R_LEAVE_EMPTY] = { .state = GARP_APPLICANT_VO }, + [GARP_EVENT_REQ_JOIN] = { .state = GARP_APPLICANT_QP }, + [GARP_EVENT_REQ_LEAVE] = { .state = GARP_APPLICANT_INVALID }, + }, +}; + +static int garp_attr_cmp(const struct garp_attr *attr, + const void *data, u8 len, u8 type) +{ + if (attr->type != type) + return attr->type - type; + if (attr->dlen != len) + return attr->dlen - len; + return memcmp(attr->data, data, len); +} + +static struct garp_attr *garp_attr_lookup(const struct garp_applicant *app, + const void *data, u8 len, u8 type) +{ + struct rb_node *parent = app->gid.rb_node; + struct garp_attr *attr; + int d; + + while (parent) { + attr = rb_entry(parent, struct garp_attr, node); + d = garp_attr_cmp(attr, data, len, type); + if (d < 0) + parent = parent->rb_left; + else if (d > 0) + parent = parent->rb_right; + else + return attr; + } + return NULL; +} + +static void garp_attr_insert(struct garp_applicant *app, struct garp_attr *new) +{ + struct rb_node *parent = NULL, **p = &app->gid.rb_node; + struct garp_attr *attr; + int d; + + while (*p) { + parent = *p; + attr = rb_entry(parent, struct garp_attr, node); + d = garp_attr_cmp(attr, new->data, new->dlen, new->type); + if (d < 0) + p = &parent->rb_left; + else if (d > 0) + p = &parent->rb_right; + } + rb_link_node(&new->node, parent, p); + rb_insert_color(&new->node, &app->gid); +} + +static struct garp_attr *garp_attr_create(struct garp_applicant *app, + const void *data, u8 len, u8 type) +{ + struct garp_attr *attr; + + attr = kmalloc(sizeof(*attr) + len, GFP_ATOMIC); + if (!attr) + return attr; + attr->state = GARP_APPLICANT_VO; + attr->type = type; + attr->dlen = len; + memcpy(attr->data, data, len); + garp_attr_insert(app, attr); + return attr; +} + +static void garp_attr_destroy(struct garp_applicant *app, struct garp_attr *attr) +{ + rb_erase(&attr->node, &app->gid); + kfree(attr); +} + +static int garp_pdu_init(struct garp_applicant *app) +{ + struct sk_buff *skb; + struct garp_pdu_hdr *gp; + +#define LLC_RESERVE sizeof(struct llc_pdu_un) + skb = alloc_skb(app->dev->mtu + LL_RESERVED_SPACE(app->dev), + GFP_ATOMIC); + if (!skb) + return -ENOMEM; + + skb->dev = app->dev; + skb->protocol = htons(ETH_P_802_2); + skb_reserve(skb, LL_RESERVED_SPACE(app->dev) + LLC_RESERVE); + + gp = (struct garp_pdu_hdr *)__skb_put(skb, sizeof(*gp)); + put_unaligned(htons(GARP_PROTOCOL_ID), &gp->protocol); + + app->pdu = skb; + return 0; +} + +static int garp_pdu_append_end_mark(struct garp_applicant *app) +{ + if (skb_tailroom(app->pdu) < sizeof(u8)) + return -1; + *(u8 *)__skb_put(app->pdu, sizeof(u8)) = GARP_END_MARK; + return 0; +} + +static void garp_pdu_queue(struct garp_applicant *app) +{ + if (!app->pdu) + return; + + garp_pdu_append_end_mark(app); + garp_pdu_append_end_mark(app); + + llc_pdu_header_init(app->pdu, LLC_PDU_TYPE_U, LLC_SAP_BSPAN, + LLC_SAP_BSPAN, LLC_PDU_CMD); + llc_pdu_init_as_ui_cmd(app->pdu); + llc_mac_hdr_init(app->pdu, app->dev->dev_addr, + app->app->proto.group_address); + + skb_queue_tail(&app->queue, app->pdu); + app->pdu = NULL; +} + +static void garp_queue_xmit(struct garp_applicant *app) +{ + struct sk_buff *skb; + + while ((skb = skb_dequeue(&app->queue))) + dev_queue_xmit(skb); +} + +static int garp_pdu_append_msg(struct garp_applicant *app, u8 attrtype) +{ + struct garp_msg_hdr *gm; + + if (skb_tailroom(app->pdu) < sizeof(*gm)) + return -1; + gm = (struct garp_msg_hdr *)__skb_put(app->pdu, sizeof(*gm)); + gm->attrtype = attrtype; + garp_cb(app->pdu)->cur_type = attrtype; + return 0; +} + +static int garp_pdu_append_attr(struct garp_applicant *app, + const struct garp_attr *attr, + enum garp_attr_event event) +{ + struct garp_attr_hdr *ga; + unsigned int len; + int err; +again: + if (!app->pdu) { + err = garp_pdu_init(app); + if (err < 0) + return err; + } + + if (garp_cb(app->pdu)->cur_type != attr->type) { + if (garp_cb(app->pdu)->cur_type && + garp_pdu_append_end_mark(app) < 0) + goto queue; + if (garp_pdu_append_msg(app, attr->type) < 0) + goto queue; + } + + len = sizeof(*ga) + attr->dlen; + if (skb_tailroom(app->pdu) < len) + goto queue; + ga = (struct garp_attr_hdr *)__skb_put(app->pdu, len); + ga->len = len; + ga->event = event; + memcpy(ga->data, attr->data, attr->dlen); + return 0; + +queue: + garp_pdu_queue(app); + goto again; +} + +static void garp_attr_event(struct garp_applicant *app, + struct garp_attr *attr, enum garp_event event) +{ + enum garp_applicant_state state; + + state = garp_applicant_state_table[attr->state][event].state; + if (state == GARP_APPLICANT_INVALID) + return; + + switch (garp_applicant_state_table[attr->state][event].action) { + case GARP_ACTION_NONE: + break; + case GARP_ACTION_S_JOIN_IN: + /* When appending the attribute fails, don't update state in + * order to retry on next TRANSMIT_PDU event. */ + if (garp_pdu_append_attr(app, attr, GARP_JOIN_IN) < 0) + return; + break; + case GARP_ACTION_S_LEAVE_EMPTY: + garp_pdu_append_attr(app, attr, GARP_LEAVE_EMPTY); + /* As a pure applicant, sending a leave message implies that + * the attribute was unregistered and can be destroyed. */ + garp_attr_destroy(app, attr); + return; + default: + WARN_ON(1); + } + + attr->state = state; +} + +int garp_request_join(const struct net_device *dev, + const struct garp_application *appl, + const void *data, u8 len, u8 type) +{ + struct garp_port *port = dev->garp_port; + struct garp_applicant *app = port->applicants[appl->type]; + struct garp_attr *attr; + + spin_lock_bh(&app->lock); + attr = garp_attr_create(app, data, len, type); + if (!attr) { + spin_unlock_bh(&app->lock); + return -ENOMEM; + } + garp_attr_event(app, attr, GARP_EVENT_REQ_JOIN); + spin_unlock_bh(&app->lock); + return 0; +} +EXPORT_SYMBOL_GPL(garp_request_join); + +void garp_request_leave(const struct net_device *dev, + const struct garp_application *appl, + const void *data, u8 len, u8 type) +{ + struct garp_port *port = dev->garp_port; + struct garp_applicant *app = port->applicants[appl->type]; + struct garp_attr *attr; + + spin_lock_bh(&app->lock); + attr = garp_attr_lookup(app, data, len, type); + if (!attr) { + spin_unlock_bh(&app->lock); + return; + } + garp_attr_event(app, attr, GARP_EVENT_REQ_LEAVE); + spin_unlock_bh(&app->lock); +} +EXPORT_SYMBOL_GPL(garp_request_leave); + +static void garp_gid_event(struct garp_applicant *app, enum garp_event event) +{ + struct rb_node *node, *next; + struct garp_attr *attr; + + for (node = rb_first(&app->gid); + next = node ? rb_next(node) : NULL, node != NULL; + node = next) { + attr = rb_entry(node, struct garp_attr, node); + garp_attr_event(app, attr, event); + } +} + +static void garp_join_timer_arm(struct garp_applicant *app) +{ + unsigned long delay; + + delay = (u64)msecs_to_jiffies(garp_join_time) * net_random() >> 32; + mod_timer(&app->join_timer, jiffies + delay); +} + +static void garp_join_timer(unsigned long data) +{ + struct garp_applicant *app = (struct garp_applicant *)data; + + spin_lock(&app->lock); + garp_gid_event(app, GARP_EVENT_TRANSMIT_PDU); + garp_pdu_queue(app); + spin_unlock(&app->lock); + + garp_queue_xmit(app); + garp_join_timer_arm(app); +} + +static int garp_pdu_parse_end_mark(struct sk_buff *skb) +{ + if (!pskb_may_pull(skb, sizeof(u8))) + return -1; + if (*skb->data == GARP_END_MARK) { + skb_pull(skb, sizeof(u8)); + return -1; + } + return 0; +} + +static int garp_pdu_parse_attr(struct garp_applicant *app, struct sk_buff *skb, + u8 attrtype) +{ + const struct garp_attr_hdr *ga; + struct garp_attr *attr; + enum garp_event event; + unsigned int dlen; + + if (!pskb_may_pull(skb, sizeof(*ga))) + return -1; + ga = (struct garp_attr_hdr *)skb->data; + if (ga->len < sizeof(*ga)) + return -1; + + if (!pskb_may_pull(skb, ga->len)) + return -1; + skb_pull(skb, ga->len); + dlen = sizeof(*ga) - ga->len; + + if (attrtype > app->app->maxattr) + return 0; + + switch (ga->event) { + case GARP_LEAVE_ALL: + if (dlen != 0) + return -1; + garp_gid_event(app, GARP_EVENT_R_LEAVE_EMPTY); + return 0; + case GARP_JOIN_EMPTY: + event = GARP_EVENT_R_JOIN_EMPTY; + break; + case GARP_JOIN_IN: + event = GARP_EVENT_R_JOIN_IN; + break; + case GARP_LEAVE_EMPTY: + event = GARP_EVENT_R_LEAVE_EMPTY; + break; + case GARP_EMPTY: + event = GARP_EVENT_R_EMPTY; + break; + default: + return 0; + } + + if (dlen == 0) + return -1; + attr = garp_attr_lookup(app, ga->data, dlen, attrtype); + if (attr == NULL) + return 0; + garp_attr_event(app, attr, event); + return 0; +} + +static int garp_pdu_parse_msg(struct garp_applicant *app, struct sk_buff *skb) +{ + const struct garp_msg_hdr *gm; + + if (!pskb_may_pull(skb, sizeof(*gm))) + return -1; + gm = (struct garp_msg_hdr *)skb->data; + if (gm->attrtype == 0) + return -1; + skb_pull(skb, sizeof(*gm)); + + while (skb->len > 0) { + if (garp_pdu_parse_attr(app, skb, gm->attrtype) < 0) + return -1; + if (garp_pdu_parse_end_mark(skb) < 0) + break; + } + return 0; +} + +static void garp_pdu_rcv(const struct stp_proto *proto, struct sk_buff *skb, + struct net_device *dev) +{ + struct garp_application *appl = proto->data; + struct garp_port *port; + struct garp_applicant *app; + const struct garp_pdu_hdr *gp; + + port = rcu_dereference(dev->garp_port); + if (!port) + goto err; + app = rcu_dereference(port->applicants[appl->type]); + if (!app) + goto err; + + if (!pskb_may_pull(skb, sizeof(*gp))) + goto err; + gp = (struct garp_pdu_hdr *)skb->data; + if (get_unaligned(&gp->protocol) != htons(GARP_PROTOCOL_ID)) + goto err; + skb_pull(skb, sizeof(*gp)); + + spin_lock(&app->lock); + while (skb->len > 0) { + if (garp_pdu_parse_msg(app, skb) < 0) + break; + if (garp_pdu_parse_end_mark(skb) < 0) + break; + } + spin_unlock(&app->lock); +err: + kfree_skb(skb); +} + +static int garp_init_port(struct net_device *dev) +{ + struct garp_port *port; + + port = kzalloc(sizeof(*port), GFP_KERNEL); + if (!port) + return -ENOMEM; + rcu_assign_pointer(dev->garp_port, port); + return 0; +} + +static void garp_release_port(struct net_device *dev) +{ + struct garp_port *port = dev->garp_port; + unsigned int i; + + for (i = 0; i <= GARP_APPLICATION_MAX; i++) { + if (port->applicants[i]) + return; + } + rcu_assign_pointer(dev->garp_port, NULL); + synchronize_rcu(); + kfree(port); +} + +int garp_init_applicant(struct net_device *dev, struct garp_application *appl) +{ + struct garp_applicant *app; + int err; + + ASSERT_RTNL(); + + if (!dev->garp_port) { + err = garp_init_port(dev); + if (err < 0) + goto err1; + } + + err = -ENOMEM; + app = kzalloc(sizeof(*app), GFP_KERNEL); + if (!app) + goto err2; + + err = dev_mc_add(dev, appl->proto.group_address, ETH_ALEN, 0); + if (err < 0) + goto err3; + + app->dev = dev; + app->app = appl; + app->gid = RB_ROOT; + spin_lock_init(&app->lock); + skb_queue_head_init(&app->queue); + rcu_assign_pointer(dev->garp_port->applicants[appl->type], app); + setup_timer(&app->join_timer, garp_join_timer, (unsigned long)app); + garp_join_timer_arm(app); + return 0; + +err3: + kfree(app); +err2: + garp_release_port(dev); +err1: + return err; +} +EXPORT_SYMBOL_GPL(garp_init_applicant); + +void garp_uninit_applicant(struct net_device *dev, struct garp_application *appl) +{ + struct garp_port *port = dev->garp_port; + struct garp_applicant *app = port->applicants[appl->type]; + + ASSERT_RTNL(); + + rcu_assign_pointer(port->applicants[appl->type], NULL); + synchronize_rcu(); + + /* Delete timer and generate a final TRANSMIT_PDU event to flush out + * all pending messages before the applicant is gone. */ + del_timer_sync(&app->join_timer); + garp_gid_event(app, GARP_EVENT_TRANSMIT_PDU); + garp_pdu_queue(app); + garp_queue_xmit(app); + + dev_mc_delete(dev, appl->proto.group_address, ETH_ALEN, 0); + kfree(app); + garp_release_port(dev); +} +EXPORT_SYMBOL_GPL(garp_uninit_applicant); + +int garp_register_application(struct garp_application *appl) +{ + appl->proto.rcv = garp_pdu_rcv; + appl->proto.data = appl; + return stp_proto_register(&appl->proto); +} +EXPORT_SYMBOL_GPL(garp_register_application); + +void garp_unregister_application(struct garp_application *appl) +{ + stp_proto_unregister(&appl->proto); +} +EXPORT_SYMBOL_GPL(garp_unregister_application); diff --git a/net/802/psnap.c b/net/802/psnap.c index 31128cb92a23..70980baeb682 100644 --- a/net/802/psnap.c +++ b/net/802/psnap.c @@ -1,7 +1,7 @@ /* * SNAP data link layer. Derived from 802.2 * - * Alan Cox <Alan.Cox@linux.org>, + * Alan Cox <alan@lxorguk.ukuu.org.uk>, * from the 802.2 layer by Greg Page. * Merged in additions from Greg Page's psnap.c. * @@ -20,6 +20,7 @@ #include <linux/mm.h> #include <linux/in.h> #include <linux/init.h> +#include <linux/rculist.h> static LIST_HEAD(snap_list); static DEFINE_SPINLOCK(snap_lock); @@ -30,11 +31,9 @@ static struct llc_sap *snap_sap; */ static struct datalink_proto *find_snap_client(unsigned char *desc) { - struct list_head *entry; struct datalink_proto *proto = NULL, *p; - list_for_each_rcu(entry, &snap_list) { - p = list_entry(entry, struct datalink_proto, node); + list_for_each_entry_rcu(p, &snap_list, node) { if (!memcmp(p->type, desc, 5)) { proto = p; break; diff --git a/net/802/stp.c b/net/802/stp.c new file mode 100644 index 000000000000..0b7a24452d11 --- /dev/null +++ b/net/802/stp.c @@ -0,0 +1,102 @@ +/* + * STP SAP demux + * + * Copyright (c) 2008 Patrick McHardy <kaber@trash.net> + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * version 2 as published by the Free Software Foundation. + */ +#include <linux/mutex.h> +#include <linux/skbuff.h> +#include <linux/etherdevice.h> +#include <linux/llc.h> +#include <net/llc.h> +#include <net/llc_pdu.h> +#include <net/stp.h> + +/* 01:80:c2:00:00:20 - 01:80:c2:00:00:2F */ +#define GARP_ADDR_MIN 0x20 +#define GARP_ADDR_MAX 0x2F +#define GARP_ADDR_RANGE (GARP_ADDR_MAX - GARP_ADDR_MIN) + +static const struct stp_proto *garp_protos[GARP_ADDR_RANGE + 1] __read_mostly; +static const struct stp_proto *stp_proto __read_mostly; + +static struct llc_sap *sap __read_mostly; +static unsigned int sap_registered; +static DEFINE_MUTEX(stp_proto_mutex); + +/* Called under rcu_read_lock from LLC */ +static int stp_pdu_rcv(struct sk_buff *skb, struct net_device *dev, + struct packet_type *pt, struct net_device *orig_dev) +{ + const struct ethhdr *eh = eth_hdr(skb); + const struct llc_pdu_un *pdu = llc_pdu_un_hdr(skb); + const struct stp_proto *proto; + + if (pdu->ssap != LLC_SAP_BSPAN || + pdu->dsap != LLC_SAP_BSPAN || + pdu->ctrl_1 != LLC_PDU_TYPE_U) + goto err; + + if (eh->h_dest[5] >= GARP_ADDR_MIN && eh->h_dest[5] <= GARP_ADDR_MAX) { + proto = rcu_dereference(garp_protos[eh->h_dest[5] - + GARP_ADDR_MIN]); + if (proto && + compare_ether_addr(eh->h_dest, proto->group_address)) + goto err; + } else + proto = rcu_dereference(stp_proto); + + if (!proto) + goto err; + + proto->rcv(proto, skb, dev); + return 0; + +err: + kfree_skb(skb); + return 0; +} + +int stp_proto_register(const struct stp_proto *proto) +{ + int err = 0; + + mutex_lock(&stp_proto_mutex); + if (sap_registered++ == 0) { + sap = llc_sap_open(LLC_SAP_BSPAN, stp_pdu_rcv); + if (!sap) { + err = -ENOMEM; + goto out; + } + } + if (is_zero_ether_addr(proto->group_address)) + rcu_assign_pointer(stp_proto, proto); + else + rcu_assign_pointer(garp_protos[proto->group_address[5] - + GARP_ADDR_MIN], proto); +out: + mutex_unlock(&stp_proto_mutex); + return err; +} +EXPORT_SYMBOL_GPL(stp_proto_register); + +void stp_proto_unregister(const struct stp_proto *proto) +{ + mutex_lock(&stp_proto_mutex); + if (is_zero_ether_addr(proto->group_address)) + rcu_assign_pointer(stp_proto, NULL); + else + rcu_assign_pointer(garp_protos[proto->group_address[5] - + GARP_ADDR_MIN], NULL); + synchronize_rcu(); + + if (--sap_registered == 0) + llc_sap_put(sap); + mutex_unlock(&stp_proto_mutex); +} +EXPORT_SYMBOL_GPL(stp_proto_unregister); + +MODULE_LICENSE("GPL"); diff --git a/net/8021q/Kconfig b/net/8021q/Kconfig index c4a382e450e2..fa073a54963e 100644 --- a/net/8021q/Kconfig +++ b/net/8021q/Kconfig @@ -17,3 +17,13 @@ config VLAN_8021Q will be called 8021q. If unsure, say N. + +config VLAN_8021Q_GVRP + bool "GVRP (GARP VLAN Registration Protocol) support" + depends on VLAN_8021Q + select GARP + help + Select this to enable GVRP end-system support. GVRP is used for + automatic propagation of registered VLANs to switches. + + If unsure, say N. diff --git a/net/8021q/Makefile b/net/8021q/Makefile index 10ca7f486c3a..9f4f174ead1c 100644 --- a/net/8021q/Makefile +++ b/net/8021q/Makefile @@ -1,12 +1,10 @@ # # Makefile for the Linux VLAN layer. # +obj-$(subst m,y,$(CONFIG_VLAN_8021Q)) += vlan_core.o +obj-$(CONFIG_VLAN_8021Q) += 8021q.o -obj-$(CONFIG_VLAN_8021Q) += 8021q.o - -8021q-objs := vlan.o vlan_dev.o vlan_netlink.o - -ifeq ($(CONFIG_PROC_FS),y) -8021q-objs += vlanproc.o -endif +8021q-y := vlan.o vlan_dev.o vlan_netlink.o +8021q-$(CONFIG_VLAN_8021Q_GVRP) += vlan_gvrp.o +8021q-$(CONFIG_PROC_FS) += vlanproc.o diff --git a/net/8021q/vlan.c b/net/8021q/vlan.c index ab2225da0ee2..f0e335aa20df 100644 --- a/net/8021q/vlan.c +++ b/net/8021q/vlan.c @@ -18,21 +18,20 @@ * 2 of the License, or (at your option) any later version. */ -#include <asm/uaccess.h> /* for copy_from_user */ #include <linux/capability.h> #include <linux/module.h> #include <linux/netdevice.h> #include <linux/skbuff.h> -#include <net/datalink.h> -#include <linux/mm.h> -#include <linux/in.h> #include <linux/init.h> +#include <linux/rculist.h> #include <net/p8022.h> #include <net/arp.h> #include <linux/rtnetlink.h> #include <linux/notifier.h> +#include <net/rtnetlink.h> #include <net/net_namespace.h> #include <net/netns/generic.h> +#include <asm/uaccess.h> #include <linux/if_vlan.h> #include "vlan.h" @@ -83,13 +82,12 @@ static struct vlan_group *__vlan_find_group(struct net_device *real_dev) * * Must be invoked with RCU read lock (no preempt) */ -struct net_device *__find_vlan_dev(struct net_device *real_dev, - unsigned short VID) +struct net_device *__find_vlan_dev(struct net_device *real_dev, u16 vlan_id) { struct vlan_group *grp = __vlan_find_group(real_dev); if (grp) - return vlan_group_get_device(grp, VID); + return vlan_group_get_device(grp, vlan_id); return NULL; } @@ -117,14 +115,14 @@ static struct vlan_group *vlan_group_alloc(struct net_device *real_dev) return grp; } -static int vlan_group_prealloc_vid(struct vlan_group *vg, int vid) +static int vlan_group_prealloc_vid(struct vlan_group *vg, u16 vlan_id) { struct net_device **array; unsigned int size; ASSERT_RTNL(); - array = vg->vlan_devices_arrays[vid / VLAN_GROUP_ARRAY_PART_LEN]; + array = vg->vlan_devices_arrays[vlan_id / VLAN_GROUP_ARRAY_PART_LEN]; if (array != NULL) return 0; @@ -133,7 +131,7 @@ static int vlan_group_prealloc_vid(struct vlan_group *vg, int vid) if (array == NULL) return -ENOBUFS; - vg->vlan_devices_arrays[vid / VLAN_GROUP_ARRAY_PART_LEN] = array; + vg->vlan_devices_arrays[vlan_id / VLAN_GROUP_ARRAY_PART_LEN] = array; return 0; } @@ -147,7 +145,7 @@ void unregister_vlan_dev(struct net_device *dev) struct vlan_dev_info *vlan = vlan_dev_info(dev); struct net_device *real_dev = vlan->real_dev; struct vlan_group *grp; - unsigned short vlan_id = vlan->vlan_id; + u16 vlan_id = vlan->vlan_id; ASSERT_RTNL(); @@ -165,8 +163,12 @@ void unregister_vlan_dev(struct net_device *dev) synchronize_net(); + unregister_netdevice(dev); + /* If the group is now empty, kill off the group. */ if (grp->nr_vlans == 0) { + vlan_gvrp_uninit_applicant(real_dev); + if (real_dev->features & NETIF_F_HW_VLAN_RX) real_dev->vlan_rx_register(real_dev, NULL); @@ -178,8 +180,6 @@ void unregister_vlan_dev(struct net_device *dev) /* Get rid of the vlan's reference to real_dev */ dev_put(real_dev); - - unregister_netdevice(dev); } static void vlan_transfer_operstate(const struct net_device *dev, @@ -203,7 +203,7 @@ static void vlan_transfer_operstate(const struct net_device *dev, } } -int vlan_check_real_dev(struct net_device *real_dev, unsigned short vlan_id) +int vlan_check_real_dev(struct net_device *real_dev, u16 vlan_id) { char *name = real_dev->name; @@ -240,7 +240,7 @@ int register_vlan_dev(struct net_device *dev) { struct vlan_dev_info *vlan = vlan_dev_info(dev); struct net_device *real_dev = vlan->real_dev; - unsigned short vlan_id = vlan->vlan_id; + u16 vlan_id = vlan->vlan_id; struct vlan_group *grp, *ngrp = NULL; int err; @@ -249,15 +249,18 @@ int register_vlan_dev(struct net_device *dev) ngrp = grp = vlan_group_alloc(real_dev); if (!grp) return -ENOBUFS; + err = vlan_gvrp_init_applicant(real_dev); + if (err < 0) + goto out_free_group; } err = vlan_group_prealloc_vid(grp, vlan_id); if (err < 0) - goto out_free_group; + goto out_uninit_applicant; err = register_netdevice(dev); if (err < 0) - goto out_free_group; + goto out_uninit_applicant; /* Account for reference in struct vlan_dev_info */ dev_hold(real_dev); @@ -278,6 +281,9 @@ int register_vlan_dev(struct net_device *dev) return 0; +out_uninit_applicant: + if (ngrp) + vlan_gvrp_uninit_applicant(real_dev); out_free_group: if (ngrp) vlan_group_free(ngrp); @@ -287,8 +293,7 @@ out_free_group: /* Attach a VLAN device to a mac address (ie Ethernet Card). * Returns 0 if the device was created or a negative error code otherwise. */ -static int register_vlan_device(struct net_device *real_dev, - unsigned short VLAN_ID) +static int register_vlan_device(struct net_device *real_dev, u16 vlan_id) { struct net_device *new_dev; struct net *net = dev_net(real_dev); @@ -296,10 +301,10 @@ static int register_vlan_device(struct net_device *real_dev, char name[IFNAMSIZ]; int err; - if (VLAN_ID >= VLAN_VID_MASK) + if (vlan_id >= VLAN_VID_MASK) return -ERANGE; - err = vlan_check_real_dev(real_dev, VLAN_ID); + err = vlan_check_real_dev(real_dev, vlan_id); if (err < 0) return err; @@ -307,26 +312,26 @@ static int register_vlan_device(struct net_device *real_dev, switch (vn->name_type) { case VLAN_NAME_TYPE_RAW_PLUS_VID: /* name will look like: eth1.0005 */ - snprintf(name, IFNAMSIZ, "%s.%.4i", real_dev->name, VLAN_ID); + snprintf(name, IFNAMSIZ, "%s.%.4i", real_dev->name, vlan_id); break; case VLAN_NAME_TYPE_PLUS_VID_NO_PAD: /* Put our vlan.VID in the name. * Name will look like: vlan5 */ - snprintf(name, IFNAMSIZ, "vlan%i", VLAN_ID); + snprintf(name, IFNAMSIZ, "vlan%i", vlan_id); break; case VLAN_NAME_TYPE_RAW_PLUS_VID_NO_PAD: /* Put our vlan.VID in the name. * Name will look like: eth0.5 */ - snprintf(name, IFNAMSIZ, "%s.%i", real_dev->name, VLAN_ID); + snprintf(name, IFNAMSIZ, "%s.%i", real_dev->name, vlan_id); break; case VLAN_NAME_TYPE_PLUS_VID: /* Put our vlan.VID in the name. * Name will look like: vlan0005 */ default: - snprintf(name, IFNAMSIZ, "vlan%.4i", VLAN_ID); + snprintf(name, IFNAMSIZ, "vlan%.4i", vlan_id); } new_dev = alloc_netdev(sizeof(struct vlan_dev_info), name, @@ -341,7 +346,7 @@ static int register_vlan_device(struct net_device *real_dev, */ new_dev->mtu = real_dev->mtu; - vlan_dev_info(new_dev)->vlan_id = VLAN_ID; /* 1 through VLAN_VID_MASK */ + vlan_dev_info(new_dev)->vlan_id = vlan_id; vlan_dev_info(new_dev)->real_dev = real_dev; vlan_dev_info(new_dev)->dent = NULL; vlan_dev_info(new_dev)->flags = VLAN_FLAG_REORDER_HDR; @@ -389,6 +394,7 @@ static void vlan_transfer_features(struct net_device *dev, vlandev->features &= ~dev->vlan_features; vlandev->features |= dev->features & dev->vlan_features; + vlandev->gso_max_size = dev->gso_max_size; if (old_features != vlandev->features) netdev_features_change(vlandev); @@ -535,7 +541,6 @@ static struct notifier_block vlan_notifier_block __read_mostly = { static int vlan_ioctl_handler(struct net *net, void __user *arg) { int err; - unsigned short vid = 0; struct vlan_ioctl_args args; struct net_device *dev = NULL; @@ -562,8 +567,7 @@ static int vlan_ioctl_handler(struct net *net, void __user *arg) goto out; err = -EINVAL; - if (args.cmd != ADD_VLAN_CMD && - !(dev->priv_flags & IFF_802_1Q_VLAN)) + if (args.cmd != ADD_VLAN_CMD && !is_vlan_dev(dev)) goto out; } @@ -591,9 +595,9 @@ static int vlan_ioctl_handler(struct net *net, void __user *arg) err = -EPERM; if (!capable(CAP_NET_ADMIN)) break; - err = vlan_dev_set_vlan_flag(dev, - args.u.flag, - args.vlan_qos); + err = vlan_dev_change_flags(dev, + args.vlan_qos ? args.u.flag : 0, + args.u.flag); break; case SET_VLAN_NAME_TYPE_CMD: @@ -637,8 +641,7 @@ static int vlan_ioctl_handler(struct net *net, void __user *arg) case GET_VLAN_VID_CMD: err = 0; - vlan_dev_get_vid(dev, &vid); - args.u.VID = vid; + args.u.VID = vlan_dev_vlan_id(dev); if (copy_to_user(arg, &args, sizeof(struct vlan_ioctl_args))) err = -EFAULT; @@ -713,14 +716,20 @@ static int __init vlan_proto_init(void) if (err < 0) goto err2; - err = vlan_netlink_init(); + err = vlan_gvrp_init(); if (err < 0) goto err3; + err = vlan_netlink_init(); + if (err < 0) + goto err4; + dev_add_pack(&vlan_packet_type); vlan_ioctl_set(vlan_ioctl_handler); return 0; +err4: + vlan_gvrp_uninit(); err3: unregister_netdevice_notifier(&vlan_notifier_block); err2: @@ -745,8 +754,9 @@ static void __exit vlan_cleanup_module(void) BUG_ON(!hlist_empty(&vlan_group_hash[i])); unregister_pernet_gen_device(vlan_net_id, &vlan_net_ops); - synchronize_net(); + + vlan_gvrp_uninit(); } module_init(vlan_proto_init); diff --git a/net/8021q/vlan.h b/net/8021q/vlan.h index 5229a72c7ea1..a6603a4d917f 100644 --- a/net/8021q/vlan.h +++ b/net/8021q/vlan.h @@ -3,6 +3,55 @@ #include <linux/if_vlan.h> + +/** + * struct vlan_priority_tci_mapping - vlan egress priority mappings + * @priority: skb priority + * @vlan_qos: vlan priority: (skb->priority << 13) & 0xE000 + * @next: pointer to next struct + */ +struct vlan_priority_tci_mapping { + u32 priority; + u16 vlan_qos; + struct vlan_priority_tci_mapping *next; +}; + +/** + * struct vlan_dev_info - VLAN private device data + * @nr_ingress_mappings: number of ingress priority mappings + * @ingress_priority_map: ingress priority mappings + * @nr_egress_mappings: number of egress priority mappings + * @egress_priority_map: hash of egress priority mappings + * @vlan_id: VLAN identifier + * @flags: device flags + * @real_dev: underlying netdevice + * @real_dev_addr: address of underlying netdevice + * @dent: proc dir entry + * @cnt_inc_headroom_on_tx: statistic - number of skb expansions on TX + * @cnt_encap_on_xmit: statistic - number of skb encapsulations on TX + */ +struct vlan_dev_info { + unsigned int nr_ingress_mappings; + u32 ingress_priority_map[8]; + unsigned int nr_egress_mappings; + struct vlan_priority_tci_mapping *egress_priority_map[16]; + + u16 vlan_id; + u16 flags; + + struct net_device *real_dev; + unsigned char real_dev_addr[ETH_ALEN]; + + struct proc_dir_entry *dent; + unsigned long cnt_inc_headroom_on_tx; + unsigned long cnt_encap_on_xmit; +}; + +static inline struct vlan_dev_info *vlan_dev_info(const struct net_device *dev) +{ + return netdev_priv(dev); +} + #define VLAN_GRP_HASH_SHIFT 5 #define VLAN_GRP_HASH_SIZE (1 << VLAN_GRP_HASH_SHIFT) #define VLAN_GRP_HASH_MASK (VLAN_GRP_HASH_SIZE - 1) @@ -18,26 +67,47 @@ * Must be invoked with rcu_read_lock (ie preempt disabled) * or with RTNL. */ -struct net_device *__find_vlan_dev(struct net_device *real_dev, - unsigned short VID); /* vlan.c */ +struct net_device *__find_vlan_dev(struct net_device *real_dev, u16 vlan_id); /* found in vlan_dev.c */ int vlan_skb_recv(struct sk_buff *skb, struct net_device *dev, struct packet_type *ptype, struct net_device *orig_dev); void vlan_dev_set_ingress_priority(const struct net_device *dev, - u32 skb_prio, short vlan_prio); + u32 skb_prio, u16 vlan_prio); int vlan_dev_set_egress_priority(const struct net_device *dev, - u32 skb_prio, short vlan_prio); -int vlan_dev_set_vlan_flag(const struct net_device *dev, - u32 flag, short flag_val); + u32 skb_prio, u16 vlan_prio); +int vlan_dev_change_flags(const struct net_device *dev, u32 flag, u32 mask); void vlan_dev_get_realdev_name(const struct net_device *dev, char *result); -void vlan_dev_get_vid(const struct net_device *dev, unsigned short *result); -int vlan_check_real_dev(struct net_device *real_dev, unsigned short vlan_id); +int vlan_check_real_dev(struct net_device *real_dev, u16 vlan_id); void vlan_setup(struct net_device *dev); int register_vlan_dev(struct net_device *dev); void unregister_vlan_dev(struct net_device *dev); +static inline u32 vlan_get_ingress_priority(struct net_device *dev, + u16 vlan_tci) +{ + struct vlan_dev_info *vip = vlan_dev_info(dev); + + return vip->ingress_priority_map[(vlan_tci >> 13) & 0x7]; +} + +#ifdef CONFIG_VLAN_8021Q_GVRP +extern int vlan_gvrp_request_join(const struct net_device *dev); +extern void vlan_gvrp_request_leave(const struct net_device *dev); +extern int vlan_gvrp_init_applicant(struct net_device *dev); +extern void vlan_gvrp_uninit_applicant(struct net_device *dev); +extern int vlan_gvrp_init(void); +extern void vlan_gvrp_uninit(void); +#else +static inline int vlan_gvrp_request_join(const struct net_device *dev) { return 0; } +static inline void vlan_gvrp_request_leave(const struct net_device *dev) {} +static inline int vlan_gvrp_init_applicant(struct net_device *dev) { return 0; } +static inline void vlan_gvrp_uninit_applicant(struct net_device *dev) {} +static inline int vlan_gvrp_init(void) { return 0; } +static inline void vlan_gvrp_uninit(void) {} +#endif + int vlan_netlink_init(void); void vlan_netlink_fini(void); diff --git a/net/8021q/vlan_core.c b/net/8021q/vlan_core.c new file mode 100644 index 000000000000..916061f681b6 --- /dev/null +++ b/net/8021q/vlan_core.c @@ -0,0 +1,64 @@ +#include <linux/skbuff.h> +#include <linux/netdevice.h> +#include <linux/if_vlan.h> +#include "vlan.h" + +/* VLAN rx hw acceleration helper. This acts like netif_{rx,receive_skb}(). */ +int __vlan_hwaccel_rx(struct sk_buff *skb, struct vlan_group *grp, + u16 vlan_tci, int polling) +{ + struct net_device_stats *stats; + + if (skb_bond_should_drop(skb)) { + dev_kfree_skb_any(skb); + return NET_RX_DROP; + } + + skb->vlan_tci = vlan_tci; + netif_nit_deliver(skb); + + skb->dev = vlan_group_get_device(grp, vlan_tci & VLAN_VID_MASK); + if (skb->dev == NULL) { + dev_kfree_skb_any(skb); + /* Not NET_RX_DROP, this is not being dropped + * due to congestion. */ + return NET_RX_SUCCESS; + } + skb->dev->last_rx = jiffies; + skb->vlan_tci = 0; + + stats = &skb->dev->stats; + stats->rx_packets++; + stats->rx_bytes += skb->len; + + skb->priority = vlan_get_ingress_priority(skb->dev, vlan_tci); + switch (skb->pkt_type) { + case PACKET_BROADCAST: + break; + case PACKET_MULTICAST: + stats->multicast++; + break; + case PACKET_OTHERHOST: + /* Our lower layer thinks this is not local, let's make sure. + * This allows the VLAN to have a different MAC than the + * underlying device, and still route correctly. */ + if (!compare_ether_addr(eth_hdr(skb)->h_dest, + skb->dev->dev_addr)) + skb->pkt_type = PACKET_HOST; + break; + }; + return (polling ? netif_receive_skb(skb) : netif_rx(skb)); +} +EXPORT_SYMBOL(__vlan_hwaccel_rx); + +struct net_device *vlan_dev_real_dev(const struct net_device *dev) +{ + return vlan_dev_info(dev)->real_dev; +} +EXPORT_SYMBOL_GPL(vlan_dev_real_dev); + +u16 vlan_dev_vlan_id(const struct net_device *dev) +{ + return vlan_dev_info(dev)->vlan_id; +} +EXPORT_SYMBOL_GPL(vlan_dev_vlan_id); diff --git a/net/8021q/vlan_dev.c b/net/8021q/vlan_dev.c index 5d055c242ed8..8883e9c8a223 100644 --- a/net/8021q/vlan_dev.c +++ b/net/8021q/vlan_dev.c @@ -21,21 +21,15 @@ */ #include <linux/module.h> -#include <linux/mm.h> -#include <linux/in.h> -#include <linux/init.h> -#include <asm/uaccess.h> /* for copy_from_user */ #include <linux/skbuff.h> #include <linux/netdevice.h> #include <linux/etherdevice.h> -#include <net/datalink.h> -#include <net/p8022.h> +#include <linux/ethtool.h> #include <net/arp.h> #include "vlan.h" #include "vlanproc.h" #include <linux/if_vlan.h> -#include <net/ip.h> /* * Rebuild the Ethernet MAC header. This is called after an ARP @@ -54,7 +48,7 @@ static int vlan_dev_rebuild_header(struct sk_buff *skb) switch (veth->h_vlan_encapsulated_proto) { #ifdef CONFIG_INET - case __constant_htons(ETH_P_IP): + case htons(ETH_P_IP): /* TODO: Confirm this will work with VLAN headers... */ return arp_find(veth->h_dest, skb); @@ -73,11 +67,8 @@ static int vlan_dev_rebuild_header(struct sk_buff *skb) static inline struct sk_buff *vlan_check_reorder_header(struct sk_buff *skb) { if (vlan_dev_info(skb->dev)->flags & VLAN_FLAG_REORDER_HDR) { - if (skb_shared(skb) || skb_cloned(skb)) { - struct sk_buff *nskb = skb_copy(skb, GFP_ATOMIC); - kfree_skb(skb); - skb = nskb; - } + if (skb_cow(skb, skb_headroom(skb)) < 0) + skb = NULL; if (skb) { /* Lifted from Gleb's VLAN code... */ memmove(skb->data - ETH_HLEN, @@ -149,9 +140,9 @@ int vlan_skb_recv(struct sk_buff *skb, struct net_device *dev, struct packet_type *ptype, struct net_device *orig_dev) { struct vlan_hdr *vhdr; - unsigned short vid; struct net_device_stats *stats; - unsigned short vlan_TCI; + u16 vlan_id; + u16 vlan_tci; skb = skb_share_check(skb, GFP_ATOMIC); if (skb == NULL) @@ -161,14 +152,14 @@ int vlan_skb_recv(struct sk_buff *skb, struct net_device *dev, goto err_free; vhdr = (struct vlan_hdr *)skb->data; - vlan_TCI = ntohs(vhdr->h_vlan_TCI); - vid = (vlan_TCI & VLAN_VID_MASK); + vlan_tci = ntohs(vhdr->h_vlan_TCI); + vlan_id = vlan_tci & VLAN_VID_MASK; rcu_read_lock(); - skb->dev = __find_vlan_dev(dev, vid); + skb->dev = __find_vlan_dev(dev, vlan_id); if (!skb->dev) { pr_debug("%s: ERROR: No net_device for VID: %u on dev: %s\n", - __func__, (unsigned int)vid, dev->name); + __func__, vlan_id, dev->name); goto err_unlock; } @@ -180,11 +171,10 @@ int vlan_skb_recv(struct sk_buff *skb, struct net_device *dev, skb_pull_rcsum(skb, VLAN_HLEN); - skb->priority = vlan_get_ingress_priority(skb->dev, - ntohs(vhdr->h_vlan_TCI)); + skb->priority = vlan_get_ingress_priority(skb->dev, vlan_tci); pr_debug("%s: priority: %u for TCI: %hu\n", - __func__, skb->priority, ntohs(vhdr->h_vlan_TCI)); + __func__, skb->priority, vlan_tci); switch (skb->pkt_type) { case PACKET_BROADCAST: /* Yeah, stats collect these together.. */ @@ -227,7 +217,7 @@ err_free: return NET_RX_DROP; } -static inline unsigned short +static inline u16 vlan_dev_get_egress_qos_mask(struct net_device *dev, struct sk_buff *skb) { struct vlan_priority_tci_mapping *mp; @@ -259,103 +249,44 @@ static int vlan_dev_hard_header(struct sk_buff *skb, struct net_device *dev, unsigned int len) { struct vlan_hdr *vhdr; - unsigned short veth_TCI = 0; - int rc = 0; - int build_vlan_header = 0; - struct net_device *vdev = dev; - - pr_debug("%s: skb: %p type: %hx len: %u vlan_id: %hx, daddr: %p\n", - __func__, skb, type, len, vlan_dev_info(dev)->vlan_id, - daddr); - - /* build vlan header only if re_order_header flag is NOT set. This - * fixes some programs that get confused when they see a VLAN device - * sending a frame that is VLAN encoded (the consensus is that the VLAN - * device should look completely like an Ethernet device when the - * REORDER_HEADER flag is set) The drawback to this is some extra - * header shuffling in the hard_start_xmit. Users can turn off this - * REORDER behaviour with the vconfig tool. - */ - if (!(vlan_dev_info(dev)->flags & VLAN_FLAG_REORDER_HDR)) - build_vlan_header = 1; + unsigned int vhdrlen = 0; + u16 vlan_tci = 0; + int rc; - if (build_vlan_header) { - vhdr = (struct vlan_hdr *) skb_push(skb, VLAN_HLEN); + if (WARN_ON(skb_headroom(skb) < dev->hard_header_len)) + return -ENOSPC; - /* build the four bytes that make this a VLAN header. */ - - /* Now, construct the second two bytes. This field looks - * something like: - * usr_priority: 3 bits (high bits) - * CFI 1 bit - * VLAN ID 12 bits (low bits) - * - */ - veth_TCI = vlan_dev_info(dev)->vlan_id; - veth_TCI |= vlan_dev_get_egress_qos_mask(dev, skb); + if (!(vlan_dev_info(dev)->flags & VLAN_FLAG_REORDER_HDR)) { + vhdr = (struct vlan_hdr *) skb_push(skb, VLAN_HLEN); - vhdr->h_vlan_TCI = htons(veth_TCI); + vlan_tci = vlan_dev_info(dev)->vlan_id; + vlan_tci |= vlan_dev_get_egress_qos_mask(dev, skb); + vhdr->h_vlan_TCI = htons(vlan_tci); /* * Set the protocol type. For a packet of type ETH_P_802_3 we * put the length in here instead. It is up to the 802.2 * layer to carry protocol information. */ - if (type != ETH_P_802_3) vhdr->h_vlan_encapsulated_proto = htons(type); else vhdr->h_vlan_encapsulated_proto = htons(len); skb->protocol = htons(ETH_P_8021Q); - skb_reset_network_header(skb); + type = ETH_P_8021Q; + vhdrlen = VLAN_HLEN; } /* Before delegating work to the lower layer, enter our MAC-address */ if (saddr == NULL) saddr = dev->dev_addr; + /* Now make the underlying real hard header */ dev = vlan_dev_info(dev)->real_dev; - - /* MPLS can send us skbuffs w/out enough space. This check will grow - * the skb if it doesn't have enough headroom. Not a beautiful solution, - * so I'll tick a counter so that users can know it's happening... - * If they care... - */ - - /* NOTE: This may still break if the underlying device is not the final - * device (and thus there are more headers to add...) It should work for - * good-ole-ethernet though. - */ - if (skb_headroom(skb) < dev->hard_header_len) { - struct sk_buff *sk_tmp = skb; - skb = skb_realloc_headroom(sk_tmp, dev->hard_header_len); - kfree_skb(sk_tmp); - if (skb == NULL) { - struct net_device_stats *stats = &vdev->stats; - stats->tx_dropped++; - return -ENOMEM; - } - vlan_dev_info(vdev)->cnt_inc_headroom_on_tx++; - pr_debug("%s: %s: had to grow skb\n", __func__, vdev->name); - } - - if (build_vlan_header) { - /* Now make the underlying real hard header */ - rc = dev_hard_header(skb, dev, ETH_P_8021Q, daddr, saddr, - len + VLAN_HLEN); - if (rc > 0) - rc += VLAN_HLEN; - else if (rc < 0) - rc -= VLAN_HLEN; - } else - /* If here, then we'll just make a normal looking ethernet - * frame, but, the hard_start_xmit method will insert the tag - * (it has to be able to do this for bridged and other skbs - * that don't come down the protocol stack in an orderly manner. - */ - rc = dev_hard_header(skb, dev, type, daddr, saddr, len); - + rc = dev_hard_header(skb, dev, type, daddr, saddr, len + vhdrlen); + if (rc > 0) + rc += vhdrlen; return rc; } @@ -369,78 +300,49 @@ static int vlan_dev_hard_start_xmit(struct sk_buff *skb, struct net_device *dev) * NOTE: THIS ASSUMES DIX ETHERNET, SPECIFICALLY NOT SUPPORTING * OTHER THINGS LIKE FDDI/TokenRing/802.3 SNAPs... */ - if (veth->h_vlan_proto != htons(ETH_P_8021Q) || - vlan_dev_info(dev)->flags & VLAN_FLAG_REORDER_HDR) { - int orig_headroom = skb_headroom(skb); - unsigned short veth_TCI; + vlan_dev_info(dev)->flags & VLAN_FLAG_REORDER_HDR) { + unsigned int orig_headroom = skb_headroom(skb); + u16 vlan_tci; - /* This is not a VLAN frame...but we can fix that! */ vlan_dev_info(dev)->cnt_encap_on_xmit++; - pr_debug("%s: proto to encap: 0x%hx\n", - __func__, ntohs(veth->h_vlan_proto)); - /* Construct the second two bytes. This field looks something - * like: - * usr_priority: 3 bits (high bits) - * CFI 1 bit - * VLAN ID 12 bits (low bits) - */ - veth_TCI = vlan_dev_info(dev)->vlan_id; - veth_TCI |= vlan_dev_get_egress_qos_mask(dev, skb); - - skb = __vlan_put_tag(skb, veth_TCI); + vlan_tci = vlan_dev_info(dev)->vlan_id; + vlan_tci |= vlan_dev_get_egress_qos_mask(dev, skb); + skb = __vlan_put_tag(skb, vlan_tci); if (!skb) { stats->tx_dropped++; - return 0; + return NETDEV_TX_OK; } if (orig_headroom < VLAN_HLEN) vlan_dev_info(dev)->cnt_inc_headroom_on_tx++; } - pr_debug("%s: about to send skb: %p to dev: %s\n", - __func__, skb, skb->dev->name); - pr_debug(" " MAC_FMT " " MAC_FMT " %4hx %4hx %4hx\n", - veth->h_dest[0], veth->h_dest[1], veth->h_dest[2], - veth->h_dest[3], veth->h_dest[4], veth->h_dest[5], - veth->h_source[0], veth->h_source[1], veth->h_source[2], - veth->h_source[3], veth->h_source[4], veth->h_source[5], - veth->h_vlan_proto, veth->h_vlan_TCI, - veth->h_vlan_encapsulated_proto); - - stats->tx_packets++; /* for statics only */ + stats->tx_packets++; stats->tx_bytes += skb->len; skb->dev = vlan_dev_info(dev)->real_dev; dev_queue_xmit(skb); - - return 0; + return NETDEV_TX_OK; } static int vlan_dev_hwaccel_hard_start_xmit(struct sk_buff *skb, struct net_device *dev) { struct net_device_stats *stats = &dev->stats; - unsigned short veth_TCI; + u16 vlan_tci; - /* Construct the second two bytes. This field looks something - * like: - * usr_priority: 3 bits (high bits) - * CFI 1 bit - * VLAN ID 12 bits (low bits) - */ - veth_TCI = vlan_dev_info(dev)->vlan_id; - veth_TCI |= vlan_dev_get_egress_qos_mask(dev, skb); - skb = __vlan_hwaccel_put_tag(skb, veth_TCI); + vlan_tci = vlan_dev_info(dev)->vlan_id; + vlan_tci |= vlan_dev_get_egress_qos_mask(dev, skb); + skb = __vlan_hwaccel_put_tag(skb, vlan_tci); stats->tx_packets++; stats->tx_bytes += skb->len; skb->dev = vlan_dev_info(dev)->real_dev; dev_queue_xmit(skb); - - return 0; + return NETDEV_TX_OK; } static int vlan_dev_change_mtu(struct net_device *dev, int new_mtu) @@ -457,7 +359,7 @@ static int vlan_dev_change_mtu(struct net_device *dev, int new_mtu) } void vlan_dev_set_ingress_priority(const struct net_device *dev, - u32 skb_prio, short vlan_prio) + u32 skb_prio, u16 vlan_prio) { struct vlan_dev_info *vlan = vlan_dev_info(dev); @@ -470,7 +372,7 @@ void vlan_dev_set_ingress_priority(const struct net_device *dev, } int vlan_dev_set_egress_priority(const struct net_device *dev, - u32 skb_prio, short vlan_prio) + u32 skb_prio, u16 vlan_prio) { struct vlan_dev_info *vlan = vlan_dev_info(dev); struct vlan_priority_tci_mapping *mp = NULL; @@ -507,18 +409,23 @@ int vlan_dev_set_egress_priority(const struct net_device *dev, } /* Flags are defined in the vlan_flags enum in include/linux/if_vlan.h file. */ -int vlan_dev_set_vlan_flag(const struct net_device *dev, - u32 flag, short flag_val) +int vlan_dev_change_flags(const struct net_device *dev, u32 flags, u32 mask) { - /* verify flag is supported */ - if (flag == VLAN_FLAG_REORDER_HDR) { - if (flag_val) - vlan_dev_info(dev)->flags |= VLAN_FLAG_REORDER_HDR; + struct vlan_dev_info *vlan = vlan_dev_info(dev); + u32 old_flags = vlan->flags; + + if (mask & ~(VLAN_FLAG_REORDER_HDR | VLAN_FLAG_GVRP)) + return -EINVAL; + + vlan->flags = (old_flags & ~mask) | (flags & mask); + + if (netif_running(dev) && (vlan->flags ^ old_flags) & VLAN_FLAG_GVRP) { + if (vlan->flags & VLAN_FLAG_GVRP) + vlan_gvrp_request_join(dev); else - vlan_dev_info(dev)->flags &= ~VLAN_FLAG_REORDER_HDR; - return 0; + vlan_gvrp_request_leave(dev); } - return -EINVAL; + return 0; } void vlan_dev_get_realdev_name(const struct net_device *dev, char *result) @@ -526,11 +433,6 @@ void vlan_dev_get_realdev_name(const struct net_device *dev, char *result) strncpy(result, vlan_dev_info(dev)->real_dev->name, 23); } -void vlan_dev_get_vid(const struct net_device *dev, unsigned short *result) -{ - *result = vlan_dev_info(dev)->vlan_id; -} - static int vlan_dev_open(struct net_device *dev) { struct vlan_dev_info *vlan = vlan_dev_info(dev); @@ -543,21 +445,44 @@ static int vlan_dev_open(struct net_device *dev) if (compare_ether_addr(dev->dev_addr, real_dev->dev_addr)) { err = dev_unicast_add(real_dev, dev->dev_addr, ETH_ALEN); if (err < 0) - return err; + goto out; + } + + if (dev->flags & IFF_ALLMULTI) { + err = dev_set_allmulti(real_dev, 1); + if (err < 0) + goto del_unicast; } + if (dev->flags & IFF_PROMISC) { + err = dev_set_promiscuity(real_dev, 1); + if (err < 0) + goto clear_allmulti; + } + memcpy(vlan->real_dev_addr, real_dev->dev_addr, ETH_ALEN); - if (dev->flags & IFF_ALLMULTI) - dev_set_allmulti(real_dev, 1); - if (dev->flags & IFF_PROMISC) - dev_set_promiscuity(real_dev, 1); + if (vlan->flags & VLAN_FLAG_GVRP) + vlan_gvrp_request_join(dev); return 0; + +clear_allmulti: + if (dev->flags & IFF_ALLMULTI) + dev_set_allmulti(real_dev, -1); +del_unicast: + if (compare_ether_addr(dev->dev_addr, real_dev->dev_addr)) + dev_unicast_delete(real_dev, dev->dev_addr, ETH_ALEN); +out: + return err; } static int vlan_dev_stop(struct net_device *dev) { - struct net_device *real_dev = vlan_dev_info(dev)->real_dev; + struct vlan_dev_info *vlan = vlan_dev_info(dev); + struct net_device *real_dev = vlan->real_dev; + + if (vlan->flags & VLAN_FLAG_GVRP) + vlan_gvrp_request_leave(dev); dev_mc_unsync(real_dev, dev); dev_unicast_unsync(real_dev, dev); @@ -644,6 +569,24 @@ static void vlan_dev_set_rx_mode(struct net_device *vlan_dev) * separate class since they always nest. */ static struct lock_class_key vlan_netdev_xmit_lock_key; +static struct lock_class_key vlan_netdev_addr_lock_key; + +static void vlan_dev_set_lockdep_one(struct net_device *dev, + struct netdev_queue *txq, + void *_subclass) +{ + lockdep_set_class_and_subclass(&txq->_xmit_lock, + &vlan_netdev_xmit_lock_key, + *(int *)_subclass); +} + +static void vlan_dev_set_lockdep_class(struct net_device *dev, int subclass) +{ + lockdep_set_class_and_subclass(&dev->addr_list_lock, + &vlan_netdev_addr_lock_key, + subclass); + netdev_for_each_tx_queue(dev, vlan_dev_set_lockdep_one, &subclass); +} static const struct header_ops vlan_header_ops = { .create = vlan_dev_hard_header, @@ -664,6 +607,7 @@ static int vlan_dev_init(struct net_device *dev) (1<<__LINK_STATE_PRESENT); dev->features |= real_dev->features & real_dev->vlan_features; + dev->gso_max_size = real_dev->gso_max_size; /* ipv6 shared card related stuff */ dev->dev_id = real_dev->dev_id; @@ -683,11 +627,10 @@ static int vlan_dev_init(struct net_device *dev) dev->hard_start_xmit = vlan_dev_hard_start_xmit; } - if (real_dev->priv_flags & IFF_802_1Q_VLAN) + if (is_vlan_dev(real_dev)) subclass = 1; - lockdep_set_class_and_subclass(&dev->_xmit_lock, - &vlan_netdev_xmit_lock_key, subclass); + vlan_dev_set_lockdep_class(dev, subclass); return 0; } @@ -705,6 +648,35 @@ static void vlan_dev_uninit(struct net_device *dev) } } +static u32 vlan_ethtool_get_rx_csum(struct net_device *dev) +{ + const struct vlan_dev_info *vlan = vlan_dev_info(dev); + struct net_device *real_dev = vlan->real_dev; + + if (real_dev->ethtool_ops == NULL || + real_dev->ethtool_ops->get_rx_csum == NULL) + return 0; + return real_dev->ethtool_ops->get_rx_csum(real_dev); +} + +static u32 vlan_ethtool_get_flags(struct net_device *dev) +{ + const struct vlan_dev_info *vlan = vlan_dev_info(dev); + struct net_device *real_dev = vlan->real_dev; + + if (!(real_dev->features & NETIF_F_HW_VLAN_RX) || + real_dev->ethtool_ops == NULL || + real_dev->ethtool_ops->get_flags == NULL) + return 0; + return real_dev->ethtool_ops->get_flags(real_dev); +} + +static const struct ethtool_ops vlan_ethtool_ops = { + .get_link = ethtool_op_get_link, + .get_rx_csum = vlan_ethtool_get_rx_csum, + .get_flags = vlan_ethtool_get_flags, +}; + void vlan_setup(struct net_device *dev) { ether_setup(dev); @@ -723,6 +695,7 @@ void vlan_setup(struct net_device *dev) dev->change_rx_flags = vlan_dev_change_rx_flags; dev->do_ioctl = vlan_dev_ioctl; dev->destructor = free_netdev; + dev->ethtool_ops = &vlan_ethtool_ops; memset(dev->broadcast, 0, ETH_ALEN); } diff --git a/net/8021q/vlan_gvrp.c b/net/8021q/vlan_gvrp.c new file mode 100644 index 000000000000..061ceceeef12 --- /dev/null +++ b/net/8021q/vlan_gvrp.c @@ -0,0 +1,66 @@ +/* + * IEEE 802.1Q GARP VLAN Registration Protocol (GVRP) + * + * Copyright (c) 2008 Patrick McHardy <kaber@trash.net> + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * version 2 as published by the Free Software Foundation. + */ +#include <linux/types.h> +#include <linux/if_vlan.h> +#include <net/garp.h> +#include "vlan.h" + +#define GARP_GVRP_ADDRESS { 0x01, 0x80, 0xc2, 0x00, 0x00, 0x21 } + +enum gvrp_attributes { + GVRP_ATTR_INVALID, + GVRP_ATTR_VID, + __GVRP_ATTR_MAX +}; +#define GVRP_ATTR_MAX (__GVRP_ATTR_MAX - 1) + +static struct garp_application vlan_gvrp_app __read_mostly = { + .proto.group_address = GARP_GVRP_ADDRESS, + .maxattr = GVRP_ATTR_MAX, + .type = GARP_APPLICATION_GVRP, +}; + +int vlan_gvrp_request_join(const struct net_device *dev) +{ + const struct vlan_dev_info *vlan = vlan_dev_info(dev); + __be16 vlan_id = htons(vlan->vlan_id); + + return garp_request_join(vlan->real_dev, &vlan_gvrp_app, + &vlan_id, sizeof(vlan_id), GVRP_ATTR_VID); +} + +void vlan_gvrp_request_leave(const struct net_device *dev) +{ + const struct vlan_dev_info *vlan = vlan_dev_info(dev); + __be16 vlan_id = htons(vlan->vlan_id); + + garp_request_leave(vlan->real_dev, &vlan_gvrp_app, + &vlan_id, sizeof(vlan_id), GVRP_ATTR_VID); +} + +int vlan_gvrp_init_applicant(struct net_device *dev) +{ + return garp_init_applicant(dev, &vlan_gvrp_app); +} + +void vlan_gvrp_uninit_applicant(struct net_device *dev) +{ + garp_uninit_applicant(dev, &vlan_gvrp_app); +} + +int __init vlan_gvrp_init(void) +{ + return garp_register_application(&vlan_gvrp_app); +} + +void vlan_gvrp_uninit(void) +{ + garp_unregister_application(&vlan_gvrp_app); +} diff --git a/net/8021q/vlan_netlink.c b/net/8021q/vlan_netlink.c index c93e69ec28ed..e9c91dcecc9b 100644 --- a/net/8021q/vlan_netlink.c +++ b/net/8021q/vlan_netlink.c @@ -59,7 +59,8 @@ static int vlan_validate(struct nlattr *tb[], struct nlattr *data[]) } if (data[IFLA_VLAN_FLAGS]) { flags = nla_data(data[IFLA_VLAN_FLAGS]); - if ((flags->flags & flags->mask) & ~VLAN_FLAG_REORDER_HDR) + if ((flags->flags & flags->mask) & + ~(VLAN_FLAG_REORDER_HDR | VLAN_FLAG_GVRP)) return -EINVAL; } @@ -75,7 +76,6 @@ static int vlan_validate(struct nlattr *tb[], struct nlattr *data[]) static int vlan_changelink(struct net_device *dev, struct nlattr *tb[], struct nlattr *data[]) { - struct vlan_dev_info *vlan = vlan_dev_info(dev); struct ifla_vlan_flags *flags; struct ifla_vlan_qos_mapping *m; struct nlattr *attr; @@ -83,8 +83,7 @@ static int vlan_changelink(struct net_device *dev, if (data[IFLA_VLAN_FLAGS]) { flags = nla_data(data[IFLA_VLAN_FLAGS]); - vlan->flags = (vlan->flags & ~flags->mask) | - (flags->flags & flags->mask); + vlan_dev_change_flags(dev, flags->flags, flags->mask); } if (data[IFLA_VLAN_INGRESS_QOS]) { nla_for_each_nested(attr, data[IFLA_VLAN_INGRESS_QOS], rem) { diff --git a/net/8021q/vlanproc.c b/net/8021q/vlanproc.c index 08b54b593d56..0feefa4e1a4b 100644 --- a/net/8021q/vlanproc.c +++ b/net/8021q/vlanproc.c @@ -18,16 +18,9 @@ *****************************************************************************/ #include <linux/module.h> -#include <linux/stddef.h> /* offsetof(), etc. */ -#include <linux/errno.h> /* return codes */ +#include <linux/errno.h> #include <linux/kernel.h> -#include <linux/slab.h> /* kmalloc(), kfree() */ -#include <linux/mm.h> -#include <linux/string.h> /* inline mem*, str* functions */ -#include <linux/init.h> /* __initfunc et al. */ -#include <asm/byteorder.h> /* htons(), etc. */ -#include <asm/uaccess.h> /* copy_to_user */ -#include <asm/io.h> +#include <linux/string.h> #include <linux/proc_fs.h> #include <linux/seq_file.h> #include <linux/fs.h> @@ -290,7 +283,7 @@ static int vlandev_seq_show(struct seq_file *seq, void *offset) static const char fmt[] = "%30s %12lu\n"; int i; - if (!(vlandev->priv_flags & IFF_802_1Q_VLAN)) + if (!is_vlan_dev(vlandev)) return 0; seq_printf(seq, diff --git a/net/9p/client.c b/net/9p/client.c index 2ffe40cf2f01..e053e06028a5 100644 --- a/net/9p/client.c +++ b/net/9p/client.c @@ -52,7 +52,7 @@ enum { Opt_err, }; -static match_table_t tokens = { +static const match_table_t tokens = { {Opt_msize, "msize=%u"}, {Opt_legacy, "noextend"}, {Opt_trans, "trans=%s"}, @@ -75,7 +75,6 @@ static int parse_opts(char *opts, struct p9_client *clnt) int option; int ret = 0; - clnt->trans_mod = v9fs_default_trans(); clnt->dotu = 1; clnt->msize = 8192; @@ -108,7 +107,7 @@ static int parse_opts(char *opts, struct p9_client *clnt) clnt->msize = option; break; case Opt_trans: - clnt->trans_mod = v9fs_match_trans(&args[0]); + clnt->trans_mod = v9fs_get_trans_by_name(&args[0]); break; case Opt_legacy: clnt->dotu = 0; @@ -117,6 +116,10 @@ static int parse_opts(char *opts, struct p9_client *clnt) continue; } } + + if (!clnt->trans_mod) + clnt->trans_mod = v9fs_get_default_trans(); + kfree(options); return ret; } @@ -150,6 +153,7 @@ struct p9_client *p9_client_create(const char *dev_name, char *options) if (!clnt) return ERR_PTR(-ENOMEM); + clnt->trans_mod = NULL; clnt->trans = NULL; spin_lock_init(&clnt->lock); INIT_LIST_HEAD(&clnt->fidlist); @@ -235,6 +239,8 @@ void p9_client_destroy(struct p9_client *clnt) clnt->trans = NULL; } + v9fs_put_trans(clnt->trans_mod); + list_for_each_entry_safe(fid, fidptr, &clnt->fidlist, flist) p9_fid_destroy(fid); diff --git a/net/9p/conv.c b/net/9p/conv.c index 44547201f5bc..5ad3a3bd73b2 100644 --- a/net/9p/conv.c +++ b/net/9p/conv.c @@ -451,8 +451,10 @@ p9_put_data(struct cbuf *bufp, const char *data, int count, unsigned char **pdata) { *pdata = buf_alloc(bufp, count); + if (*pdata == NULL) + return -ENOMEM; memmove(*pdata, data, count); - return count; + return 0; } static int @@ -460,6 +462,8 @@ p9_put_user_data(struct cbuf *bufp, const char __user *data, int count, unsigned char **pdata) { *pdata = buf_alloc(bufp, count); + if (*pdata == NULL) + return -ENOMEM; return copy_from_user(*pdata, data, count); } diff --git a/net/9p/mod.c b/net/9p/mod.c index bdee1fb7cc62..1084feb24cb0 100644 --- a/net/9p/mod.c +++ b/net/9p/mod.c @@ -31,6 +31,7 @@ #include <linux/parser.h> #include <net/9p/transport.h> #include <linux/list.h> +#include <linux/spinlock.h> #ifdef CONFIG_NET_9P_DEBUG unsigned int p9_debug_level = 0; /* feature-rific global debug level */ @@ -44,8 +45,8 @@ MODULE_PARM_DESC(debug, "9P debugging level"); * */ +static DEFINE_SPINLOCK(v9fs_trans_lock); static LIST_HEAD(v9fs_trans_list); -static struct p9_trans_module *v9fs_default_transport; /** * v9fs_register_trans - register a new transport with 9p @@ -54,48 +55,87 @@ static struct p9_trans_module *v9fs_default_transport; */ void v9fs_register_trans(struct p9_trans_module *m) { + spin_lock(&v9fs_trans_lock); list_add_tail(&m->list, &v9fs_trans_list); - if (m->def) - v9fs_default_transport = m; + spin_unlock(&v9fs_trans_lock); } EXPORT_SYMBOL(v9fs_register_trans); /** - * v9fs_match_trans - match transport versus registered transports + * v9fs_unregister_trans - unregister a 9p transport + * @m: the transport to remove + * + */ +void v9fs_unregister_trans(struct p9_trans_module *m) +{ + spin_lock(&v9fs_trans_lock); + list_del_init(&m->list); + spin_unlock(&v9fs_trans_lock); +} +EXPORT_SYMBOL(v9fs_unregister_trans); + +/** + * v9fs_get_trans_by_name - get transport with the matching name * @name: string identifying transport * */ -struct p9_trans_module *v9fs_match_trans(const substring_t *name) +struct p9_trans_module *v9fs_get_trans_by_name(const substring_t *name) { - struct list_head *p; - struct p9_trans_module *t = NULL; - - list_for_each(p, &v9fs_trans_list) { - t = list_entry(p, struct p9_trans_module, list); - if (strncmp(t->name, name->from, name->to-name->from) == 0) - return t; - } - return NULL; + struct p9_trans_module *t, *found = NULL; + + spin_lock(&v9fs_trans_lock); + + list_for_each_entry(t, &v9fs_trans_list, list) + if (strncmp(t->name, name->from, name->to-name->from) == 0 && + try_module_get(t->owner)) { + found = t; + break; + } + + spin_unlock(&v9fs_trans_lock); + return found; } -EXPORT_SYMBOL(v9fs_match_trans); +EXPORT_SYMBOL(v9fs_get_trans_by_name); /** - * v9fs_default_trans - returns pointer to default transport + * v9fs_get_default_trans - get the default transport * */ -struct p9_trans_module *v9fs_default_trans(void) +struct p9_trans_module *v9fs_get_default_trans(void) { - if (v9fs_default_transport) - return v9fs_default_transport; - else if (!list_empty(&v9fs_trans_list)) - return list_first_entry(&v9fs_trans_list, - struct p9_trans_module, list); - else - return NULL; + struct p9_trans_module *t, *found = NULL; + + spin_lock(&v9fs_trans_lock); + + list_for_each_entry(t, &v9fs_trans_list, list) + if (t->def && try_module_get(t->owner)) { + found = t; + break; + } + + if (!found) + list_for_each_entry(t, &v9fs_trans_list, list) + if (try_module_get(t->owner)) { + found = t; + break; + } + + spin_unlock(&v9fs_trans_lock); + return found; } -EXPORT_SYMBOL(v9fs_default_trans); +EXPORT_SYMBOL(v9fs_get_default_trans); +/** + * v9fs_put_trans - put trans + * @m: transport to put + * + */ +void v9fs_put_trans(struct p9_trans_module *m) +{ + if (m) + module_put(m->owner); +} /** * v9fs_init - Initialize module @@ -120,6 +160,8 @@ static int __init init_p9(void) static void __exit exit_p9(void) { printk(KERN_INFO "Unloading 9P2000 support\n"); + + p9_trans_fd_exit(); } module_init(init_p9) diff --git a/net/9p/trans_fd.c b/net/9p/trans_fd.c index 4507f744f44e..6dabbdb66651 100644 --- a/net/9p/trans_fd.c +++ b/net/9p/trans_fd.c @@ -86,7 +86,7 @@ enum { Opt_port, Opt_rfdno, Opt_wfdno, Opt_err, }; -static match_table_t tokens = { +static const match_table_t tokens = { {Opt_port, "port=%u"}, {Opt_rfdno, "rfdno=%u"}, {Opt_wfdno, "wfdno=%u"}, @@ -151,7 +151,6 @@ struct p9_mux_poll_task { * @trans: reference to transport instance for this connection * @tagpool: id accounting for transactions * @err: error state - * @equeue: event wait_q (?) * @req_list: accounting for requests which have been sent * @unsent_req_list: accounting for requests that haven't been sent * @rcall: current response &p9_fcall structure @@ -178,7 +177,6 @@ struct p9_conn { struct p9_trans *trans; struct p9_idpool *tagpool; int err; - wait_queue_head_t equeue; struct list_head req_list; struct list_head unsent_req_list; struct p9_fcall *rcall; @@ -240,22 +238,6 @@ static int p9_conn_rpcnb(struct p9_conn *m, struct p9_fcall *tc, static void p9_conn_cancel(struct p9_conn *m, int err); -static int p9_mux_global_init(void) -{ - int i; - - for (i = 0; i < ARRAY_SIZE(p9_mux_poll_tasks); i++) - p9_mux_poll_tasks[i].task = NULL; - - p9_mux_wq = create_workqueue("v9fs"); - if (!p9_mux_wq) { - printk(KERN_WARNING "v9fs: mux: creating workqueue failed\n"); - return -ENOMEM; - } - - return 0; -} - static u16 p9_mux_get_tag(struct p9_conn *m) { int tag; @@ -409,11 +391,11 @@ static void p9_mux_poll_stop(struct p9_conn *m) static struct p9_conn *p9_conn_create(struct p9_trans *trans) { int i, n; - struct p9_conn *m, *mtmp; + struct p9_conn *m; P9_DPRINTK(P9_DEBUG_MUX, "transport %p msize %d\n", trans, trans->msize); - m = kmalloc(sizeof(struct p9_conn), GFP_KERNEL); + m = kzalloc(sizeof(struct p9_conn), GFP_KERNEL); if (!m) return ERR_PTR(-ENOMEM); @@ -424,25 +406,14 @@ static struct p9_conn *p9_conn_create(struct p9_trans *trans) m->trans = trans; m->tagpool = p9_idpool_create(); if (IS_ERR(m->tagpool)) { - mtmp = ERR_PTR(-ENOMEM); kfree(m); - return mtmp; + return ERR_PTR(-ENOMEM); } - m->err = 0; - init_waitqueue_head(&m->equeue); INIT_LIST_HEAD(&m->req_list); INIT_LIST_HEAD(&m->unsent_req_list); - m->rcall = NULL; - m->rpos = 0; - m->rbuf = NULL; - m->wpos = m->wsize = 0; - m->wbuf = NULL; INIT_WORK(&m->rq, p9_read_work); INIT_WORK(&m->wq, p9_write_work); - m->wsched = 0; - memset(&m->poll_waddr, 0, sizeof(m->poll_waddr)); - m->poll_task = NULL; n = p9_mux_poll_start(m); if (n) { kfree(m); @@ -463,10 +434,8 @@ static struct p9_conn *p9_conn_create(struct p9_trans *trans) for (i = 0; i < ARRAY_SIZE(m->poll_waddr); i++) { if (IS_ERR(m->poll_waddr[i])) { p9_mux_poll_stop(m); - mtmp = (void *)m->poll_waddr; /* the error code */ kfree(m); - m = mtmp; - break; + return (void *)m->poll_waddr; /* the error code */ } } @@ -483,18 +452,13 @@ static void p9_conn_destroy(struct p9_conn *m) { P9_DPRINTK(P9_DEBUG_MUX, "mux %p prev %p next %p\n", m, m->mux_list.prev, m->mux_list.next); - p9_conn_cancel(m, -ECONNRESET); - - if (!list_empty(&m->req_list)) { - /* wait until all processes waiting on this session exit */ - P9_DPRINTK(P9_DEBUG_MUX, - "mux %p waiting for empty request queue\n", m); - wait_event_timeout(m->equeue, (list_empty(&m->req_list)), 5000); - P9_DPRINTK(P9_DEBUG_MUX, "mux %p request queue empty: %d\n", m, - list_empty(&m->req_list)); - } p9_mux_poll_stop(m); + cancel_work_sync(&m->rq); + cancel_work_sync(&m->wq); + + p9_conn_cancel(m, -ECONNRESET); + m->trans = NULL; p9_idpool_destroy(m->tagpool); kfree(m); @@ -840,8 +804,6 @@ static void p9_read_work(struct work_struct *work) (*req->cb) (req, req->cba); else kfree(req->rcall); - - wake_up(&m->equeue); } } else { if (err >= 0 && rcall->id != P9_RFLUSH) @@ -908,8 +870,10 @@ static struct p9_req *p9_send_request(struct p9_conn *m, else n = p9_mux_get_tag(m); - if (n < 0) + if (n < 0) { + kfree(req); return ERR_PTR(-ENOMEM); + } p9_set_tag(tc, n); @@ -984,8 +948,6 @@ static void p9_mux_flush_cb(struct p9_req *freq, void *a) (*req->cb) (req, req->cba); else kfree(req->rcall); - - wake_up(&m->equeue); } kfree(freq->tcall); @@ -1191,8 +1153,6 @@ void p9_conn_cancel(struct p9_conn *m, int err) else kfree(req->rcall); } - - wake_up(&m->equeue); } /** @@ -1285,7 +1245,7 @@ static int p9_socket_open(struct p9_trans *trans, struct socket *csocket) int fd, ret; csocket->sk->sk_allocation = GFP_NOIO; - fd = sock_map_fd(csocket); + fd = sock_map_fd(csocket, 0); if (fd < 0) { P9_EPRINTK(KERN_ERR, "p9_socket_open: failed to map fd\n"); return fd; @@ -1370,7 +1330,6 @@ p9_fd_poll(struct p9_trans *trans, struct poll_table_struct *pt) { int ret, n; struct p9_trans_fd *ts = NULL; - mm_segment_t oldfs; if (trans && trans->status == Connected) ts = trans->priv; @@ -1384,24 +1343,17 @@ p9_fd_poll(struct p9_trans *trans, struct poll_table_struct *pt) if (!ts->wr->f_op || !ts->wr->f_op->poll) return -EIO; - oldfs = get_fs(); - set_fs(get_ds()); - ret = ts->rd->f_op->poll(ts->rd, pt); if (ret < 0) - goto end; + return ret; if (ts->rd != ts->wr) { n = ts->wr->f_op->poll(ts->wr, pt); - if (n < 0) { - ret = n; - goto end; - } + if (n < 0) + return n; ret = (ret & ~POLLOUT) | (n & ~POLLIN); } -end: - set_fs(oldfs); return ret; } @@ -1629,6 +1581,7 @@ static struct p9_trans_module p9_tcp_trans = { .maxsize = MAX_SOCK_BUF, .def = 1, .create = p9_trans_create_tcp, + .owner = THIS_MODULE, }; static struct p9_trans_module p9_unix_trans = { @@ -1636,6 +1589,7 @@ static struct p9_trans_module p9_unix_trans = { .maxsize = MAX_SOCK_BUF, .def = 0, .create = p9_trans_create_unix, + .owner = THIS_MODULE, }; static struct p9_trans_module p9_fd_trans = { @@ -1643,14 +1597,20 @@ static struct p9_trans_module p9_fd_trans = { .maxsize = MAX_SOCK_BUF, .def = 0, .create = p9_trans_create_fd, + .owner = THIS_MODULE, }; int p9_trans_fd_init(void) { - int ret = p9_mux_global_init(); - if (ret) { - printk(KERN_WARNING "9p: starting mux failed\n"); - return ret; + int i; + + for (i = 0; i < ARRAY_SIZE(p9_mux_poll_tasks); i++) + p9_mux_poll_tasks[i].task = NULL; + + p9_mux_wq = create_workqueue("v9fs"); + if (!p9_mux_wq) { + printk(KERN_WARNING "v9fs: mux: creating workqueue failed\n"); + return -ENOMEM; } v9fs_register_trans(&p9_tcp_trans); @@ -1659,4 +1619,12 @@ int p9_trans_fd_init(void) return 0; } -EXPORT_SYMBOL(p9_trans_fd_init); + +void p9_trans_fd_exit(void) +{ + v9fs_unregister_trans(&p9_tcp_trans); + v9fs_unregister_trans(&p9_unix_trans); + v9fs_unregister_trans(&p9_fd_trans); + + destroy_workqueue(p9_mux_wq); +} diff --git a/net/9p/trans_virtio.c b/net/9p/trans_virtio.c index 42adc052b149..94912e077a55 100644 --- a/net/9p/trans_virtio.c +++ b/net/9p/trans_virtio.c @@ -528,6 +528,7 @@ static struct p9_trans_module p9_virtio_trans = { .create = p9_virtio_create, .maxsize = PAGE_SIZE*16, .def = 0, + .owner = THIS_MODULE, }; /* The standard init function */ @@ -545,6 +546,7 @@ static int __init p9_virtio_init(void) static void __exit p9_virtio_cleanup(void) { unregister_virtio_driver(&p9_virtio_drv); + v9fs_unregister_trans(&p9_virtio_trans); } module_init(p9_virtio_init); diff --git a/net/Kconfig b/net/Kconfig index acbf7c60e89b..d789d79551ae 100644 --- a/net/Kconfig +++ b/net/Kconfig @@ -2,9 +2,7 @@ # Network configuration # -menu "Networking" - -config NET +menuconfig NET bool "Networking support" ---help--- Unless you really know what you are doing, you should say Y here. @@ -22,7 +20,6 @@ config NET recommended to read the NET-HOWTO, available from <http://www.tldp.org/docs.html#howto>. -# Make sure that all config symbols are dependent on NET if NET menu "Networking options" @@ -181,7 +178,9 @@ source "net/dccp/Kconfig" source "net/sctp/Kconfig" source "net/tipc/Kconfig" source "net/atm/Kconfig" +source "net/802/Kconfig" source "net/bridge/Kconfig" +source "net/dsa/Kconfig" source "net/8021q/Kconfig" source "net/decnet/Kconfig" source "net/llc/Kconfig" @@ -234,22 +233,25 @@ source "net/can/Kconfig" source "net/irda/Kconfig" source "net/bluetooth/Kconfig" source "net/rxrpc/Kconfig" +source "net/phonet/Kconfig" config FIB_RULES bool -menu "Wireless" +menuconfig WIRELESS + bool "Wireless" depends on !S390 + default y + +if WIRELESS source "net/wireless/Kconfig" source "net/mac80211/Kconfig" source "net/ieee80211/Kconfig" -endmenu +endif # WIRELESS source "net/rfkill/Kconfig" source "net/9p/Kconfig" endif # if NET -endmenu # Networking - diff --git a/net/Makefile b/net/Makefile index b7a13643b549..27d1f10dc0e0 100644 --- a/net/Makefile +++ b/net/Makefile @@ -26,6 +26,7 @@ obj-$(CONFIG_PACKET) += packet/ obj-$(CONFIG_NET_KEY) += key/ obj-$(CONFIG_NET_SCHED) += sched/ obj-$(CONFIG_BRIDGE) += bridge/ +obj-$(CONFIG_NET_DSA) += dsa/ obj-$(CONFIG_IPX) += ipx/ obj-$(CONFIG_ATALK) += appletalk/ obj-$(CONFIG_WAN_ROUTER) += wanrouter/ @@ -42,7 +43,10 @@ obj-$(CONFIG_AF_RXRPC) += rxrpc/ obj-$(CONFIG_ATM) += atm/ obj-$(CONFIG_DECNET) += decnet/ obj-$(CONFIG_ECONET) += econet/ -obj-$(CONFIG_VLAN_8021Q) += 8021q/ +obj-$(CONFIG_PHONET) += phonet/ +ifneq ($(CONFIG_VLAN_8021Q),) +obj-y += 8021q/ +endif obj-$(CONFIG_IP_DCCP) += dccp/ obj-$(CONFIG_IP_SCTP) += sctp/ obj-y += wireless/ diff --git a/net/appletalk/aarp.c b/net/appletalk/aarp.c index 25aa37ce9430..b25c1e909d14 100644 --- a/net/appletalk/aarp.c +++ b/net/appletalk/aarp.c @@ -333,7 +333,7 @@ static int aarp_device_event(struct notifier_block *this, unsigned long event, struct net_device *dev = ptr; int ct; - if (dev_net(dev) != &init_net) + if (!net_eq(dev_net(dev), &init_net)) return NOTIFY_DONE; if (event == NETDEV_DOWN) { @@ -716,7 +716,7 @@ static int aarp_rcv(struct sk_buff *skb, struct net_device *dev, struct atalk_addr sa, *ma, da; struct atalk_iface *ifa; - if (dev_net(dev) != &init_net) + if (!net_eq(dev_net(dev), &init_net)) goto out0; /* We only do Ethernet SNAP AARP. */ diff --git a/net/appletalk/ddp.c b/net/appletalk/ddp.c index 44cd42f7786b..d3134e7e6ee8 100644 --- a/net/appletalk/ddp.c +++ b/net/appletalk/ddp.c @@ -2,7 +2,7 @@ * DDP: An implementation of the AppleTalk DDP protocol for * Ethernet 'ELAP'. * - * Alan Cox <Alan.Cox@linux.org> + * Alan Cox <alan@lxorguk.ukuu.org.uk> * * With more than a little assistance from * @@ -648,7 +648,7 @@ static int ddp_device_event(struct notifier_block *this, unsigned long event, { struct net_device *dev = ptr; - if (dev_net(dev) != &init_net) + if (!net_eq(dev_net(dev), &init_net)) return NOTIFY_DONE; if (event == NETDEV_DOWN) @@ -959,7 +959,7 @@ static unsigned long atalk_sum_skb(const struct sk_buff *skb, int offset, for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) { int end; - BUG_TRAP(start <= offset + len); + WARN_ON(start > offset + len); end = start + skb_shinfo(skb)->frags[i].size; if ((copy = end - offset) > 0) { @@ -986,7 +986,7 @@ static unsigned long atalk_sum_skb(const struct sk_buff *skb, int offset, for (; list; list = list->next) { int end; - BUG_TRAP(start <= offset + len); + WARN_ON(start > offset + len); end = start + list->len; if ((copy = end - offset) > 0) { @@ -1405,7 +1405,7 @@ static int atalk_rcv(struct sk_buff *skb, struct net_device *dev, int origlen; __u16 len_hops; - if (dev_net(dev) != &init_net) + if (!net_eq(dev_net(dev), &init_net)) goto freeit; /* Don't mangle buffer if shared */ @@ -1493,7 +1493,7 @@ freeit: static int ltalk_rcv(struct sk_buff *skb, struct net_device *dev, struct packet_type *pt, struct net_device *orig_dev) { - if (dev_net(dev) != &init_net) + if (!net_eq(dev_net(dev), &init_net)) goto freeit; /* Expand any short form frames */ @@ -1934,6 +1934,6 @@ static void __exit atalk_exit(void) module_exit(atalk_exit); MODULE_LICENSE("GPL"); -MODULE_AUTHOR("Alan Cox <Alan.Cox@linux.org>"); +MODULE_AUTHOR("Alan Cox <alan@lxorguk.ukuu.org.uk>"); MODULE_DESCRIPTION("AppleTalk 0.20\n"); MODULE_ALIAS_NETPROTO(PF_APPLETALK); diff --git a/net/atm/addr.c b/net/atm/addr.c index 6afa77d63bb5..82e85abc303d 100644 --- a/net/atm/addr.c +++ b/net/atm/addr.c @@ -9,7 +9,7 @@ #include "signaling.h" #include "addr.h" -static int check_addr(struct sockaddr_atmsvc *addr) +static int check_addr(const struct sockaddr_atmsvc *addr) { int i; @@ -23,7 +23,7 @@ static int check_addr(struct sockaddr_atmsvc *addr) return -EINVAL; } -static int identical(struct sockaddr_atmsvc *a, struct sockaddr_atmsvc *b) +static int identical(const struct sockaddr_atmsvc *a, const struct sockaddr_atmsvc *b) { if (*a->sas_addr.prv) if (memcmp(a->sas_addr.prv, b->sas_addr.prv, ATM_ESA_LEN)) @@ -35,7 +35,7 @@ static int identical(struct sockaddr_atmsvc *a, struct sockaddr_atmsvc *b) return !strcmp(a->sas_addr.pub, b->sas_addr.pub); } -static void notify_sigd(struct atm_dev *dev) +static void notify_sigd(const struct atm_dev *dev) { struct sockaddr_atmpvc pvc; @@ -63,7 +63,7 @@ void atm_reset_addr(struct atm_dev *dev, enum atm_addr_type_t atype) notify_sigd(dev); } -int atm_add_addr(struct atm_dev *dev, struct sockaddr_atmsvc *addr, +int atm_add_addr(struct atm_dev *dev, const struct sockaddr_atmsvc *addr, enum atm_addr_type_t atype) { unsigned long flags; @@ -98,7 +98,7 @@ int atm_add_addr(struct atm_dev *dev, struct sockaddr_atmsvc *addr, return 0; } -int atm_del_addr(struct atm_dev *dev, struct sockaddr_atmsvc *addr, +int atm_del_addr(struct atm_dev *dev, const struct sockaddr_atmsvc *addr, enum atm_addr_type_t atype) { unsigned long flags; diff --git a/net/atm/addr.h b/net/atm/addr.h index f39433ad45da..6837e9e7eb13 100644 --- a/net/atm/addr.h +++ b/net/atm/addr.h @@ -10,9 +10,9 @@ #include <linux/atmdev.h> void atm_reset_addr(struct atm_dev *dev, enum atm_addr_type_t type); -int atm_add_addr(struct atm_dev *dev, struct sockaddr_atmsvc *addr, +int atm_add_addr(struct atm_dev *dev, const struct sockaddr_atmsvc *addr, enum atm_addr_type_t type); -int atm_del_addr(struct atm_dev *dev, struct sockaddr_atmsvc *addr, +int atm_del_addr(struct atm_dev *dev, const struct sockaddr_atmsvc *addr, enum atm_addr_type_t type); int atm_get_addr(struct atm_dev *dev, struct sockaddr_atmsvc __user *buf, size_t size, enum atm_addr_type_t type); diff --git a/net/atm/br2684.c b/net/atm/br2684.c index 05fafdc2eea3..280de481edc7 100644 --- a/net/atm/br2684.c +++ b/net/atm/br2684.c @@ -52,12 +52,12 @@ static void skb_debug(const struct sk_buff *skb) #define ETHERTYPE_IPV6 0x86, 0xdd #define PAD_BRIDGED 0x00, 0x00 -static unsigned char ethertype_ipv4[] = { ETHERTYPE_IPV4 }; -static unsigned char ethertype_ipv6[] = { ETHERTYPE_IPV6 }; -static unsigned char llc_oui_pid_pad[] = +static const unsigned char ethertype_ipv4[] = { ETHERTYPE_IPV4 }; +static const unsigned char ethertype_ipv6[] = { ETHERTYPE_IPV6 }; +static const unsigned char llc_oui_pid_pad[] = { LLC, SNAP_BRIDGED, PID_ETHERNET, PAD_BRIDGED }; -static unsigned char llc_oui_ipv4[] = { LLC, SNAP_ROUTED, ETHERTYPE_IPV4 }; -static unsigned char llc_oui_ipv6[] = { LLC, SNAP_ROUTED, ETHERTYPE_IPV6 }; +static const unsigned char llc_oui_ipv4[] = { LLC, SNAP_ROUTED, ETHERTYPE_IPV4 }; +static const unsigned char llc_oui_ipv6[] = { LLC, SNAP_ROUTED, ETHERTYPE_IPV6 }; enum br2684_encaps { e_vc = BR2684_ENCAPS_VC, @@ -217,8 +217,8 @@ static int br2684_xmit_vcc(struct sk_buff *skb, struct br2684_dev *brdev, return 1; } -static inline struct br2684_vcc *pick_outgoing_vcc(struct sk_buff *skb, - struct br2684_dev *brdev) +static inline struct br2684_vcc *pick_outgoing_vcc(const struct sk_buff *skb, + const struct br2684_dev *brdev) { return list_empty(&brdev->brvccs) ? NULL : list_entry_brvcc(brdev->brvccs.next); /* 1 vcc/dev right now */ } @@ -375,11 +375,11 @@ static void br2684_push(struct atm_vcc *atmvcc, struct sk_buff *skb) if (memcmp (skb->data + 6, ethertype_ipv6, sizeof(ethertype_ipv6)) == 0) - skb->protocol = __constant_htons(ETH_P_IPV6); + skb->protocol = htons(ETH_P_IPV6); else if (memcmp (skb->data + 6, ethertype_ipv4, sizeof(ethertype_ipv4)) == 0) - skb->protocol = __constant_htons(ETH_P_IP); + skb->protocol = htons(ETH_P_IP); else goto error; skb_pull(skb, sizeof(llc_oui_ipv4)); @@ -404,9 +404,9 @@ static void br2684_push(struct atm_vcc *atmvcc, struct sk_buff *skb) skb_reset_network_header(skb); iph = ip_hdr(skb); if (iph->version == 4) - skb->protocol = __constant_htons(ETH_P_IP); + skb->protocol = htons(ETH_P_IP); else if (iph->version == 6) - skb->protocol = __constant_htons(ETH_P_IPV6); + skb->protocol = htons(ETH_P_IPV6); else goto error; skb->pkt_type = PACKET_HOST; diff --git a/net/atm/clip.c b/net/atm/clip.c index 6f8223ebf551..5b5b96344ce6 100644 --- a/net/atm/clip.c +++ b/net/atm/clip.c @@ -612,7 +612,7 @@ static int clip_device_event(struct notifier_block *this, unsigned long event, { struct net_device *dev = arg; - if (dev_net(dev) != &init_net) + if (!net_eq(dev_net(dev), &init_net)) return NOTIFY_DONE; if (event == NETDEV_UNREGISTER) { diff --git a/net/atm/common.c b/net/atm/common.c index c865517ba449..d34edbe754c8 100644 --- a/net/atm/common.c +++ b/net/atm/common.c @@ -262,7 +262,7 @@ static int adjust_tp(struct atm_trafprm *tp,unsigned char aal) } -static int check_ci(struct atm_vcc *vcc, short vpi, int vci) +static int check_ci(const struct atm_vcc *vcc, short vpi, int vci) { struct hlist_head *head = &vcc_hash[vci & (VCC_HTABLE_SIZE - 1)]; @@ -290,7 +290,7 @@ static int check_ci(struct atm_vcc *vcc, short vpi, int vci) } -static int find_ci(struct atm_vcc *vcc, short *vpi, int *vci) +static int find_ci(const struct atm_vcc *vcc, short *vpi, int *vci) { static short p; /* poor man's per-device cache */ static int c; @@ -646,7 +646,7 @@ static int atm_change_qos(struct atm_vcc *vcc,struct atm_qos *qos) } -static int check_tp(struct atm_trafprm *tp) +static int check_tp(const struct atm_trafprm *tp) { /* @@@ Should be merged with adjust_tp */ if (!tp->traffic_class || tp->traffic_class == ATM_ANYCLASS) return 0; @@ -663,7 +663,7 @@ static int check_tp(struct atm_trafprm *tp) } -static int check_qos(struct atm_qos *qos) +static int check_qos(const struct atm_qos *qos) { int error; diff --git a/net/atm/lec.c b/net/atm/lec.c index 653aca3573ac..8f701cde5945 100644 --- a/net/atm/lec.c +++ b/net/atm/lec.c @@ -65,36 +65,36 @@ static int lec_close(struct net_device *dev); static struct net_device_stats *lec_get_stats(struct net_device *dev); static void lec_init(struct net_device *dev); static struct lec_arp_table *lec_arp_find(struct lec_priv *priv, - unsigned char *mac_addr); + const unsigned char *mac_addr); static int lec_arp_remove(struct lec_priv *priv, struct lec_arp_table *to_remove); /* LANE2 functions */ -static void lane2_associate_ind(struct net_device *dev, u8 *mac_address, - u8 *tlvs, u32 sizeoftlvs); -static int lane2_resolve(struct net_device *dev, u8 *dst_mac, int force, +static void lane2_associate_ind(struct net_device *dev, const u8 *mac_address, + const u8 *tlvs, u32 sizeoftlvs); +static int lane2_resolve(struct net_device *dev, const u8 *dst_mac, int force, u8 **tlvs, u32 *sizeoftlvs); -static int lane2_associate_req(struct net_device *dev, u8 *lan_dst, - u8 *tlvs, u32 sizeoftlvs); +static int lane2_associate_req(struct net_device *dev, const u8 *lan_dst, + const u8 *tlvs, u32 sizeoftlvs); -static int lec_addr_delete(struct lec_priv *priv, unsigned char *atm_addr, +static int lec_addr_delete(struct lec_priv *priv, const unsigned char *atm_addr, unsigned long permanent); static void lec_arp_check_empties(struct lec_priv *priv, struct atm_vcc *vcc, struct sk_buff *skb); static void lec_arp_destroy(struct lec_priv *priv); static void lec_arp_init(struct lec_priv *priv); static struct atm_vcc *lec_arp_resolve(struct lec_priv *priv, - unsigned char *mac_to_find, + const unsigned char *mac_to_find, int is_rdesc, struct lec_arp_table **ret_entry); -static void lec_arp_update(struct lec_priv *priv, unsigned char *mac_addr, - unsigned char *atm_addr, unsigned long remoteflag, +static void lec_arp_update(struct lec_priv *priv, const unsigned char *mac_addr, + const unsigned char *atm_addr, unsigned long remoteflag, unsigned int targetless_le_arp); static void lec_flush_complete(struct lec_priv *priv, unsigned long tran_id); static int lec_mcast_make(struct lec_priv *priv, struct atm_vcc *vcc); static void lec_set_flush_tran_id(struct lec_priv *priv, - unsigned char *atm_addr, + const unsigned char *atm_addr, unsigned long tran_id); -static void lec_vcc_added(struct lec_priv *priv, struct atmlec_ioc *ioc_data, +static void lec_vcc_added(struct lec_priv *priv, const struct atmlec_ioc *ioc_data, struct atm_vcc *vcc, void (*old_push) (struct atm_vcc *vcc, struct sk_buff *skb)); @@ -634,7 +634,7 @@ static struct atm_dev lecatm_dev = { */ static int send_to_lecd(struct lec_priv *priv, atmlec_msg_type type, - unsigned char *mac_addr, unsigned char *atm_addr, + const unsigned char *mac_addr, const unsigned char *atm_addr, struct sk_buff *data) { struct sock *sk; @@ -705,10 +705,9 @@ static void lec_init(struct net_device *dev) dev->set_multicast_list = lec_set_multicast_list; dev->do_ioctl = NULL; printk("%s: Initialized!\n", dev->name); - return; } -static unsigned char lec_ctrl_magic[] = { +static const unsigned char lec_ctrl_magic[] = { 0xff, 0x00, 0x01, @@ -1276,7 +1275,7 @@ module_exit(lane_module_cleanup); * lec will be used. * If dst_mac == NULL, targetless LE_ARP will be sent */ -static int lane2_resolve(struct net_device *dev, u8 *dst_mac, int force, +static int lane2_resolve(struct net_device *dev, const u8 *dst_mac, int force, u8 **tlvs, u32 *sizeoftlvs) { unsigned long flags; @@ -1322,8 +1321,8 @@ static int lane2_resolve(struct net_device *dev, u8 *dst_mac, int force, * Returns 1 for success, 0 for failure (out of memory) * */ -static int lane2_associate_req(struct net_device *dev, u8 *lan_dst, - u8 *tlvs, u32 sizeoftlvs) +static int lane2_associate_req(struct net_device *dev, const u8 *lan_dst, + const u8 *tlvs, u32 sizeoftlvs) { int retval; struct sk_buff *skb; @@ -1358,8 +1357,8 @@ static int lane2_associate_req(struct net_device *dev, u8 *lan_dst, * LANE2: 3.1.5, LE_ASSOCIATE.indication * */ -static void lane2_associate_ind(struct net_device *dev, u8 *mac_addr, - u8 *tlvs, u32 sizeoftlvs) +static void lane2_associate_ind(struct net_device *dev, const u8 *mac_addr, + const u8 *tlvs, u32 sizeoftlvs) { #if 0 int i = 0; @@ -1744,7 +1743,7 @@ static void lec_arp_destroy(struct lec_priv *priv) * Find entry by mac_address */ static struct lec_arp_table *lec_arp_find(struct lec_priv *priv, - unsigned char *mac_addr) + const unsigned char *mac_addr) { struct hlist_node *node; struct hlist_head *head; @@ -1764,7 +1763,7 @@ static struct lec_arp_table *lec_arp_find(struct lec_priv *priv, } static struct lec_arp_table *make_entry(struct lec_priv *priv, - unsigned char *mac_addr) + const unsigned char *mac_addr) { struct lec_arp_table *to_return; @@ -1921,7 +1920,7 @@ restart: * */ static struct atm_vcc *lec_arp_resolve(struct lec_priv *priv, - unsigned char *mac_to_find, int is_rdesc, + const unsigned char *mac_to_find, int is_rdesc, struct lec_arp_table **ret_entry) { unsigned long flags; @@ -1932,7 +1931,6 @@ static struct atm_vcc *lec_arp_resolve(struct lec_priv *priv, switch (priv->lane_version) { case 1: return priv->mcast_vcc; - break; case 2: /* LANE2 wants arp for multicast addresses */ if (!compare_ether_addr(mac_to_find, bus_mac)) return priv->mcast_vcc; @@ -2017,7 +2015,7 @@ out: } static int -lec_addr_delete(struct lec_priv *priv, unsigned char *atm_addr, +lec_addr_delete(struct lec_priv *priv, const unsigned char *atm_addr, unsigned long permanent) { unsigned long flags; @@ -2047,8 +2045,8 @@ lec_addr_delete(struct lec_priv *priv, unsigned char *atm_addr, * Notifies: Response to arp_request (atm_addr != NULL) */ static void -lec_arp_update(struct lec_priv *priv, unsigned char *mac_addr, - unsigned char *atm_addr, unsigned long remoteflag, +lec_arp_update(struct lec_priv *priv, const unsigned char *mac_addr, + const unsigned char *atm_addr, unsigned long remoteflag, unsigned int targetless_le_arp) { unsigned long flags; @@ -2148,7 +2146,7 @@ out: * Notifies: Vcc setup ready */ static void -lec_vcc_added(struct lec_priv *priv, struct atmlec_ioc *ioc_data, +lec_vcc_added(struct lec_priv *priv, const struct atmlec_ioc *ioc_data, struct atm_vcc *vcc, void (*old_push) (struct atm_vcc *vcc, struct sk_buff *skb)) { @@ -2336,7 +2334,7 @@ restart: static void lec_set_flush_tran_id(struct lec_priv *priv, - unsigned char *atm_addr, unsigned long tran_id) + const unsigned char *atm_addr, unsigned long tran_id) { unsigned long flags; struct hlist_node *node; diff --git a/net/atm/lec.h b/net/atm/lec.h index b41cda7ea1e1..0d376682c1a3 100644 --- a/net/atm/lec.h +++ b/net/atm/lec.h @@ -42,12 +42,12 @@ struct lecdatahdr_8025 { * */ struct lane2_ops { - int (*resolve) (struct net_device *dev, u8 *dst_mac, int force, + int (*resolve) (struct net_device *dev, const u8 *dst_mac, int force, u8 **tlvs, u32 *sizeoftlvs); - int (*associate_req) (struct net_device *dev, u8 *lan_dst, - u8 *tlvs, u32 sizeoftlvs); - void (*associate_indicator) (struct net_device *dev, u8 *mac_addr, - u8 *tlvs, u32 sizeoftlvs); + int (*associate_req) (struct net_device *dev, const u8 *lan_dst, + const u8 *tlvs, u32 sizeoftlvs); + void (*associate_indicator) (struct net_device *dev, const u8 *mac_addr, + const u8 *tlvs, u32 sizeoftlvs); }; /* diff --git a/net/atm/mpc.c b/net/atm/mpc.c index 9db332e7a6c0..11b16d16661c 100644 --- a/net/atm/mpc.c +++ b/net/atm/mpc.c @@ -62,11 +62,13 @@ static void MPOA_cache_impos_rcvd(struct k_message *msg, struct mpoa_client *mpc static void set_mpc_ctrl_addr_rcvd(struct k_message *mesg, struct mpoa_client *mpc); static void set_mps_mac_addr_rcvd(struct k_message *mesg, struct mpoa_client *mpc); -static uint8_t *copy_macs(struct mpoa_client *mpc, uint8_t *router_mac, - uint8_t *tlvs, uint8_t mps_macs, uint8_t device_type); +static const uint8_t *copy_macs(struct mpoa_client *mpc, + const uint8_t *router_mac, + const uint8_t *tlvs, uint8_t mps_macs, + uint8_t device_type); static void purge_egress_shortcut(struct atm_vcc *vcc, eg_cache_entry *entry); -static void send_set_mps_ctrl_addr(char *addr, struct mpoa_client *mpc); +static void send_set_mps_ctrl_addr(const char *addr, struct mpoa_client *mpc); static void mpoad_close(struct atm_vcc *vcc); static int msg_from_mpoad(struct atm_vcc *vcc, struct sk_buff *skb); @@ -351,12 +353,12 @@ static const char *mpoa_device_type_string(char type) * lec sees a TLV it uses the pointer to call this function. * */ -static void lane2_assoc_ind(struct net_device *dev, uint8_t *mac_addr, - uint8_t *tlvs, uint32_t sizeoftlvs) +static void lane2_assoc_ind(struct net_device *dev, const u8 *mac_addr, + const u8 *tlvs, u32 sizeoftlvs) { uint32_t type; uint8_t length, mpoa_device_type, number_of_mps_macs; - uint8_t *end_of_tlvs; + const uint8_t *end_of_tlvs; struct mpoa_client *mpc; mpoa_device_type = number_of_mps_macs = 0; /* silence gcc */ @@ -430,8 +432,10 @@ static void lane2_assoc_ind(struct net_device *dev, uint8_t *mac_addr, * plus the possible MAC address(es) to mpc->mps_macs. * For a freshly allocated MPOA client mpc->mps_macs == 0. */ -static uint8_t *copy_macs(struct mpoa_client *mpc, uint8_t *router_mac, - uint8_t *tlvs, uint8_t mps_macs, uint8_t device_type) +static const uint8_t *copy_macs(struct mpoa_client *mpc, + const uint8_t *router_mac, + const uint8_t *tlvs, uint8_t mps_macs, + uint8_t device_type) { int num_macs; num_macs = (mps_macs > 1) ? mps_macs : 1; @@ -811,7 +815,7 @@ static int atm_mpoa_mpoad_attach (struct atm_vcc *vcc, int arg) return arg; } -static void send_set_mps_ctrl_addr(char *addr, struct mpoa_client *mpc) +static void send_set_mps_ctrl_addr(const char *addr, struct mpoa_client *mpc) { struct k_message mesg; @@ -964,7 +968,7 @@ static int mpoa_event_listener(struct notifier_block *mpoa_notifier, unsigned lo dev = (struct net_device *)dev_ptr; - if (dev_net(dev) != &init_net) + if (!net_eq(dev_net(dev), &init_net)) return NOTIFY_DONE; if (dev->name == NULL || strncmp(dev->name, "lec", 3)) diff --git a/net/ax25/af_ax25.c b/net/ax25/af_ax25.c index 2712544cf0ca..28c71574a781 100644 --- a/net/ax25/af_ax25.c +++ b/net/ax25/af_ax25.c @@ -116,7 +116,7 @@ static int ax25_device_event(struct notifier_block *this, unsigned long event, { struct net_device *dev = (struct net_device *)ptr; - if (dev_net(dev) != &init_net) + if (!net_eq(dev_net(dev), &init_net)) return NOTIFY_DONE; /* Reject non AX.25 devices */ @@ -317,6 +317,9 @@ void ax25_destroy_socket(ax25_cb *ax25) /* Queue the unaccepted socket for death */ sock_orphan(skb->sk); + /* 9A4GL: hack to release unaccepted sockets */ + skb->sk->sk_state = TCP_LISTEN; + ax25_start_heartbeat(sax25); sax25->state = AX25_STATE_0; } @@ -893,13 +896,11 @@ struct sock *ax25_make_new(struct sock *osk, struct ax25_dev *ax25_dev) sk->sk_destruct = ax25_free_sock; sk->sk_type = osk->sk_type; - sk->sk_socket = osk->sk_socket; sk->sk_priority = osk->sk_priority; sk->sk_protocol = osk->sk_protocol; sk->sk_rcvbuf = osk->sk_rcvbuf; sk->sk_sndbuf = osk->sk_sndbuf; sk->sk_state = TCP_ESTABLISHED; - sk->sk_sleep = osk->sk_sleep; sock_copy_flags(sk, osk); oax25 = ax25_sk(osk); @@ -1361,13 +1362,11 @@ static int ax25_accept(struct socket *sock, struct socket *newsock, int flags) goto out; newsk = skb->sk; - newsk->sk_socket = newsock; - newsk->sk_sleep = &newsock->wait; + sock_graft(newsk, newsock); /* Now attach up the new socket */ kfree_skb(skb); sk->sk_ack_backlog--; - newsock->sk = newsk; newsock->state = SS_CONNECTED; out: diff --git a/net/ax25/ax25_in.c b/net/ax25/ax25_in.c index 33790a8efbc8..4a5ba978a804 100644 --- a/net/ax25/ax25_in.c +++ b/net/ax25/ax25_in.c @@ -451,7 +451,7 @@ int ax25_kiss_rcv(struct sk_buff *skb, struct net_device *dev, skb->sk = NULL; /* Initially we don't know who it's for */ skb->destructor = NULL; /* Who initializes this, dammit?! */ - if (dev_net(dev) != &init_net) { + if (!net_eq(dev_net(dev), &init_net)) { kfree_skb(skb); return 0; } diff --git a/net/ax25/sysctl_net_ax25.c b/net/ax25/sysctl_net_ax25.c index f597987b2424..f288fc4aef9b 100644 --- a/net/ax25/sysctl_net_ax25.c +++ b/net/ax25/sysctl_net_ax25.c @@ -36,6 +36,7 @@ static struct ctl_path ax25_path[] = { { .procname = "ax25", .ctl_name = NET_AX25, }, { } }; + static const ctl_table ax25_param_table[] = { { .ctl_name = NET_AX25_IP_DEFAULT_MODE, @@ -167,6 +168,7 @@ static const ctl_table ax25_param_table[] = { .extra1 = &min_proto, .extra2 = &max_proto }, +#ifdef CONFIG_AX25_DAMA_SLAVE { .ctl_name = NET_AX25_DAMA_SLAVE_TIMEOUT, .procname = "dama_slave_timeout", @@ -177,6 +179,8 @@ static const ctl_table ax25_param_table[] = { .extra1 = &min_ds_timeout, .extra2 = &max_ds_timeout }, +#endif + { .ctl_name = 0 } /* that's all, folks! */ }; @@ -210,16 +214,6 @@ void ax25_register_sysctl(void) ax25_table[n].procname = ax25_dev->dev->name; ax25_table[n].mode = 0555; -#ifndef CONFIG_AX25_DAMA_SLAVE - /* - * We do not wish to have a representation of this parameter - * in /proc/sys/ when configured *not* to include the - * AX.25 DAMA slave code, do we? - */ - - child[AX25_VALUES_DS_TIMEOUT].procname = NULL; -#endif - child[AX25_MAX_VALUES].ctl_name = 0; /* just in case... */ for (k = 0; k < AX25_MAX_VALUES; k++) diff --git a/net/bluetooth/af_bluetooth.c b/net/bluetooth/af_bluetooth.c index d366423c8392..f6348e078aa4 100644 --- a/net/bluetooth/af_bluetooth.c +++ b/net/bluetooth/af_bluetooth.c @@ -36,6 +36,7 @@ #include <linux/init.h> #include <linux/poll.h> #include <net/sock.h> +#include <asm/ioctls.h> #if defined(CONFIG_KMOD) #include <linux/kmod.h> @@ -48,7 +49,7 @@ #define BT_DBG(D...) #endif -#define VERSION "2.11" +#define VERSION "2.13" /* Bluetooth sockets */ #define BT_MAX_PROTO 8 @@ -266,6 +267,8 @@ int bt_sock_recvmsg(struct kiocb *iocb, struct socket *sock, skb_reset_transport_header(skb); err = skb_copy_datagram_iovec(skb, 0, msg->msg_iov, copied); + if (err == 0) + sock_recv_timestamp(msg, sk, skb); skb_free_datagram(sk, skb); @@ -329,6 +332,54 @@ unsigned int bt_sock_poll(struct file * file, struct socket *sock, poll_table *w } EXPORT_SYMBOL(bt_sock_poll); +int bt_sock_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg) +{ + struct sock *sk = sock->sk; + struct sk_buff *skb; + long amount; + int err; + + BT_DBG("sk %p cmd %x arg %lx", sk, cmd, arg); + + switch (cmd) { + case TIOCOUTQ: + if (sk->sk_state == BT_LISTEN) + return -EINVAL; + + amount = sk->sk_sndbuf - atomic_read(&sk->sk_wmem_alloc); + if (amount < 0) + amount = 0; + err = put_user(amount, (int __user *) arg); + break; + + case TIOCINQ: + if (sk->sk_state == BT_LISTEN) + return -EINVAL; + + lock_sock(sk); + skb = skb_peek(&sk->sk_receive_queue); + amount = skb ? skb->len : 0; + release_sock(sk); + err = put_user(amount, (int __user *) arg); + break; + + case SIOCGSTAMP: + err = sock_get_timestamp(sk, (struct timeval __user *) arg); + break; + + case SIOCGSTAMPNS: + err = sock_get_timestampns(sk, (struct timespec __user *) arg); + break; + + default: + err = -ENOIOCTLCMD; + break; + } + + return err; +} +EXPORT_SYMBOL(bt_sock_ioctl); + int bt_sock_wait_state(struct sock *sk, int state, unsigned long timeo) { DECLARE_WAITQUEUE(wait, current); @@ -405,7 +456,7 @@ static void __exit bt_exit(void) subsys_initcall(bt_init); module_exit(bt_exit); -MODULE_AUTHOR("Maxim Krasnyansky <maxk@qualcomm.com>, Marcel Holtmann <marcel@holtmann.org>"); +MODULE_AUTHOR("Marcel Holtmann <marcel@holtmann.org>"); MODULE_DESCRIPTION("Bluetooth Core ver " VERSION); MODULE_VERSION(VERSION); MODULE_LICENSE("GPL"); diff --git a/net/bluetooth/bnep/bnep.h b/net/bluetooth/bnep/bnep.h index e69244dd8de8..b69bf4e7c48b 100644 --- a/net/bluetooth/bnep/bnep.h +++ b/net/bluetooth/bnep/bnep.h @@ -16,10 +16,6 @@ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ -/* - * $Id: bnep.h,v 1.5 2002/08/04 21:23:58 maxk Exp $ - */ - #ifndef _BNEP_H #define _BNEP_H diff --git a/net/bluetooth/bnep/core.c b/net/bluetooth/bnep/core.c index f85d94643aaf..80ba30cf4b68 100644 --- a/net/bluetooth/bnep/core.c +++ b/net/bluetooth/bnep/core.c @@ -25,10 +25,6 @@ SOFTWARE IS DISCLAIMED. */ -/* - * $Id: core.c,v 1.20 2002/08/04 21:23:58 maxk Exp $ - */ - #include <linux/module.h> #include <linux/kernel.h> @@ -61,7 +57,10 @@ #define BT_DBG(D...) #endif -#define VERSION "1.2" +#define VERSION "1.3" + +static int compress_src = 1; +static int compress_dst = 1; static LIST_HEAD(bnep_session_list); static DECLARE_RWSEM(bnep_session_sem); @@ -422,10 +421,10 @@ static inline int bnep_tx_frame(struct bnep_session *s, struct sk_buff *skb) iv[il++] = (struct kvec) { &type, 1 }; len++; - if (!compare_ether_addr(eh->h_dest, s->eh.h_source)) + if (compress_src && !compare_ether_addr(eh->h_dest, s->eh.h_source)) type |= 0x01; - if (!compare_ether_addr(eh->h_source, s->eh.h_dest)) + if (compress_dst && !compare_ether_addr(eh->h_source, s->eh.h_dest)) type |= 0x02; if (type) @@ -507,6 +506,11 @@ static int bnep_session(void *arg) /* Delete network device */ unregister_netdev(dev); + /* Wakeup user-space polling for socket errors */ + s->sock->sk->sk_err = EUNATCH; + + wake_up_interruptible(s->sock->sk->sk_sleep); + /* Release the socket */ fput(s->sock->file); @@ -726,7 +730,13 @@ static void __exit bnep_exit(void) module_init(bnep_init); module_exit(bnep_exit); -MODULE_AUTHOR("David Libault <david.libault@inventel.fr>, Maxim Krasnyansky <maxk@qualcomm.com>"); +module_param(compress_src, bool, 0644); +MODULE_PARM_DESC(compress_src, "Compress sources headers"); + +module_param(compress_dst, bool, 0644); +MODULE_PARM_DESC(compress_dst, "Compress destination headers"); + +MODULE_AUTHOR("Marcel Holtmann <marcel@holtmann.org>"); MODULE_DESCRIPTION("Bluetooth BNEP ver " VERSION); MODULE_VERSION(VERSION); MODULE_LICENSE("GPL"); diff --git a/net/bluetooth/bnep/netdev.c b/net/bluetooth/bnep/netdev.c index 95e3837e4312..d9fa0ab2c87f 100644 --- a/net/bluetooth/bnep/netdev.c +++ b/net/bluetooth/bnep/netdev.c @@ -25,10 +25,6 @@ SOFTWARE IS DISCLAIMED. */ -/* - * $Id: netdev.c,v 1.8 2002/08/04 21:23:58 maxk Exp $ - */ - #include <linux/module.h> #include <linux/socket.h> diff --git a/net/bluetooth/bnep/sock.c b/net/bluetooth/bnep/sock.c index 201e5b1ce473..8ffb57f2303a 100644 --- a/net/bluetooth/bnep/sock.c +++ b/net/bluetooth/bnep/sock.c @@ -24,10 +24,6 @@ SOFTWARE IS DISCLAIMED. */ -/* - * $Id: sock.c,v 1.4 2002/08/04 21:23:58 maxk Exp $ - */ - #include <linux/module.h> #include <linux/types.h> diff --git a/net/bluetooth/hci_conn.c b/net/bluetooth/hci_conn.c index f8880261da0e..b7002429f152 100644 --- a/net/bluetooth/hci_conn.c +++ b/net/bluetooth/hci_conn.c @@ -59,24 +59,31 @@ void hci_acl_connect(struct hci_conn *conn) BT_DBG("%p", conn); conn->state = BT_CONNECT; - conn->out = 1; + conn->out = 1; + conn->link_mode = HCI_LM_MASTER; conn->attempt++; + conn->link_policy = hdev->link_policy; + memset(&cp, 0, sizeof(cp)); bacpy(&cp.bdaddr, &conn->dst); cp.pscan_rep_mode = 0x02; - if ((ie = hci_inquiry_cache_lookup(hdev, &conn->dst)) && - inquiry_entry_age(ie) <= INQUIRY_ENTRY_AGE_MAX) { - cp.pscan_rep_mode = ie->data.pscan_rep_mode; - cp.pscan_mode = ie->data.pscan_mode; - cp.clock_offset = ie->data.clock_offset | cpu_to_le16(0x8000); + if ((ie = hci_inquiry_cache_lookup(hdev, &conn->dst))) { + if (inquiry_entry_age(ie) <= INQUIRY_ENTRY_AGE_MAX) { + cp.pscan_rep_mode = ie->data.pscan_rep_mode; + cp.pscan_mode = ie->data.pscan_mode; + cp.clock_offset = ie->data.clock_offset | + cpu_to_le16(0x8000); + } + memcpy(conn->dev_class, ie->data.dev_class, 3); + conn->ssp_mode = ie->data.ssp_mode; } - cp.pkt_type = cpu_to_le16(hdev->pkt_type & ACL_PTYPE_MASK); + cp.pkt_type = cpu_to_le16(conn->pkt_type); if (lmp_rswitch_capable(hdev) && !(hdev->link_mode & HCI_LM_MASTER)) cp.role_switch = 0x01; else @@ -122,7 +129,7 @@ void hci_add_sco(struct hci_conn *conn, __u16 handle) conn->out = 1; cp.handle = cpu_to_le16(handle); - cp.pkt_type = cpu_to_le16(hdev->pkt_type & SCO_PTYPE_MASK); + cp.pkt_type = cpu_to_le16(conn->pkt_type); hci_send_cmd(hdev, HCI_OP_ADD_SCO, sizeof(cp), &cp); } @@ -138,7 +145,7 @@ void hci_setup_sync(struct hci_conn *conn, __u16 handle) conn->out = 1; cp.handle = cpu_to_le16(handle); - cp.pkt_type = cpu_to_le16(hdev->esco_type); + cp.pkt_type = cpu_to_le16(conn->pkt_type); cp.tx_bandwidth = cpu_to_le32(0x00001f40); cp.rx_bandwidth = cpu_to_le32(0x00001f40); @@ -163,11 +170,13 @@ static void hci_conn_timeout(unsigned long arg) switch (conn->state) { case BT_CONNECT: + case BT_CONNECT2: if (conn->type == ACL_LINK) hci_acl_connect_cancel(conn); else hci_acl_disconn(conn, 0x13); break; + case BT_CONFIG: case BT_CONNECTED: hci_acl_disconn(conn, 0x13); break; @@ -199,13 +208,28 @@ struct hci_conn *hci_conn_add(struct hci_dev *hdev, int type, bdaddr_t *dst) return NULL; bacpy(&conn->dst, dst); - conn->hdev = hdev; - conn->type = type; - conn->mode = HCI_CM_ACTIVE; - conn->state = BT_OPEN; + conn->hdev = hdev; + conn->type = type; + conn->mode = HCI_CM_ACTIVE; + conn->state = BT_OPEN; conn->power_save = 1; + switch (type) { + case ACL_LINK: + conn->pkt_type = hdev->pkt_type & ACL_PTYPE_MASK; + break; + case SCO_LINK: + if (lmp_esco_capable(hdev)) + conn->pkt_type = hdev->esco_type & SCO_ESCO_MASK; + else + conn->pkt_type = hdev->pkt_type & SCO_PTYPE_MASK; + break; + case ESCO_LINK: + conn->pkt_type = hdev->esco_type; + break; + } + skb_queue_head_init(&conn->data_q); setup_timer(&conn->disc_timer, hci_conn_timeout, (unsigned long)conn); @@ -221,8 +245,6 @@ struct hci_conn *hci_conn_add(struct hci_dev *hdev, int type, bdaddr_t *dst) if (hdev->notify) hdev->notify(hdev, HCI_NOTIFY_CONN_ADD); - hci_conn_add_sysfs(conn); - tasklet_enable(&hdev->tx_task); return conn; @@ -254,12 +276,14 @@ int hci_conn_del(struct hci_conn *conn) } tasklet_disable(&hdev->tx_task); + hci_conn_hash_del(hdev, conn); if (hdev->notify) hdev->notify(hdev, HCI_NOTIFY_CONN_DEL); + tasklet_enable(&hdev->tx_task); + skb_queue_purge(&conn->data_q); - hci_conn_del_sysfs(conn); return 0; } @@ -306,7 +330,7 @@ EXPORT_SYMBOL(hci_get_route); /* Create SCO or ACL connection. * Device _must_ be locked */ -struct hci_conn *hci_connect(struct hci_dev *hdev, int type, bdaddr_t *dst) +struct hci_conn *hci_connect(struct hci_dev *hdev, int type, bdaddr_t *dst, __u8 auth_type) { struct hci_conn *acl; struct hci_conn *sco; @@ -320,8 +344,10 @@ struct hci_conn *hci_connect(struct hci_dev *hdev, int type, bdaddr_t *dst) hci_conn_hold(acl); - if (acl->state == BT_OPEN || acl->state == BT_CLOSED) + if (acl->state == BT_OPEN || acl->state == BT_CLOSED) { + acl->auth_type = auth_type; hci_acl_connect(acl); + } if (type == ACL_LINK) return acl; @@ -350,18 +376,39 @@ struct hci_conn *hci_connect(struct hci_dev *hdev, int type, bdaddr_t *dst) } EXPORT_SYMBOL(hci_connect); +/* Check link security requirement */ +int hci_conn_check_link_mode(struct hci_conn *conn) +{ + BT_DBG("conn %p", conn); + + if (conn->ssp_mode > 0 && conn->hdev->ssp_mode > 0 && + !(conn->link_mode & HCI_LM_ENCRYPT)) + return 0; + + return 1; +} +EXPORT_SYMBOL(hci_conn_check_link_mode); + /* Authenticate remote device */ int hci_conn_auth(struct hci_conn *conn) { BT_DBG("conn %p", conn); + if (conn->ssp_mode > 0 && conn->hdev->ssp_mode > 0) { + if (!(conn->auth_type & 0x01)) { + conn->auth_type |= 0x01; + conn->link_mode &= ~HCI_LM_AUTH; + } + } + if (conn->link_mode & HCI_LM_AUTH) return 1; if (!test_and_set_bit(HCI_CONN_AUTH_PEND, &conn->pend)) { struct hci_cp_auth_requested cp; cp.handle = cpu_to_le16(conn->handle); - hci_send_cmd(conn->hdev, HCI_OP_AUTH_REQUESTED, sizeof(cp), &cp); + hci_send_cmd(conn->hdev, HCI_OP_AUTH_REQUESTED, + sizeof(cp), &cp); } return 0; } @@ -373,7 +420,7 @@ int hci_conn_encrypt(struct hci_conn *conn) BT_DBG("conn %p", conn); if (conn->link_mode & HCI_LM_ENCRYPT) - return 1; + return hci_conn_auth(conn); if (test_and_set_bit(HCI_CONN_ENCRYPT_PEND, &conn->pend)) return 0; @@ -382,7 +429,8 @@ int hci_conn_encrypt(struct hci_conn *conn) struct hci_cp_set_conn_encrypt cp; cp.handle = cpu_to_le16(conn->handle); cp.encrypt = 1; - hci_send_cmd(conn->hdev, HCI_OP_SET_CONN_ENCRYPT, sizeof(cp), &cp); + hci_send_cmd(conn->hdev, HCI_OP_SET_CONN_ENCRYPT, + sizeof(cp), &cp); } return 0; } @@ -396,7 +444,8 @@ int hci_conn_change_link_key(struct hci_conn *conn) if (!test_and_set_bit(HCI_CONN_AUTH_PEND, &conn->pend)) { struct hci_cp_change_conn_link_key cp; cp.handle = cpu_to_le16(conn->handle); - hci_send_cmd(conn->hdev, HCI_OP_CHANGE_CONN_LINK_KEY, sizeof(cp), &cp); + hci_send_cmd(conn->hdev, HCI_OP_CHANGE_CONN_LINK_KEY, + sizeof(cp), &cp); } return 0; } @@ -498,6 +547,8 @@ void hci_conn_hash_flush(struct hci_dev *hdev) c->state = BT_CLOSED; + hci_conn_del_sysfs(c); + hci_proto_disconn_ind(c, 0x16); hci_conn_del(c); } @@ -600,3 +651,23 @@ int hci_get_conn_info(struct hci_dev *hdev, void __user *arg) return copy_to_user(ptr, &ci, sizeof(ci)) ? -EFAULT : 0; } + +int hci_get_auth_info(struct hci_dev *hdev, void __user *arg) +{ + struct hci_auth_info_req req; + struct hci_conn *conn; + + if (copy_from_user(&req, arg, sizeof(req))) + return -EFAULT; + + hci_dev_lock_bh(hdev); + conn = hci_conn_hash_lookup_ba(hdev, ACL_LINK, &req.bdaddr); + if (conn) + req.type = conn->auth_type; + hci_dev_unlock_bh(hdev); + + if (!conn) + return -ENOENT; + + return copy_to_user(arg, &req, sizeof(req)) ? -EFAULT : 0; +} diff --git a/net/bluetooth/hci_core.c b/net/bluetooth/hci_core.c index aec6929f5c16..278a3ace14f6 100644 --- a/net/bluetooth/hci_core.c +++ b/net/bluetooth/hci_core.c @@ -164,6 +164,9 @@ static inline int hci_request(struct hci_dev *hdev, void (*req)(struct hci_dev * { int ret; + if (!test_bit(HCI_UP, &hdev->flags)) + return -ENETDOWN; + /* Serialize all requests */ hci_req_lock(hdev); ret = __hci_request(hdev, req, opt, timeout); @@ -279,10 +282,20 @@ static void hci_encrypt_req(struct hci_dev *hdev, unsigned long opt) BT_DBG("%s %x", hdev->name, encrypt); - /* Authentication */ + /* Encryption */ hci_send_cmd(hdev, HCI_OP_WRITE_ENCRYPT_MODE, 1, &encrypt); } +static void hci_linkpol_req(struct hci_dev *hdev, unsigned long opt) +{ + __le16 policy = cpu_to_le16(opt); + + BT_DBG("%s %x", hdev->name, opt); + + /* Default link policy */ + hci_send_cmd(hdev, HCI_OP_WRITE_DEF_LINK_POLICY, 2, &policy); +} + /* Get HCI device by index. * Device is held on return. */ struct hci_dev *hci_dev_get(int index) @@ -694,32 +707,35 @@ int hci_dev_cmd(unsigned int cmd, void __user *arg) msecs_to_jiffies(HCI_INIT_TIMEOUT)); break; - case HCISETPTYPE: - hdev->pkt_type = (__u16) dr.dev_opt; - break; - case HCISETLINKPOL: - hdev->link_policy = (__u16) dr.dev_opt; + err = hci_request(hdev, hci_linkpol_req, dr.dev_opt, + msecs_to_jiffies(HCI_INIT_TIMEOUT)); break; case HCISETLINKMODE: - hdev->link_mode = ((__u16) dr.dev_opt) & (HCI_LM_MASTER | HCI_LM_ACCEPT); + hdev->link_mode = ((__u16) dr.dev_opt) & + (HCI_LM_MASTER | HCI_LM_ACCEPT); + break; + + case HCISETPTYPE: + hdev->pkt_type = (__u16) dr.dev_opt; break; case HCISETACLMTU: - hdev->acl_mtu = *((__u16 *)&dr.dev_opt + 1); - hdev->acl_pkts = *((__u16 *)&dr.dev_opt + 0); + hdev->acl_mtu = *((__u16 *) &dr.dev_opt + 1); + hdev->acl_pkts = *((__u16 *) &dr.dev_opt + 0); break; case HCISETSCOMTU: - hdev->sco_mtu = *((__u16 *)&dr.dev_opt + 1); - hdev->sco_pkts = *((__u16 *)&dr.dev_opt + 0); + hdev->sco_mtu = *((__u16 *) &dr.dev_opt + 1); + hdev->sco_pkts = *((__u16 *) &dr.dev_opt + 0); break; default: err = -EINVAL; break; } + hci_dev_put(hdev); return err; } @@ -1270,9 +1286,12 @@ static inline struct hci_conn *hci_low_sent(struct hci_dev *hdev, __u8 type, int struct hci_conn *c; c = list_entry(p, struct hci_conn, list); - if (c->type != type || c->state != BT_CONNECTED - || skb_queue_empty(&c->data_q)) + if (c->type != type || skb_queue_empty(&c->data_q)) continue; + + if (c->state != BT_CONNECTED && c->state != BT_CONFIG) + continue; + num++; if (c->sent < min) { diff --git a/net/bluetooth/hci_event.c b/net/bluetooth/hci_event.c index 6aef8f24e581..ad7a553d7713 100644 --- a/net/bluetooth/hci_event.c +++ b/net/bluetooth/hci_event.c @@ -110,6 +110,25 @@ static void hci_cc_role_discovery(struct hci_dev *hdev, struct sk_buff *skb) hci_dev_unlock(hdev); } +static void hci_cc_read_link_policy(struct hci_dev *hdev, struct sk_buff *skb) +{ + struct hci_rp_read_link_policy *rp = (void *) skb->data; + struct hci_conn *conn; + + BT_DBG("%s status 0x%x", hdev->name, rp->status); + + if (rp->status) + return; + + hci_dev_lock(hdev); + + conn = hci_conn_hash_lookup_handle(hdev, __le16_to_cpu(rp->handle)); + if (conn) + conn->link_policy = __le16_to_cpu(rp->policy); + + hci_dev_unlock(hdev); +} + static void hci_cc_write_link_policy(struct hci_dev *hdev, struct sk_buff *skb) { struct hci_rp_write_link_policy *rp = (void *) skb->data; @@ -128,13 +147,41 @@ static void hci_cc_write_link_policy(struct hci_dev *hdev, struct sk_buff *skb) hci_dev_lock(hdev); conn = hci_conn_hash_lookup_handle(hdev, __le16_to_cpu(rp->handle)); - if (conn) { + if (conn) conn->link_policy = get_unaligned_le16(sent + 2); - } hci_dev_unlock(hdev); } +static void hci_cc_read_def_link_policy(struct hci_dev *hdev, struct sk_buff *skb) +{ + struct hci_rp_read_def_link_policy *rp = (void *) skb->data; + + BT_DBG("%s status 0x%x", hdev->name, rp->status); + + if (rp->status) + return; + + hdev->link_policy = __le16_to_cpu(rp->policy); +} + +static void hci_cc_write_def_link_policy(struct hci_dev *hdev, struct sk_buff *skb) +{ + __u8 status = *((__u8 *) skb->data); + void *sent; + + BT_DBG("%s status 0x%x", hdev->name, status); + + sent = hci_sent_cmd_data(hdev, HCI_OP_WRITE_DEF_LINK_POLICY); + if (!sent) + return; + + if (!status) + hdev->link_policy = get_unaligned_le16(sent); + + hci_req_complete(hdev, status); +} + static void hci_cc_reset(struct hci_dev *hdev, struct sk_buff *skb) { __u8 status = *((__u8 *) skb->data); @@ -151,12 +198,14 @@ static void hci_cc_write_local_name(struct hci_dev *hdev, struct sk_buff *skb) BT_DBG("%s status 0x%x", hdev->name, status); + if (status) + return; + sent = hci_sent_cmd_data(hdev, HCI_OP_WRITE_LOCAL_NAME); if (!sent) return; - if (!status) - memcpy(hdev->dev_name, sent, 248); + memcpy(hdev->dev_name, sent, 248); } static void hci_cc_read_local_name(struct hci_dev *hdev, struct sk_buff *skb) @@ -266,12 +315,14 @@ static void hci_cc_write_class_of_dev(struct hci_dev *hdev, struct sk_buff *skb) BT_DBG("%s status 0x%x", hdev->name, status); + if (status) + return; + sent = hci_sent_cmd_data(hdev, HCI_OP_WRITE_CLASS_OF_DEV); if (!sent) return; - if (!status) - memcpy(hdev->dev_class, sent, 3); + memcpy(hdev->dev_class, sent, 3); } static void hci_cc_read_voice_setting(struct hci_dev *hdev, struct sk_buff *skb) @@ -286,7 +337,7 @@ static void hci_cc_read_voice_setting(struct hci_dev *hdev, struct sk_buff *skb) setting = __le16_to_cpu(rp->voice_setting); - if (hdev->voice_setting == setting ) + if (hdev->voice_setting == setting) return; hdev->voice_setting = setting; @@ -303,28 +354,31 @@ static void hci_cc_read_voice_setting(struct hci_dev *hdev, struct sk_buff *skb) static void hci_cc_write_voice_setting(struct hci_dev *hdev, struct sk_buff *skb) { __u8 status = *((__u8 *) skb->data); + __u16 setting; void *sent; BT_DBG("%s status 0x%x", hdev->name, status); + if (status) + return; + sent = hci_sent_cmd_data(hdev, HCI_OP_WRITE_VOICE_SETTING); if (!sent) return; - if (!status) { - __u16 setting = get_unaligned_le16(sent); + setting = get_unaligned_le16(sent); - if (hdev->voice_setting != setting) { - hdev->voice_setting = setting; + if (hdev->voice_setting == setting) + return; - BT_DBG("%s voice setting 0x%04x", hdev->name, setting); + hdev->voice_setting = setting; - if (hdev->notify) { - tasklet_disable(&hdev->tx_task); - hdev->notify(hdev, HCI_NOTIFY_VOICE_SETTING); - tasklet_enable(&hdev->tx_task); - } - } + BT_DBG("%s voice setting 0x%04x", hdev->name, setting); + + if (hdev->notify) { + tasklet_disable(&hdev->tx_task); + hdev->notify(hdev, HCI_NOTIFY_VOICE_SETTING); + tasklet_enable(&hdev->tx_task); } } @@ -337,6 +391,35 @@ static void hci_cc_host_buffer_size(struct hci_dev *hdev, struct sk_buff *skb) hci_req_complete(hdev, status); } +static void hci_cc_read_ssp_mode(struct hci_dev *hdev, struct sk_buff *skb) +{ + struct hci_rp_read_ssp_mode *rp = (void *) skb->data; + + BT_DBG("%s status 0x%x", hdev->name, rp->status); + + if (rp->status) + return; + + hdev->ssp_mode = rp->mode; +} + +static void hci_cc_write_ssp_mode(struct hci_dev *hdev, struct sk_buff *skb) +{ + __u8 status = *((__u8 *) skb->data); + void *sent; + + BT_DBG("%s status 0x%x", hdev->name, status); + + if (status) + return; + + sent = hci_sent_cmd_data(hdev, HCI_OP_WRITE_SSP_MODE); + if (!sent) + return; + + hdev->ssp_mode = *((__u8 *) sent); +} + static void hci_cc_read_local_version(struct hci_dev *hdev, struct sk_buff *skb) { struct hci_rp_read_local_version *rp = (void *) skb->data; @@ -347,8 +430,8 @@ static void hci_cc_read_local_version(struct hci_dev *hdev, struct sk_buff *skb) return; hdev->hci_ver = rp->hci_ver; - hdev->hci_rev = btohs(rp->hci_rev); - hdev->manufacturer = btohs(rp->manufacturer); + hdev->hci_rev = __le16_to_cpu(rp->hci_rev); + hdev->manufacturer = __le16_to_cpu(rp->manufacturer); BT_DBG("%s manufacturer %d hci ver %d:%d", hdev->name, hdev->manufacturer, @@ -536,11 +619,119 @@ static void hci_cs_add_sco(struct hci_dev *hdev, __u8 status) hci_dev_unlock(hdev); } +static void hci_cs_auth_requested(struct hci_dev *hdev, __u8 status) +{ + struct hci_cp_auth_requested *cp; + struct hci_conn *conn; + + BT_DBG("%s status 0x%x", hdev->name, status); + + if (!status) + return; + + cp = hci_sent_cmd_data(hdev, HCI_OP_AUTH_REQUESTED); + if (!cp) + return; + + hci_dev_lock(hdev); + + conn = hci_conn_hash_lookup_handle(hdev, __le16_to_cpu(cp->handle)); + if (conn) { + if (conn->state == BT_CONFIG) { + hci_proto_connect_cfm(conn, status); + hci_conn_put(conn); + } + } + + hci_dev_unlock(hdev); +} + +static void hci_cs_set_conn_encrypt(struct hci_dev *hdev, __u8 status) +{ + struct hci_cp_set_conn_encrypt *cp; + struct hci_conn *conn; + + BT_DBG("%s status 0x%x", hdev->name, status); + + if (!status) + return; + + cp = hci_sent_cmd_data(hdev, HCI_OP_SET_CONN_ENCRYPT); + if (!cp) + return; + + hci_dev_lock(hdev); + + conn = hci_conn_hash_lookup_handle(hdev, __le16_to_cpu(cp->handle)); + if (conn) { + if (conn->state == BT_CONFIG) { + hci_proto_connect_cfm(conn, status); + hci_conn_put(conn); + } + } + + hci_dev_unlock(hdev); +} + static void hci_cs_remote_name_req(struct hci_dev *hdev, __u8 status) { BT_DBG("%s status 0x%x", hdev->name, status); } +static void hci_cs_read_remote_features(struct hci_dev *hdev, __u8 status) +{ + struct hci_cp_read_remote_features *cp; + struct hci_conn *conn; + + BT_DBG("%s status 0x%x", hdev->name, status); + + if (!status) + return; + + cp = hci_sent_cmd_data(hdev, HCI_OP_READ_REMOTE_FEATURES); + if (!cp) + return; + + hci_dev_lock(hdev); + + conn = hci_conn_hash_lookup_handle(hdev, __le16_to_cpu(cp->handle)); + if (conn) { + if (conn->state == BT_CONFIG) { + hci_proto_connect_cfm(conn, status); + hci_conn_put(conn); + } + } + + hci_dev_unlock(hdev); +} + +static void hci_cs_read_remote_ext_features(struct hci_dev *hdev, __u8 status) +{ + struct hci_cp_read_remote_ext_features *cp; + struct hci_conn *conn; + + BT_DBG("%s status 0x%x", hdev->name, status); + + if (!status) + return; + + cp = hci_sent_cmd_data(hdev, HCI_OP_READ_REMOTE_EXT_FEATURES); + if (!cp) + return; + + hci_dev_lock(hdev); + + conn = hci_conn_hash_lookup_handle(hdev, __le16_to_cpu(cp->handle)); + if (conn) { + if (conn->state == BT_CONFIG) { + hci_proto_connect_cfm(conn, status); + hci_conn_put(conn); + } + } + + hci_dev_unlock(hdev); +} + static void hci_cs_setup_sync_conn(struct hci_dev *hdev, __u8 status) { struct hci_cp_setup_sync_conn *cp; @@ -653,6 +844,7 @@ static inline void hci_inquiry_result_evt(struct hci_dev *hdev, struct sk_buff * memcpy(data.dev_class, info->dev_class, 3); data.clock_offset = info->clock_offset; data.rssi = 0x00; + data.ssp_mode = 0x00; info++; hci_inquiry_cache_update(hdev, &data); } @@ -675,7 +867,14 @@ static inline void hci_conn_complete_evt(struct hci_dev *hdev, struct sk_buff *s if (!ev->status) { conn->handle = __le16_to_cpu(ev->handle); - conn->state = BT_CONNECTED; + + if (conn->type == ACL_LINK) { + conn->state = BT_CONFIG; + hci_conn_hold(conn); + } else + conn->state = BT_CONNECTED; + + hci_conn_add_sysfs(conn); if (test_bit(HCI_AUTH, &hdev->flags)) conn->link_mode |= HCI_LM_AUTH; @@ -687,30 +886,17 @@ static inline void hci_conn_complete_evt(struct hci_dev *hdev, struct sk_buff *s if (conn->type == ACL_LINK) { struct hci_cp_read_remote_features cp; cp.handle = ev->handle; - hci_send_cmd(hdev, HCI_OP_READ_REMOTE_FEATURES, sizeof(cp), &cp); - } - - /* Set link policy */ - if (conn->type == ACL_LINK && hdev->link_policy) { - struct hci_cp_write_link_policy cp; - cp.handle = ev->handle; - cp.policy = cpu_to_le16(hdev->link_policy); - hci_send_cmd(hdev, HCI_OP_WRITE_LINK_POLICY, sizeof(cp), &cp); + hci_send_cmd(hdev, HCI_OP_READ_REMOTE_FEATURES, + sizeof(cp), &cp); } /* Set packet type for incoming connection */ - if (!conn->out) { + if (!conn->out && hdev->hci_ver < 3) { struct hci_cp_change_conn_ptype cp; cp.handle = ev->handle; - cp.pkt_type = (conn->type == ACL_LINK) ? - cpu_to_le16(hdev->pkt_type & ACL_PTYPE_MASK): - cpu_to_le16(hdev->pkt_type & SCO_PTYPE_MASK); - - hci_send_cmd(hdev, HCI_OP_CHANGE_CONN_PTYPE, sizeof(cp), &cp); - } else { - /* Update disconnect timer */ - hci_conn_hold(conn); - hci_conn_put(conn); + cp.pkt_type = cpu_to_le16(conn->pkt_type); + hci_send_cmd(hdev, HCI_OP_CHANGE_CONN_PTYPE, + sizeof(cp), &cp); } } else conn->state = BT_CLOSED; @@ -730,9 +916,10 @@ static inline void hci_conn_complete_evt(struct hci_dev *hdev, struct sk_buff *s } } - hci_proto_connect_cfm(conn, ev->status); - if (ev->status) + if (ev->status) { + hci_proto_connect_cfm(conn, ev->status); hci_conn_del(conn); + } unlock: hci_dev_unlock(hdev); @@ -752,10 +939,14 @@ static inline void hci_conn_request_evt(struct hci_dev *hdev, struct sk_buff *sk if (mask & HCI_LM_ACCEPT) { /* Connection accepted */ + struct inquiry_entry *ie; struct hci_conn *conn; hci_dev_lock(hdev); + if ((ie = hci_inquiry_cache_lookup(hdev, &ev->bdaddr))) + memcpy(ie->data.dev_class, ev->dev_class, 3); + conn = hci_conn_hash_lookup_ba(hdev, ev->link_type, &ev->bdaddr); if (!conn) { if (!(conn = hci_conn_add(hdev, ev->link_type, &ev->bdaddr))) { @@ -786,7 +977,7 @@ static inline void hci_conn_request_evt(struct hci_dev *hdev, struct sk_buff *sk struct hci_cp_accept_sync_conn_req cp; bacpy(&cp.bdaddr, &ev->bdaddr); - cp.pkt_type = cpu_to_le16(hdev->esco_type); + cp.pkt_type = cpu_to_le16(conn->pkt_type); cp.tx_bandwidth = cpu_to_le32(0x00001f40); cp.rx_bandwidth = cpu_to_le32(0x00001f40); @@ -822,6 +1013,9 @@ static inline void hci_disconn_complete_evt(struct hci_dev *hdev, struct sk_buff conn = hci_conn_hash_lookup_handle(hdev, __le16_to_cpu(ev->handle)); if (conn) { conn->state = BT_CLOSED; + + hci_conn_del_sysfs(conn); + hci_proto_disconn_ind(conn, ev->reason); hci_conn_del(conn); } @@ -845,15 +1039,29 @@ static inline void hci_auth_complete_evt(struct hci_dev *hdev, struct sk_buff *s clear_bit(HCI_CONN_AUTH_PEND, &conn->pend); - hci_auth_cfm(conn, ev->status); + if (conn->state == BT_CONFIG) { + if (!ev->status && hdev->ssp_mode > 0 && + conn->ssp_mode > 0) { + struct hci_cp_set_conn_encrypt cp; + cp.handle = ev->handle; + cp.encrypt = 0x01; + hci_send_cmd(hdev, HCI_OP_SET_CONN_ENCRYPT, + sizeof(cp), &cp); + } else { + conn->state = BT_CONNECTED; + hci_proto_connect_cfm(conn, ev->status); + hci_conn_put(conn); + } + } else + hci_auth_cfm(conn, ev->status); if (test_bit(HCI_CONN_ENCRYPT_PEND, &conn->pend)) { if (!ev->status) { struct hci_cp_set_conn_encrypt cp; - cp.handle = cpu_to_le16(conn->handle); - cp.encrypt = 1; - hci_send_cmd(conn->hdev, - HCI_OP_SET_CONN_ENCRYPT, sizeof(cp), &cp); + cp.handle = ev->handle; + cp.encrypt = 0x01; + hci_send_cmd(hdev, HCI_OP_SET_CONN_ENCRYPT, + sizeof(cp), &cp); } else { clear_bit(HCI_CONN_ENCRYPT_PEND, &conn->pend); hci_encrypt_cfm(conn, ev->status, 0x00); @@ -883,15 +1091,24 @@ static inline void hci_encrypt_change_evt(struct hci_dev *hdev, struct sk_buff * conn = hci_conn_hash_lookup_handle(hdev, __le16_to_cpu(ev->handle)); if (conn) { if (!ev->status) { - if (ev->encrypt) + if (ev->encrypt) { + /* Encryption implies authentication */ + conn->link_mode |= HCI_LM_AUTH; conn->link_mode |= HCI_LM_ENCRYPT; - else + } else conn->link_mode &= ~HCI_LM_ENCRYPT; } clear_bit(HCI_CONN_ENCRYPT_PEND, &conn->pend); - hci_encrypt_cfm(conn, ev->status, ev->encrypt); + if (conn->state == BT_CONFIG) { + if (!ev->status) + conn->state = BT_CONNECTED; + + hci_proto_connect_cfm(conn, ev->status); + hci_conn_put(conn); + } else + hci_encrypt_cfm(conn, ev->status, ev->encrypt); } hci_dev_unlock(hdev); @@ -926,14 +1143,29 @@ static inline void hci_remote_features_evt(struct hci_dev *hdev, struct sk_buff BT_DBG("%s status %d", hdev->name, ev->status); - if (ev->status) - return; - hci_dev_lock(hdev); conn = hci_conn_hash_lookup_handle(hdev, __le16_to_cpu(ev->handle)); - if (conn) - memcpy(conn->features, ev->features, 8); + if (conn) { + if (!ev->status) + memcpy(conn->features, ev->features, 8); + + if (conn->state == BT_CONFIG) { + if (!ev->status && lmp_ssp_capable(hdev) && + lmp_ssp_capable(conn)) { + struct hci_cp_read_remote_ext_features cp; + cp.handle = ev->handle; + cp.page = 0x01; + hci_send_cmd(hdev, + HCI_OP_READ_REMOTE_EXT_FEATURES, + sizeof(cp), &cp); + } else { + conn->state = BT_CONNECTED; + hci_proto_connect_cfm(conn, ev->status); + hci_conn_put(conn); + } + } + } hci_dev_unlock(hdev); } @@ -974,10 +1206,22 @@ static inline void hci_cmd_complete_evt(struct hci_dev *hdev, struct sk_buff *sk hci_cc_role_discovery(hdev, skb); break; + case HCI_OP_READ_LINK_POLICY: + hci_cc_read_link_policy(hdev, skb); + break; + case HCI_OP_WRITE_LINK_POLICY: hci_cc_write_link_policy(hdev, skb); break; + case HCI_OP_READ_DEF_LINK_POLICY: + hci_cc_read_def_link_policy(hdev, skb); + break; + + case HCI_OP_WRITE_DEF_LINK_POLICY: + hci_cc_write_def_link_policy(hdev, skb); + break; + case HCI_OP_RESET: hci_cc_reset(hdev, skb); break; @@ -1022,6 +1266,14 @@ static inline void hci_cmd_complete_evt(struct hci_dev *hdev, struct sk_buff *sk hci_cc_host_buffer_size(hdev, skb); break; + case HCI_OP_READ_SSP_MODE: + hci_cc_read_ssp_mode(hdev, skb); + break; + + case HCI_OP_WRITE_SSP_MODE: + hci_cc_write_ssp_mode(hdev, skb); + break; + case HCI_OP_READ_LOCAL_VERSION: hci_cc_read_local_version(hdev, skb); break; @@ -1076,10 +1328,26 @@ static inline void hci_cmd_status_evt(struct hci_dev *hdev, struct sk_buff *skb) hci_cs_add_sco(hdev, ev->status); break; + case HCI_OP_AUTH_REQUESTED: + hci_cs_auth_requested(hdev, ev->status); + break; + + case HCI_OP_SET_CONN_ENCRYPT: + hci_cs_set_conn_encrypt(hdev, ev->status); + break; + case HCI_OP_REMOTE_NAME_REQ: hci_cs_remote_name_req(hdev, ev->status); break; + case HCI_OP_READ_REMOTE_FEATURES: + hci_cs_read_remote_features(hdev, ev->status); + break; + + case HCI_OP_READ_REMOTE_EXT_FEATURES: + hci_cs_read_remote_ext_features(hdev, ev->status); + break; + case HCI_OP_SETUP_SYNC_CONN: hci_cs_setup_sync_conn(hdev, ev->status); break; @@ -1235,6 +1503,22 @@ static inline void hci_clock_offset_evt(struct hci_dev *hdev, struct sk_buff *sk hci_dev_unlock(hdev); } +static inline void hci_pkt_type_change_evt(struct hci_dev *hdev, struct sk_buff *skb) +{ + struct hci_ev_pkt_type_change *ev = (void *) skb->data; + struct hci_conn *conn; + + BT_DBG("%s status %d", hdev->name, ev->status); + + hci_dev_lock(hdev); + + conn = hci_conn_hash_lookup_handle(hdev, __le16_to_cpu(ev->handle)); + if (conn && !ev->status) + conn->pkt_type = __le16_to_cpu(ev->pkt_type); + + hci_dev_unlock(hdev); +} + static inline void hci_pscan_rep_mode_evt(struct hci_dev *hdev, struct sk_buff *skb) { struct hci_ev_pscan_rep_mode *ev = (void *) skb->data; @@ -1275,6 +1559,7 @@ static inline void hci_inquiry_result_with_rssi_evt(struct hci_dev *hdev, struct memcpy(data.dev_class, info->dev_class, 3); data.clock_offset = info->clock_offset; data.rssi = info->rssi; + data.ssp_mode = 0x00; info++; hci_inquiry_cache_update(hdev, &data); } @@ -1289,6 +1574,7 @@ static inline void hci_inquiry_result_with_rssi_evt(struct hci_dev *hdev, struct memcpy(data.dev_class, info->dev_class, 3); data.clock_offset = info->clock_offset; data.rssi = info->rssi; + data.ssp_mode = 0x00; info++; hci_inquiry_cache_update(hdev, &data); } @@ -1299,7 +1585,40 @@ static inline void hci_inquiry_result_with_rssi_evt(struct hci_dev *hdev, struct static inline void hci_remote_ext_features_evt(struct hci_dev *hdev, struct sk_buff *skb) { + struct hci_ev_remote_ext_features *ev = (void *) skb->data; + struct hci_conn *conn; + BT_DBG("%s", hdev->name); + + hci_dev_lock(hdev); + + conn = hci_conn_hash_lookup_handle(hdev, __le16_to_cpu(ev->handle)); + if (conn) { + if (!ev->status && ev->page == 0x01) { + struct inquiry_entry *ie; + + if ((ie = hci_inquiry_cache_lookup(hdev, &conn->dst))) + ie->data.ssp_mode = (ev->features[0] & 0x01); + + conn->ssp_mode = (ev->features[0] & 0x01); + } + + if (conn->state == BT_CONFIG) { + if (!ev->status && hdev->ssp_mode > 0 && + conn->ssp_mode > 0 && conn->out) { + struct hci_cp_auth_requested cp; + cp.handle = ev->handle; + hci_send_cmd(hdev, HCI_OP_AUTH_REQUESTED, + sizeof(cp), &cp); + } else { + conn->state = BT_CONNECTED; + hci_proto_connect_cfm(conn, ev->status); + hci_conn_put(conn); + } + } + } + + hci_dev_unlock(hdev); } static inline void hci_sync_conn_complete_evt(struct hci_dev *hdev, struct sk_buff *skb) @@ -1312,12 +1631,22 @@ static inline void hci_sync_conn_complete_evt(struct hci_dev *hdev, struct sk_bu hci_dev_lock(hdev); conn = hci_conn_hash_lookup_ba(hdev, ev->link_type, &ev->bdaddr); - if (!conn) - goto unlock; + if (!conn) { + if (ev->link_type == ESCO_LINK) + goto unlock; + + conn = hci_conn_hash_lookup_ba(hdev, ESCO_LINK, &ev->bdaddr); + if (!conn) + goto unlock; + + conn->type = SCO_LINK; + } if (!ev->status) { conn->handle = __le16_to_cpu(ev->handle); conn->state = BT_CONNECTED; + + hci_conn_add_sysfs(conn); } else conn->state = BT_CLOSED; @@ -1371,6 +1700,7 @@ static inline void hci_extended_inquiry_result_evt(struct hci_dev *hdev, struct memcpy(data.dev_class, info->dev_class, 3); data.clock_offset = info->clock_offset; data.rssi = info->rssi; + data.ssp_mode = 0x01; info++; hci_inquiry_cache_update(hdev, &data); } @@ -1378,6 +1708,53 @@ static inline void hci_extended_inquiry_result_evt(struct hci_dev *hdev, struct hci_dev_unlock(hdev); } +static inline void hci_io_capa_request_evt(struct hci_dev *hdev, struct sk_buff *skb) +{ + struct hci_ev_io_capa_request *ev = (void *) skb->data; + struct hci_conn *conn; + + BT_DBG("%s", hdev->name); + + hci_dev_lock(hdev); + + conn = hci_conn_hash_lookup_ba(hdev, ACL_LINK, &ev->bdaddr); + if (conn) + hci_conn_hold(conn); + + hci_dev_unlock(hdev); +} + +static inline void hci_simple_pair_complete_evt(struct hci_dev *hdev, struct sk_buff *skb) +{ + struct hci_ev_simple_pair_complete *ev = (void *) skb->data; + struct hci_conn *conn; + + BT_DBG("%s", hdev->name); + + hci_dev_lock(hdev); + + conn = hci_conn_hash_lookup_ba(hdev, ACL_LINK, &ev->bdaddr); + if (conn) + hci_conn_put(conn); + + hci_dev_unlock(hdev); +} + +static inline void hci_remote_host_features_evt(struct hci_dev *hdev, struct sk_buff *skb) +{ + struct hci_ev_remote_host_features *ev = (void *) skb->data; + struct inquiry_entry *ie; + + BT_DBG("%s", hdev->name); + + hci_dev_lock(hdev); + + if ((ie = hci_inquiry_cache_lookup(hdev, &ev->bdaddr))) + ie->data.ssp_mode = (ev->features[0] & 0x01); + + hci_dev_unlock(hdev); +} + void hci_event_packet(struct hci_dev *hdev, struct sk_buff *skb) { struct hci_event_hdr *hdr = (void *) skb->data; @@ -1470,6 +1847,10 @@ void hci_event_packet(struct hci_dev *hdev, struct sk_buff *skb) hci_clock_offset_evt(hdev, skb); break; + case HCI_EV_PKT_TYPE_CHANGE: + hci_pkt_type_change_evt(hdev, skb); + break; + case HCI_EV_PSCAN_REP_MODE: hci_pscan_rep_mode_evt(hdev, skb); break; @@ -1498,6 +1879,18 @@ void hci_event_packet(struct hci_dev *hdev, struct sk_buff *skb) hci_extended_inquiry_result_evt(hdev, skb); break; + case HCI_EV_IO_CAPA_REQUEST: + hci_io_capa_request_evt(hdev, skb); + break; + + case HCI_EV_SIMPLE_PAIR_COMPLETE: + hci_simple_pair_complete_evt(hdev, skb); + break; + + case HCI_EV_REMOTE_HOST_FEATURES: + hci_remote_host_features_evt(hdev, skb); + break; + default: BT_DBG("%s event 0x%x", hdev->name, event); break; diff --git a/net/bluetooth/hci_sock.c b/net/bluetooth/hci_sock.c index 747fabd735d2..d62579b67959 100644 --- a/net/bluetooth/hci_sock.c +++ b/net/bluetooth/hci_sock.c @@ -193,19 +193,11 @@ static inline int hci_sock_bound_ioctl(struct sock *sk, unsigned int cmd, unsign return 0; - case HCISETSECMGR: - if (!capable(CAP_NET_ADMIN)) - return -EACCES; - - if (arg) - set_bit(HCI_SECMGR, &hdev->flags); - else - clear_bit(HCI_SECMGR, &hdev->flags); - - return 0; - case HCIGETCONNINFO: - return hci_get_conn_info(hdev, (void __user *)arg); + return hci_get_conn_info(hdev, (void __user *) arg); + + case HCIGETAUTHINFO: + return hci_get_auth_info(hdev, (void __user *) arg); default: if (hdev->ioctl) @@ -217,7 +209,7 @@ static inline int hci_sock_bound_ioctl(struct sock *sk, unsigned int cmd, unsign static int hci_sock_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg) { struct sock *sk = sock->sk; - void __user *argp = (void __user *)arg; + void __user *argp = (void __user *) arg; int err; BT_DBG("cmd %x arg %lx", cmd, arg); diff --git a/net/bluetooth/hci_sysfs.c b/net/bluetooth/hci_sysfs.c index 84360c117d4e..f4f6615cad9f 100644 --- a/net/bluetooth/hci_sysfs.c +++ b/net/bluetooth/hci_sysfs.c @@ -3,8 +3,6 @@ #include <linux/kernel.h> #include <linux/init.h> -#include <linux/platform_device.h> - #include <net/bluetooth/bluetooth.h> #include <net/bluetooth/hci_core.h> @@ -12,10 +10,164 @@ #undef BT_DBG #define BT_DBG(D...) #endif + +struct class *bt_class = NULL; +EXPORT_SYMBOL_GPL(bt_class); + static struct workqueue_struct *btaddconn; static struct workqueue_struct *btdelconn; -static inline char *typetostr(int type) +static inline char *link_typetostr(int type) +{ + switch (type) { + case ACL_LINK: + return "ACL"; + case SCO_LINK: + return "SCO"; + case ESCO_LINK: + return "eSCO"; + default: + return "UNKNOWN"; + } +} + +static ssize_t show_link_type(struct device *dev, struct device_attribute *attr, char *buf) +{ + struct hci_conn *conn = dev_get_drvdata(dev); + return sprintf(buf, "%s\n", link_typetostr(conn->type)); +} + +static ssize_t show_link_address(struct device *dev, struct device_attribute *attr, char *buf) +{ + struct hci_conn *conn = dev_get_drvdata(dev); + bdaddr_t bdaddr; + baswap(&bdaddr, &conn->dst); + return sprintf(buf, "%s\n", batostr(&bdaddr)); +} + +static ssize_t show_link_features(struct device *dev, struct device_attribute *attr, char *buf) +{ + struct hci_conn *conn = dev_get_drvdata(dev); + + return sprintf(buf, "0x%02x%02x%02x%02x%02x%02x%02x%02x\n", + conn->features[0], conn->features[1], + conn->features[2], conn->features[3], + conn->features[4], conn->features[5], + conn->features[6], conn->features[7]); +} + +#define LINK_ATTR(_name,_mode,_show,_store) \ +struct device_attribute link_attr_##_name = __ATTR(_name,_mode,_show,_store) + +static LINK_ATTR(type, S_IRUGO, show_link_type, NULL); +static LINK_ATTR(address, S_IRUGO, show_link_address, NULL); +static LINK_ATTR(features, S_IRUGO, show_link_features, NULL); + +static struct attribute *bt_link_attrs[] = { + &link_attr_type.attr, + &link_attr_address.attr, + &link_attr_features.attr, + NULL +}; + +static struct attribute_group bt_link_group = { + .attrs = bt_link_attrs, +}; + +static struct attribute_group *bt_link_groups[] = { + &bt_link_group, + NULL +}; + +static void bt_link_release(struct device *dev) +{ + void *data = dev_get_drvdata(dev); + kfree(data); +} + +static struct device_type bt_link = { + .name = "link", + .groups = bt_link_groups, + .release = bt_link_release, +}; + +static void add_conn(struct work_struct *work) +{ + struct hci_conn *conn = container_of(work, struct hci_conn, work); + + flush_workqueue(btdelconn); + + if (device_add(&conn->dev) < 0) { + BT_ERR("Failed to register connection device"); + return; + } +} + +void hci_conn_add_sysfs(struct hci_conn *conn) +{ + struct hci_dev *hdev = conn->hdev; + + BT_DBG("conn %p", conn); + + conn->dev.type = &bt_link; + conn->dev.class = bt_class; + conn->dev.parent = &hdev->dev; + + snprintf(conn->dev.bus_id, BUS_ID_SIZE, "%s:%d", + hdev->name, conn->handle); + + dev_set_drvdata(&conn->dev, conn); + + device_initialize(&conn->dev); + + INIT_WORK(&conn->work, add_conn); + + queue_work(btaddconn, &conn->work); +} + +/* + * The rfcomm tty device will possibly retain even when conn + * is down, and sysfs doesn't support move zombie device, + * so we should move the device before conn device is destroyed. + */ +static int __match_tty(struct device *dev, void *data) +{ + return !strncmp(dev->bus_id, "rfcomm", 6); +} + +static void del_conn(struct work_struct *work) +{ + struct hci_conn *conn = container_of(work, struct hci_conn, work); + struct hci_dev *hdev = conn->hdev; + + while (1) { + struct device *dev; + + dev = device_find_child(&conn->dev, NULL, __match_tty); + if (!dev) + break; + device_move(dev, NULL); + put_device(dev); + } + + device_del(&conn->dev); + put_device(&conn->dev); + hci_dev_put(hdev); +} + +void hci_conn_del_sysfs(struct hci_conn *conn) +{ + BT_DBG("conn %p", conn); + + if (!device_is_registered(&conn->dev)) + return; + + INIT_WORK(&conn->work, del_conn); + + queue_work(btdelconn, &conn->work); +} + +static inline char *host_typetostr(int type) { switch (type) { case HCI_VIRTUAL: @@ -40,7 +192,7 @@ static inline char *typetostr(int type) static ssize_t show_type(struct device *dev, struct device_attribute *attr, char *buf) { struct hci_dev *hdev = dev_get_drvdata(dev); - return sprintf(buf, "%s\n", typetostr(hdev->type)); + return sprintf(buf, "%s\n", host_typetostr(hdev->type)); } static ssize_t show_name(struct device *dev, struct device_attribute *attr, char *buf) @@ -113,11 +265,13 @@ static ssize_t show_inquiry_cache(struct device *dev, struct device_attribute *a struct inquiry_data *data = &e->data; bdaddr_t bdaddr; baswap(&bdaddr, &data->bdaddr); - n += sprintf(buf + n, "%s %d %d %d 0x%.2x%.2x%.2x 0x%.4x %d %u\n", + n += sprintf(buf + n, "%s %d %d %d 0x%.2x%.2x%.2x 0x%.4x %d %d %u\n", batostr(&bdaddr), - data->pscan_rep_mode, data->pscan_period_mode, data->pscan_mode, - data->dev_class[2], data->dev_class[1], data->dev_class[0], - __le16_to_cpu(data->clock_offset), data->rssi, e->timestamp); + data->pscan_rep_mode, data->pscan_period_mode, + data->pscan_mode, data->dev_class[2], + data->dev_class[1], data->dev_class[0], + __le16_to_cpu(data->clock_offset), + data->rssi, data->ssp_mode, e->timestamp); } hci_dev_unlock_bh(hdev); @@ -219,178 +373,62 @@ static DEVICE_ATTR(sniff_max_interval, S_IRUGO | S_IWUSR, static DEVICE_ATTR(sniff_min_interval, S_IRUGO | S_IWUSR, show_sniff_min_interval, store_sniff_min_interval); -static struct device_attribute *bt_attrs[] = { - &dev_attr_type, - &dev_attr_name, - &dev_attr_class, - &dev_attr_address, - &dev_attr_features, - &dev_attr_manufacturer, - &dev_attr_hci_version, - &dev_attr_hci_revision, - &dev_attr_inquiry_cache, - &dev_attr_idle_timeout, - &dev_attr_sniff_max_interval, - &dev_attr_sniff_min_interval, +static struct attribute *bt_host_attrs[] = { + &dev_attr_type.attr, + &dev_attr_name.attr, + &dev_attr_class.attr, + &dev_attr_address.attr, + &dev_attr_features.attr, + &dev_attr_manufacturer.attr, + &dev_attr_hci_version.attr, + &dev_attr_hci_revision.attr, + &dev_attr_inquiry_cache.attr, + &dev_attr_idle_timeout.attr, + &dev_attr_sniff_max_interval.attr, + &dev_attr_sniff_min_interval.attr, NULL }; -static ssize_t show_conn_type(struct device *dev, struct device_attribute *attr, char *buf) -{ - struct hci_conn *conn = dev_get_drvdata(dev); - return sprintf(buf, "%s\n", conn->type == ACL_LINK ? "ACL" : "SCO"); -} - -static ssize_t show_conn_address(struct device *dev, struct device_attribute *attr, char *buf) -{ - struct hci_conn *conn = dev_get_drvdata(dev); - bdaddr_t bdaddr; - baswap(&bdaddr, &conn->dst); - return sprintf(buf, "%s\n", batostr(&bdaddr)); -} - -#define CONN_ATTR(_name,_mode,_show,_store) \ -struct device_attribute conn_attr_##_name = __ATTR(_name,_mode,_show,_store) - -static CONN_ATTR(type, S_IRUGO, show_conn_type, NULL); -static CONN_ATTR(address, S_IRUGO, show_conn_address, NULL); - -static struct device_attribute *conn_attrs[] = { - &conn_attr_type, - &conn_attr_address, - NULL +static struct attribute_group bt_host_group = { + .attrs = bt_host_attrs, }; -struct class *bt_class = NULL; -EXPORT_SYMBOL_GPL(bt_class); - -static struct bus_type bt_bus = { - .name = "bluetooth", +static struct attribute_group *bt_host_groups[] = { + &bt_host_group, + NULL }; -static struct platform_device *bt_platform; - -static void bt_release(struct device *dev) +static void bt_host_release(struct device *dev) { void *data = dev_get_drvdata(dev); kfree(data); } -static void add_conn(struct work_struct *work) -{ - struct hci_conn *conn = container_of(work, struct hci_conn, work); - int i; - - flush_workqueue(btdelconn); - - if (device_add(&conn->dev) < 0) { - BT_ERR("Failed to register connection device"); - return; - } - - for (i = 0; conn_attrs[i]; i++) - if (device_create_file(&conn->dev, conn_attrs[i]) < 0) - BT_ERR("Failed to create connection attribute"); -} - -void hci_conn_add_sysfs(struct hci_conn *conn) -{ - struct hci_dev *hdev = conn->hdev; - bdaddr_t *ba = &conn->dst; - - BT_DBG("conn %p", conn); - - conn->dev.bus = &bt_bus; - conn->dev.parent = &hdev->dev; - - conn->dev.release = bt_release; - - snprintf(conn->dev.bus_id, BUS_ID_SIZE, - "%s%2.2X%2.2X%2.2X%2.2X%2.2X%2.2X", - conn->type == ACL_LINK ? "acl" : "sco", - ba->b[5], ba->b[4], ba->b[3], - ba->b[2], ba->b[1], ba->b[0]); - - dev_set_drvdata(&conn->dev, conn); - - device_initialize(&conn->dev); - - INIT_WORK(&conn->work, add_conn); - - queue_work(btaddconn, &conn->work); -} - -/* - * The rfcomm tty device will possibly retain even when conn - * is down, and sysfs doesn't support move zombie device, - * so we should move the device before conn device is destroyed. - */ -static int __match_tty(struct device *dev, void *data) -{ - return !strncmp(dev->bus_id, "rfcomm", 6); -} - -static void del_conn(struct work_struct *work) -{ - struct hci_conn *conn = container_of(work, struct hci_conn, work); - struct hci_dev *hdev = conn->hdev; - - while (1) { - struct device *dev; - - dev = device_find_child(&conn->dev, NULL, __match_tty); - if (!dev) - break; - device_move(dev, NULL); - put_device(dev); - } - - device_del(&conn->dev); - put_device(&conn->dev); - hci_dev_put(hdev); -} - -void hci_conn_del_sysfs(struct hci_conn *conn) -{ - BT_DBG("conn %p", conn); - - if (!device_is_registered(&conn->dev)) - return; - - INIT_WORK(&conn->work, del_conn); - - queue_work(btdelconn, &conn->work); -} +static struct device_type bt_host = { + .name = "host", + .groups = bt_host_groups, + .release = bt_host_release, +}; int hci_register_sysfs(struct hci_dev *hdev) { struct device *dev = &hdev->dev; - unsigned int i; int err; BT_DBG("%p name %s type %d", hdev, hdev->name, hdev->type); - dev->bus = &bt_bus; + dev->type = &bt_host; + dev->class = bt_class; dev->parent = hdev->parent; strlcpy(dev->bus_id, hdev->name, BUS_ID_SIZE); - dev->release = bt_release; - dev_set_drvdata(dev, hdev); err = device_register(dev); if (err < 0) return err; - for (i = 0; bt_attrs[i]; i++) - if (device_create_file(dev, bt_attrs[i]) < 0) - BT_ERR("Failed to create device attribute"); - - if (sysfs_create_link(&bt_class->subsys.kobj, - &dev->kobj, kobject_name(&dev->kobj)) < 0) - BT_ERR("Failed to create class symlink"); - return 0; } @@ -398,67 +436,35 @@ void hci_unregister_sysfs(struct hci_dev *hdev) { BT_DBG("%p name %s type %d", hdev, hdev->name, hdev->type); - sysfs_remove_link(&bt_class->subsys.kobj, - kobject_name(&hdev->dev.kobj)); - device_del(&hdev->dev); } int __init bt_sysfs_init(void) { - int err; - btaddconn = create_singlethread_workqueue("btaddconn"); - if (!btaddconn) { - err = -ENOMEM; - goto out; - } + if (!btaddconn) + return -ENOMEM; btdelconn = create_singlethread_workqueue("btdelconn"); if (!btdelconn) { - err = -ENOMEM; - goto out_del; + destroy_workqueue(btaddconn); + return -ENOMEM; } - bt_platform = platform_device_register_simple("bluetooth", -1, NULL, 0); - if (IS_ERR(bt_platform)) { - err = PTR_ERR(bt_platform); - goto out_platform; - } - - err = bus_register(&bt_bus); - if (err < 0) - goto out_bus; - bt_class = class_create(THIS_MODULE, "bluetooth"); if (IS_ERR(bt_class)) { - err = PTR_ERR(bt_class); - goto out_class; + destroy_workqueue(btdelconn); + destroy_workqueue(btaddconn); + return PTR_ERR(bt_class); } return 0; - -out_class: - bus_unregister(&bt_bus); -out_bus: - platform_device_unregister(bt_platform); -out_platform: - destroy_workqueue(btdelconn); -out_del: - destroy_workqueue(btaddconn); -out: - return err; } void bt_sysfs_cleanup(void) { destroy_workqueue(btaddconn); - destroy_workqueue(btdelconn); class_destroy(bt_class); - - bus_unregister(&bt_bus); - - platform_device_unregister(bt_platform); } diff --git a/net/bluetooth/hidp/core.c b/net/bluetooth/hidp/core.c index 519cdb920f93..acdeab3d9807 100644 --- a/net/bluetooth/hidp/core.c +++ b/net/bluetooth/hidp/core.c @@ -578,9 +578,15 @@ static int hidp_session(void *arg) if (session->hid) { if (session->hid->claimed & HID_CLAIMED_INPUT) hidinput_disconnect(session->hid); - hid_free_device(session->hid); + hid_destroy_device(session->hid); } + /* Wakeup user-space polling for socket errors */ + session->intr_sock->sk->sk_err = EUNATCH; + session->ctrl_sock->sk->sk_err = EUNATCH; + + hidp_schedule(session); + fput(session->intr_sock->file); wait_event_timeout(*(ctrl_sk->sk_sleep), @@ -617,9 +623,15 @@ static struct device *hidp_get_device(struct hidp_session *session) static int hidp_setup_input(struct hidp_session *session, struct hidp_connadd_req *req) { - struct input_dev *input = session->input; + struct input_dev *input; int i; + input = input_allocate_device(); + if (!input) + return -ENOMEM; + + session->input = input; + input_set_drvdata(input, session); input->name = "Bluetooth HID Boot Protocol Device"; @@ -671,67 +683,114 @@ static void hidp_close(struct hid_device *hid) { } -static const struct { - __u16 idVendor; - __u16 idProduct; - unsigned quirks; -} hidp_blacklist[] = { - /* Apple wireless Mighty Mouse */ - { 0x05ac, 0x030c, HID_QUIRK_MIGHTYMOUSE | HID_QUIRK_INVERT_HWHEEL }, +static int hidp_parse(struct hid_device *hid) +{ + struct hidp_session *session = hid->driver_data; + struct hidp_connadd_req *req = session->req; + unsigned char *buf; + int ret; - { } /* Terminating entry */ -}; + buf = kmalloc(req->rd_size, GFP_KERNEL); + if (!buf) + return -ENOMEM; + + if (copy_from_user(buf, req->rd_data, req->rd_size)) { + kfree(buf); + return -EFAULT; + } + + ret = hid_parse_report(session->hid, buf, req->rd_size); + + kfree(buf); + + if (ret) + return ret; + + session->req = NULL; + + return 0; +} + +static int hidp_start(struct hid_device *hid) +{ + struct hidp_session *session = hid->driver_data; + struct hid_report *report; + + list_for_each_entry(report, &hid->report_enum[HID_INPUT_REPORT]. + report_list, list) + hidp_send_report(session, report); + + list_for_each_entry(report, &hid->report_enum[HID_FEATURE_REPORT]. + report_list, list) + hidp_send_report(session, report); -static void hidp_setup_quirks(struct hid_device *hid) + return 0; +} + +static void hidp_stop(struct hid_device *hid) { - unsigned int n; + struct hidp_session *session = hid->driver_data; + + skb_queue_purge(&session->ctrl_transmit); + skb_queue_purge(&session->intr_transmit); - for (n = 0; hidp_blacklist[n].idVendor; n++) - if (hidp_blacklist[n].idVendor == le16_to_cpu(hid->vendor) && - hidp_blacklist[n].idProduct == le16_to_cpu(hid->product)) - hid->quirks = hidp_blacklist[n].quirks; + if (hid->claimed & HID_CLAIMED_INPUT) + hidinput_disconnect(hid); + hid->claimed = 0; } -static void hidp_setup_hid(struct hidp_session *session, +static struct hid_ll_driver hidp_hid_driver = { + .parse = hidp_parse, + .start = hidp_start, + .stop = hidp_stop, + .open = hidp_open, + .close = hidp_close, + .hidinput_input_event = hidp_hidinput_event, +}; + +static int hidp_setup_hid(struct hidp_session *session, struct hidp_connadd_req *req) { - struct hid_device *hid = session->hid; - struct hid_report *report; + struct hid_device *hid; bdaddr_t src, dst; + int ret; - baswap(&src, &bt_sk(session->ctrl_sock->sk)->src); - baswap(&dst, &bt_sk(session->ctrl_sock->sk)->dst); + hid = hid_allocate_device(); + if (IS_ERR(hid)) { + ret = PTR_ERR(session->hid); + goto err; + } + session->hid = hid; + session->req = req; hid->driver_data = session; - hid->country = req->country; + baswap(&src, &bt_sk(session->ctrl_sock->sk)->src); + baswap(&dst, &bt_sk(session->ctrl_sock->sk)->dst); hid->bus = BUS_BLUETOOTH; hid->vendor = req->vendor; hid->product = req->product; hid->version = req->version; + hid->country = req->country; strncpy(hid->name, req->name, 128); strncpy(hid->phys, batostr(&src), 64); strncpy(hid->uniq, batostr(&dst), 64); - hid->dev = hidp_get_device(session); - - hid->hid_open = hidp_open; - hid->hid_close = hidp_close; + hid->dev.parent = hidp_get_device(session); + hid->ll_driver = &hidp_hid_driver; - hid->hidinput_input_event = hidp_hidinput_event; + ret = hid_add_device(hid); + if (ret) + goto err_hid; - hidp_setup_quirks(hid); - - list_for_each_entry(report, &hid->report_enum[HID_INPUT_REPORT].report_list, list) - hidp_send_report(session, report); - - list_for_each_entry(report, &hid->report_enum[HID_FEATURE_REPORT].report_list, list) - hidp_send_report(session, report); - - if (hidinput_connect(hid) == 0) - hid->claimed |= HID_CLAIMED_INPUT; + return 0; +err_hid: + hid_destroy_device(hid); + session->hid = NULL; +err: + return ret; } int hidp_add_connection(struct hidp_connadd_req *req, struct socket *ctrl_sock, struct socket *intr_sock) @@ -751,38 +810,6 @@ int hidp_add_connection(struct hidp_connadd_req *req, struct socket *ctrl_sock, BT_DBG("rd_data %p rd_size %d", req->rd_data, req->rd_size); - if (req->rd_size > 0) { - unsigned char *buf = kmalloc(req->rd_size, GFP_KERNEL); - - if (!buf) { - kfree(session); - return -ENOMEM; - } - - if (copy_from_user(buf, req->rd_data, req->rd_size)) { - kfree(buf); - kfree(session); - return -EFAULT; - } - - session->hid = hid_parse_report(buf, req->rd_size); - - kfree(buf); - - if (!session->hid) { - kfree(session); - return -EINVAL; - } - } - - if (!session->hid) { - session->input = input_allocate_device(); - if (!session->input) { - kfree(session); - return -ENOMEM; - } - } - down_write(&hidp_session_sem); s = __hidp_get_session(&bt_sk(ctrl_sock->sk)->dst); @@ -810,15 +837,18 @@ int hidp_add_connection(struct hidp_connadd_req *req, struct socket *ctrl_sock, session->flags = req->flags & (1 << HIDP_BLUETOOTH_VENDOR_ID); session->idle_to = req->idle_to; - if (session->input) { + if (req->rd_size > 0) { + err = hidp_setup_hid(session, req); + if (err && err != -ENODEV) + goto err_skb; + } + + if (!session->hid) { err = hidp_setup_input(session, req); if (err < 0) - goto failed; + goto err_skb; } - if (session->hid) - hidp_setup_hid(session, req); - __hidp_link_session(session); hidp_set_timer(session); @@ -844,17 +874,16 @@ unlink: __hidp_unlink_session(session); - if (session->input) { + if (session->input) input_unregister_device(session->input); - session->input = NULL; /* don't try to free it here */ - } - + if (session->hid) + hid_destroy_device(session->hid); +err_skb: + skb_queue_purge(&session->ctrl_transmit); + skb_queue_purge(&session->intr_transmit); failed: up_write(&hidp_session_sem); - if (session->hid) - hid_free_device(session->hid); - input_free_device(session->input); kfree(session); return err; @@ -879,6 +908,10 @@ int hidp_del_connection(struct hidp_conndel_req *req) skb_queue_purge(&session->ctrl_transmit); skb_queue_purge(&session->intr_transmit); + /* Wakeup user-space polling for socket errors */ + session->intr_sock->sk->sk_err = EUNATCH; + session->ctrl_sock->sk->sk_err = EUNATCH; + /* Kill session thread */ atomic_inc(&session->terminate); hidp_schedule(session); @@ -940,18 +973,43 @@ int hidp_get_conninfo(struct hidp_conninfo *ci) return err; } +static const struct hid_device_id hidp_table[] = { + { HID_BLUETOOTH_DEVICE(HID_ANY_ID, HID_ANY_ID) }, + { } +}; + +static struct hid_driver hidp_driver = { + .name = "generic-bluetooth", + .id_table = hidp_table, +}; + static int __init hidp_init(void) { + int ret; + l2cap_load(); BT_INFO("HIDP (Human Interface Emulation) ver %s", VERSION); - return hidp_init_sockets(); + ret = hid_register_driver(&hidp_driver); + if (ret) + goto err; + + ret = hidp_init_sockets(); + if (ret) + goto err_drv; + + return 0; +err_drv: + hid_unregister_driver(&hidp_driver); +err: + return ret; } static void __exit hidp_exit(void) { hidp_cleanup_sockets(); + hid_unregister_driver(&hidp_driver); } module_init(hidp_init); diff --git a/net/bluetooth/hidp/hidp.h b/net/bluetooth/hidp/hidp.h index 343fb0566b3e..e503c89057ad 100644 --- a/net/bluetooth/hidp/hidp.h +++ b/net/bluetooth/hidp/hidp.h @@ -151,6 +151,8 @@ struct hidp_session { struct sk_buff_head ctrl_transmit; struct sk_buff_head intr_transmit; + + struct hidp_connadd_req *req; }; static inline void hidp_schedule(struct hidp_session *session) diff --git a/net/bluetooth/l2cap.c b/net/bluetooth/l2cap.c index 6e180d255505..9610a9c85b98 100644 --- a/net/bluetooth/l2cap.c +++ b/net/bluetooth/l2cap.c @@ -55,7 +55,7 @@ #define BT_DBG(D...) #endif -#define VERSION "2.9" +#define VERSION "2.11" static u32 l2cap_feat_mask = 0x0000; @@ -76,11 +76,21 @@ static struct sk_buff *l2cap_build_cmd(struct l2cap_conn *conn, static void l2cap_sock_timeout(unsigned long arg) { struct sock *sk = (struct sock *) arg; + int reason; BT_DBG("sock %p state %d", sk, sk->sk_state); bh_lock_sock(sk); - __l2cap_sock_close(sk, ETIMEDOUT); + + if (sk->sk_state == BT_CONNECT && + (l2cap_pi(sk)->link_mode & (L2CAP_LM_AUTH | + L2CAP_LM_ENCRYPT | L2CAP_LM_SECURE))) + reason = ECONNREFUSED; + else + reason = ETIMEDOUT; + + __l2cap_sock_close(sk, reason); + bh_unlock_sock(sk); l2cap_sock_kill(sk); @@ -240,7 +250,7 @@ static void l2cap_chan_del(struct sock *sk, int err) hci_conn_put(conn->hcon); } - sk->sk_state = BT_CLOSED; + sk->sk_state = BT_CLOSED; sock_set_flag(sk, SOCK_ZAPPED); if (err) @@ -253,6 +263,21 @@ static void l2cap_chan_del(struct sock *sk, int err) sk->sk_state_change(sk); } +/* Service level security */ +static inline int l2cap_check_link_mode(struct sock *sk) +{ + struct l2cap_conn *conn = l2cap_pi(sk)->conn; + + if ((l2cap_pi(sk)->link_mode & L2CAP_LM_ENCRYPT) || + (l2cap_pi(sk)->link_mode & L2CAP_LM_SECURE)) + return hci_conn_encrypt(conn->hcon); + + if (l2cap_pi(sk)->link_mode & L2CAP_LM_AUTH) + return hci_conn_auth(conn->hcon); + + return 1; +} + static inline u8 l2cap_get_ident(struct l2cap_conn *conn) { u8 id; @@ -287,6 +312,36 @@ static inline int l2cap_send_cmd(struct l2cap_conn *conn, u8 ident, u8 code, u16 return hci_send_acl(conn->hcon, skb, 0); } +static void l2cap_do_start(struct sock *sk) +{ + struct l2cap_conn *conn = l2cap_pi(sk)->conn; + + if (conn->info_state & L2CAP_INFO_FEAT_MASK_REQ_SENT) { + if (l2cap_check_link_mode(sk)) { + struct l2cap_conn_req req; + req.scid = cpu_to_le16(l2cap_pi(sk)->scid); + req.psm = l2cap_pi(sk)->psm; + + l2cap_pi(sk)->ident = l2cap_get_ident(conn); + + l2cap_send_cmd(conn, l2cap_pi(sk)->ident, + L2CAP_CONN_REQ, sizeof(req), &req); + } + } else { + struct l2cap_info_req req; + req.type = cpu_to_le16(L2CAP_IT_FEAT_MASK); + + conn->info_state |= L2CAP_INFO_FEAT_MASK_REQ_SENT; + conn->info_ident = l2cap_get_ident(conn); + + mod_timer(&conn->info_timer, jiffies + + msecs_to_jiffies(L2CAP_INFO_TIMEOUT)); + + l2cap_send_cmd(conn, conn->info_ident, + L2CAP_INFO_REQ, sizeof(req), &req); + } +} + /* ---- L2CAP connections ---- */ static void l2cap_conn_start(struct l2cap_conn *conn) { @@ -301,16 +356,37 @@ static void l2cap_conn_start(struct l2cap_conn *conn) bh_lock_sock(sk); if (sk->sk_type != SOCK_SEQPACKET) { - l2cap_sock_clear_timer(sk); - sk->sk_state = BT_CONNECTED; - sk->sk_state_change(sk); - } else if (sk->sk_state == BT_CONNECT) { - struct l2cap_conn_req req; - l2cap_pi(sk)->ident = l2cap_get_ident(conn); - req.scid = cpu_to_le16(l2cap_pi(sk)->scid); - req.psm = l2cap_pi(sk)->psm; - l2cap_send_cmd(conn, l2cap_pi(sk)->ident, + bh_unlock_sock(sk); + continue; + } + + if (sk->sk_state == BT_CONNECT) { + if (l2cap_check_link_mode(sk)) { + struct l2cap_conn_req req; + req.scid = cpu_to_le16(l2cap_pi(sk)->scid); + req.psm = l2cap_pi(sk)->psm; + + l2cap_pi(sk)->ident = l2cap_get_ident(conn); + + l2cap_send_cmd(conn, l2cap_pi(sk)->ident, L2CAP_CONN_REQ, sizeof(req), &req); + } + } else if (sk->sk_state == BT_CONNECT2) { + struct l2cap_conn_rsp rsp; + rsp.scid = cpu_to_le16(l2cap_pi(sk)->dcid); + rsp.dcid = cpu_to_le16(l2cap_pi(sk)->scid); + + if (l2cap_check_link_mode(sk)) { + sk->sk_state = BT_CONFIG; + rsp.result = cpu_to_le16(L2CAP_CR_SUCCESS); + rsp.status = cpu_to_le16(L2CAP_CS_NO_INFO); + } else { + rsp.result = cpu_to_le16(L2CAP_CR_PEND); + rsp.status = cpu_to_le16(L2CAP_CS_AUTHEN_PEND); + } + + l2cap_send_cmd(conn, l2cap_pi(sk)->ident, + L2CAP_CONN_RSP, sizeof(rsp), &rsp); } bh_unlock_sock(sk); @@ -321,22 +397,27 @@ static void l2cap_conn_start(struct l2cap_conn *conn) static void l2cap_conn_ready(struct l2cap_conn *conn) { - BT_DBG("conn %p", conn); + struct l2cap_chan_list *l = &conn->chan_list; + struct sock *sk; - if (conn->chan_list.head || !hlist_empty(&l2cap_sk_list.head)) { - struct l2cap_info_req req; + BT_DBG("conn %p", conn); - req.type = cpu_to_le16(L2CAP_IT_FEAT_MASK); + read_lock(&l->lock); - conn->info_state |= L2CAP_INFO_FEAT_MASK_REQ_SENT; - conn->info_ident = l2cap_get_ident(conn); + for (sk = l->head; sk; sk = l2cap_pi(sk)->next_c) { + bh_lock_sock(sk); - mod_timer(&conn->info_timer, - jiffies + msecs_to_jiffies(L2CAP_INFO_TIMEOUT)); + if (sk->sk_type != SOCK_SEQPACKET) { + l2cap_sock_clear_timer(sk); + sk->sk_state = BT_CONNECTED; + sk->sk_state_change(sk); + } else if (sk->sk_state == BT_CONNECT) + l2cap_do_start(sk); - l2cap_send_cmd(conn, conn->info_ident, - L2CAP_INFO_REQ, sizeof(req), &req); + bh_unlock_sock(sk); } + + read_unlock(&l->lock); } /* Notify sockets that we cannot guaranty reliability anymore */ @@ -388,7 +469,8 @@ static struct l2cap_conn *l2cap_conn_add(struct hci_conn *hcon, u8 status) conn->feat_mask = 0; - setup_timer(&conn->info_timer, l2cap_info_timeout, (unsigned long)conn); + setup_timer(&conn->info_timer, l2cap_info_timeout, + (unsigned long) conn); spin_lock_init(&conn->lock); rwlock_init(&conn->chan_list.lock); @@ -500,7 +582,7 @@ static void l2cap_sock_cleanup_listen(struct sock *parent) while ((sk = bt_accept_dequeue(parent, NULL))) l2cap_sock_close(sk); - parent->sk_state = BT_CLOSED; + parent->sk_state = BT_CLOSED; sock_set_flag(parent, SOCK_ZAPPED); } @@ -543,9 +625,8 @@ static void __l2cap_sock_close(struct sock *sk, int reason) req.scid = cpu_to_le16(l2cap_pi(sk)->scid); l2cap_send_cmd(conn, l2cap_get_ident(conn), L2CAP_DISCONN_REQ, sizeof(req), &req); - } else { + } else l2cap_chan_del(sk, reason); - } break; case BT_CONNECT: @@ -614,9 +695,9 @@ static struct sock *l2cap_sock_alloc(struct net *net, struct socket *sock, int p sock_reset_flag(sk, SOCK_ZAPPED); sk->sk_protocol = proto; - sk->sk_state = BT_OPEN; + sk->sk_state = BT_OPEN; - setup_timer(&sk->sk_timer, l2cap_sock_timeout, (unsigned long)sk); + setup_timer(&sk->sk_timer, l2cap_sock_timeout, (unsigned long) sk); bt_sock_link(&l2cap_sk_list, sk); return sk; @@ -697,6 +778,7 @@ static int l2cap_do_connect(struct sock *sk) struct l2cap_conn *conn; struct hci_conn *hcon; struct hci_dev *hdev; + __u8 auth_type; int err = 0; BT_DBG("%s -> %s psm 0x%2.2x", batostr(src), batostr(dst), l2cap_pi(sk)->psm); @@ -708,7 +790,21 @@ static int l2cap_do_connect(struct sock *sk) err = -ENOMEM; - hcon = hci_connect(hdev, ACL_LINK, dst); + if (l2cap_pi(sk)->link_mode & L2CAP_LM_AUTH || + l2cap_pi(sk)->link_mode & L2CAP_LM_ENCRYPT || + l2cap_pi(sk)->link_mode & L2CAP_LM_SECURE) { + if (l2cap_pi(sk)->psm == cpu_to_le16(0x0001)) + auth_type = HCI_AT_NO_BONDING_MITM; + else + auth_type = HCI_AT_GENERAL_BONDING_MITM; + } else { + if (l2cap_pi(sk)->psm == cpu_to_le16(0x0001)) + auth_type = HCI_AT_NO_BONDING; + else + auth_type = HCI_AT_GENERAL_BONDING; + } + + hcon = hci_connect(hdev, ACL_LINK, dst, auth_type); if (!hcon) goto done; @@ -729,22 +825,11 @@ static int l2cap_do_connect(struct sock *sk) l2cap_sock_set_timer(sk, sk->sk_sndtimeo); if (hcon->state == BT_CONNECTED) { - if (!(conn->info_state & L2CAP_INFO_FEAT_MASK_REQ_SENT)) { - l2cap_conn_ready(conn); - goto done; - } - - if (sk->sk_type == SOCK_SEQPACKET) { - struct l2cap_conn_req req; - l2cap_pi(sk)->ident = l2cap_get_ident(conn); - req.scid = cpu_to_le16(l2cap_pi(sk)->scid); - req.psm = l2cap_pi(sk)->psm; - l2cap_send_cmd(conn, l2cap_pi(sk)->ident, - L2CAP_CONN_REQ, sizeof(req), &req); - } else { + if (sk->sk_type != SOCK_SEQPACKET) { l2cap_sock_clear_timer(sk); sk->sk_state = BT_CONNECTED; - } + } else + l2cap_do_start(sk); } done: @@ -1145,7 +1230,8 @@ static int l2cap_sock_shutdown(struct socket *sock, int how) __l2cap_sock_close(sk, 0); if (sock_flag(sk, SOCK_LINGER) && sk->sk_lingertime) - err = bt_sock_wait_state(sk, BT_CLOSED, sk->sk_lingertime); + err = bt_sock_wait_state(sk, BT_CLOSED, + sk->sk_lingertime); } release_sock(sk); return err; @@ -1189,6 +1275,11 @@ static void l2cap_chan_ready(struct sock *sk) */ parent->sk_data_ready(parent, 0); } + + if (l2cap_pi(sk)->link_mode & L2CAP_LM_SECURE) { + struct l2cap_conn *conn = l2cap_pi(sk)->conn; + hci_conn_change_link_key(conn->hcon); + } } /* Copy frame to all raw sockets on that connection */ @@ -1477,10 +1568,10 @@ static inline int l2cap_connect_req(struct l2cap_conn *conn, struct l2cap_cmd_hd struct l2cap_conn_req *req = (struct l2cap_conn_req *) data; struct l2cap_conn_rsp rsp; struct sock *sk, *parent; - int result = 0, status = 0; + int result, status = L2CAP_CS_NO_INFO; u16 dcid = 0, scid = __le16_to_cpu(req->scid); - __le16 psm = req->psm; + __le16 psm = req->psm; BT_DBG("psm 0x%2.2x scid 0x%4.4x", psm, scid); @@ -1491,6 +1582,13 @@ static inline int l2cap_connect_req(struct l2cap_conn *conn, struct l2cap_cmd_hd goto sendresp; } + /* Check if the ACL is secure enough (if not SDP) */ + if (psm != cpu_to_le16(0x0001) && + !hci_conn_check_link_mode(conn->hcon)) { + result = L2CAP_CR_SEC_BLOCK; + goto response; + } + result = L2CAP_CR_NO_MEM; /* Check for backlog size */ @@ -1526,25 +1624,24 @@ static inline int l2cap_connect_req(struct l2cap_conn *conn, struct l2cap_cmd_hd l2cap_sock_set_timer(sk, sk->sk_sndtimeo); - /* Service level security */ - result = L2CAP_CR_PEND; - status = L2CAP_CS_AUTHEN_PEND; - sk->sk_state = BT_CONNECT2; l2cap_pi(sk)->ident = cmd->ident; - if ((l2cap_pi(sk)->link_mode & L2CAP_LM_ENCRYPT) || - (l2cap_pi(sk)->link_mode & L2CAP_LM_SECURE)) { - if (!hci_conn_encrypt(conn->hcon)) - goto done; - } else if (l2cap_pi(sk)->link_mode & L2CAP_LM_AUTH) { - if (!hci_conn_auth(conn->hcon)) - goto done; + if (conn->info_state & L2CAP_INFO_FEAT_MASK_REQ_SENT) { + if (l2cap_check_link_mode(sk)) { + sk->sk_state = BT_CONFIG; + result = L2CAP_CR_SUCCESS; + status = L2CAP_CS_NO_INFO; + } else { + sk->sk_state = BT_CONNECT2; + result = L2CAP_CR_PEND; + status = L2CAP_CS_AUTHEN_PEND; + } + } else { + sk->sk_state = BT_CONNECT2; + result = L2CAP_CR_PEND; + status = L2CAP_CS_NO_INFO; } - sk->sk_state = BT_CONFIG; - result = status = 0; - -done: write_unlock_bh(&list->lock); response: @@ -1556,6 +1653,21 @@ sendresp: rsp.result = cpu_to_le16(result); rsp.status = cpu_to_le16(status); l2cap_send_cmd(conn, cmd->ident, L2CAP_CONN_RSP, sizeof(rsp), &rsp); + + if (result == L2CAP_CR_PEND && status == L2CAP_CS_NO_INFO) { + struct l2cap_info_req info; + info.type = cpu_to_le16(L2CAP_IT_FEAT_MASK); + + conn->info_state |= L2CAP_INFO_FEAT_MASK_REQ_SENT; + conn->info_ident = l2cap_get_ident(conn); + + mod_timer(&conn->info_timer, jiffies + + msecs_to_jiffies(L2CAP_INFO_TIMEOUT)); + + l2cap_send_cmd(conn, conn->info_ident, + L2CAP_INFO_REQ, sizeof(info), &info); + } + return 0; } @@ -1664,9 +1776,9 @@ static inline int l2cap_config_req(struct l2cap_conn *conn, struct l2cap_cmd_hdr } if (!(l2cap_pi(sk)->conf_state & L2CAP_CONF_REQ_SENT)) { - u8 req[64]; + u8 buf[64]; l2cap_send_cmd(conn, l2cap_get_ident(conn), L2CAP_CONF_REQ, - l2cap_build_conf_req(sk, req), req); + l2cap_build_conf_req(sk, buf), buf); } unlock: @@ -1708,7 +1820,7 @@ static inline int l2cap_config_rsp(struct l2cap_conn *conn, struct l2cap_cmd_hdr default: sk->sk_state = BT_DISCONN; - sk->sk_err = ECONNRESET; + sk->sk_err = ECONNRESET; l2cap_sock_set_timer(sk, HZ * 5); { struct l2cap_disconn_req req; @@ -2080,10 +2192,8 @@ static int l2cap_disconn_ind(struct hci_conn *hcon, u8 reason) static int l2cap_auth_cfm(struct hci_conn *hcon, u8 status) { struct l2cap_chan_list *l; - struct l2cap_conn *conn = conn = hcon->l2cap_data; - struct l2cap_conn_rsp rsp; + struct l2cap_conn *conn = hcon->l2cap_data; struct sock *sk; - int result; if (!conn) return 0; @@ -2095,45 +2205,65 @@ static int l2cap_auth_cfm(struct hci_conn *hcon, u8 status) read_lock(&l->lock); for (sk = l->head; sk; sk = l2cap_pi(sk)->next_c) { + struct l2cap_pinfo *pi = l2cap_pi(sk); + bh_lock_sock(sk); - if (sk->sk_state != BT_CONNECT2 || - (l2cap_pi(sk)->link_mode & L2CAP_LM_ENCRYPT) || - (l2cap_pi(sk)->link_mode & L2CAP_LM_SECURE)) { + if ((pi->link_mode & (L2CAP_LM_ENCRYPT | L2CAP_LM_SECURE)) && + !(hcon->link_mode & HCI_LM_ENCRYPT) && + !status) { bh_unlock_sock(sk); continue; } - if (!status) { - sk->sk_state = BT_CONFIG; - result = 0; - } else { - sk->sk_state = BT_DISCONN; - l2cap_sock_set_timer(sk, HZ/10); - result = L2CAP_CR_SEC_BLOCK; - } + if (sk->sk_state == BT_CONNECT) { + if (!status) { + struct l2cap_conn_req req; + req.scid = cpu_to_le16(l2cap_pi(sk)->scid); + req.psm = l2cap_pi(sk)->psm; - rsp.scid = cpu_to_le16(l2cap_pi(sk)->dcid); - rsp.dcid = cpu_to_le16(l2cap_pi(sk)->scid); - rsp.result = cpu_to_le16(result); - rsp.status = cpu_to_le16(0); - l2cap_send_cmd(conn, l2cap_pi(sk)->ident, - L2CAP_CONN_RSP, sizeof(rsp), &rsp); + l2cap_pi(sk)->ident = l2cap_get_ident(conn); + + l2cap_send_cmd(conn, l2cap_pi(sk)->ident, + L2CAP_CONN_REQ, sizeof(req), &req); + } else { + l2cap_sock_clear_timer(sk); + l2cap_sock_set_timer(sk, HZ / 10); + } + } else if (sk->sk_state == BT_CONNECT2) { + struct l2cap_conn_rsp rsp; + __u16 result; + + if (!status) { + sk->sk_state = BT_CONFIG; + result = L2CAP_CR_SUCCESS; + } else { + sk->sk_state = BT_DISCONN; + l2cap_sock_set_timer(sk, HZ / 10); + result = L2CAP_CR_SEC_BLOCK; + } + + rsp.scid = cpu_to_le16(l2cap_pi(sk)->dcid); + rsp.dcid = cpu_to_le16(l2cap_pi(sk)->scid); + rsp.result = cpu_to_le16(result); + rsp.status = cpu_to_le16(L2CAP_CS_NO_INFO); + l2cap_send_cmd(conn, l2cap_pi(sk)->ident, + L2CAP_CONN_RSP, sizeof(rsp), &rsp); + } bh_unlock_sock(sk); } read_unlock(&l->lock); + return 0; } -static int l2cap_encrypt_cfm(struct hci_conn *hcon, u8 status) +static int l2cap_encrypt_cfm(struct hci_conn *hcon, u8 status, u8 encrypt) { struct l2cap_chan_list *l; struct l2cap_conn *conn = hcon->l2cap_data; - struct l2cap_conn_rsp rsp; struct sock *sk; - int result; if (!conn) return 0; @@ -2145,36 +2275,59 @@ static int l2cap_encrypt_cfm(struct hci_conn *hcon, u8 status) read_lock(&l->lock); for (sk = l->head; sk; sk = l2cap_pi(sk)->next_c) { + struct l2cap_pinfo *pi = l2cap_pi(sk); + bh_lock_sock(sk); - if (sk->sk_state != BT_CONNECT2) { + if ((pi->link_mode & (L2CAP_LM_ENCRYPT | L2CAP_LM_SECURE)) && + (sk->sk_state == BT_CONNECTED || + sk->sk_state == BT_CONFIG) && + !status && encrypt == 0x00) { + __l2cap_sock_close(sk, ECONNREFUSED); bh_unlock_sock(sk); continue; } - if (!status) { - sk->sk_state = BT_CONFIG; - result = 0; - } else { - sk->sk_state = BT_DISCONN; - l2cap_sock_set_timer(sk, HZ/10); - result = L2CAP_CR_SEC_BLOCK; - } + if (sk->sk_state == BT_CONNECT) { + if (!status) { + struct l2cap_conn_req req; + req.scid = cpu_to_le16(l2cap_pi(sk)->scid); + req.psm = l2cap_pi(sk)->psm; - rsp.scid = cpu_to_le16(l2cap_pi(sk)->dcid); - rsp.dcid = cpu_to_le16(l2cap_pi(sk)->scid); - rsp.result = cpu_to_le16(result); - rsp.status = cpu_to_le16(0); - l2cap_send_cmd(conn, l2cap_pi(sk)->ident, - L2CAP_CONN_RSP, sizeof(rsp), &rsp); + l2cap_pi(sk)->ident = l2cap_get_ident(conn); - if (l2cap_pi(sk)->link_mode & L2CAP_LM_SECURE) - hci_conn_change_link_key(hcon); + l2cap_send_cmd(conn, l2cap_pi(sk)->ident, + L2CAP_CONN_REQ, sizeof(req), &req); + } else { + l2cap_sock_clear_timer(sk); + l2cap_sock_set_timer(sk, HZ / 10); + } + } else if (sk->sk_state == BT_CONNECT2) { + struct l2cap_conn_rsp rsp; + __u16 result; + + if (!status) { + sk->sk_state = BT_CONFIG; + result = L2CAP_CR_SUCCESS; + } else { + sk->sk_state = BT_DISCONN; + l2cap_sock_set_timer(sk, HZ / 10); + result = L2CAP_CR_SEC_BLOCK; + } + + rsp.scid = cpu_to_le16(l2cap_pi(sk)->dcid); + rsp.dcid = cpu_to_le16(l2cap_pi(sk)->scid); + rsp.result = cpu_to_le16(result); + rsp.status = cpu_to_le16(L2CAP_CS_NO_INFO); + l2cap_send_cmd(conn, l2cap_pi(sk)->ident, + L2CAP_CONN_RSP, sizeof(rsp), &rsp); + } bh_unlock_sock(sk); } read_unlock(&l->lock); + return 0; } @@ -2301,9 +2454,9 @@ static const struct proto_ops l2cap_sock_ops = { .sendmsg = l2cap_sock_sendmsg, .recvmsg = bt_sock_recvmsg, .poll = bt_sock_poll, + .ioctl = bt_sock_ioctl, .mmap = sock_no_mmap, .socketpair = sock_no_socketpair, - .ioctl = sock_no_ioctl, .shutdown = l2cap_sock_shutdown, .setsockopt = l2cap_sock_setsockopt, .getsockopt = l2cap_sock_getsockopt @@ -2385,7 +2538,7 @@ EXPORT_SYMBOL(l2cap_load); module_init(l2cap_init); module_exit(l2cap_exit); -MODULE_AUTHOR("Maxim Krasnyansky <maxk@qualcomm.com>, Marcel Holtmann <marcel@holtmann.org>"); +MODULE_AUTHOR("Marcel Holtmann <marcel@holtmann.org>"); MODULE_DESCRIPTION("Bluetooth L2CAP ver " VERSION); MODULE_VERSION(VERSION); MODULE_LICENSE("GPL"); diff --git a/net/bluetooth/rfcomm/core.c b/net/bluetooth/rfcomm/core.c index 0c2c93735e93..ba537fae0a4c 100644 --- a/net/bluetooth/rfcomm/core.c +++ b/net/bluetooth/rfcomm/core.c @@ -23,8 +23,6 @@ /* * Bluetooth RFCOMM core. - * - * $Id: core.c,v 1.42 2002/10/01 23:26:25 maxk Exp $ */ #include <linux/module.h> @@ -53,7 +51,7 @@ #define BT_DBG(D...) #endif -#define VERSION "1.8" +#define VERSION "1.10" static int disable_cfc = 0; static int channel_mtu = -1; @@ -230,6 +228,21 @@ static int rfcomm_l2sock_create(struct socket **sock) return err; } +static inline int rfcomm_check_link_mode(struct rfcomm_dlc *d) +{ + struct sock *sk = d->session->sock->sk; + + if (d->link_mode & (RFCOMM_LM_ENCRYPT | RFCOMM_LM_SECURE)) { + if (!hci_conn_encrypt(l2cap_pi(sk)->conn->hcon)) + return 1; + } else if (d->link_mode & RFCOMM_LM_AUTH) { + if (!hci_conn_auth(l2cap_pi(sk)->conn->hcon)) + return 1; + } + + return 0; +} + /* ---- RFCOMM DLCs ---- */ static void rfcomm_dlc_timeout(unsigned long arg) { @@ -371,15 +384,23 @@ static int __rfcomm_dlc_open(struct rfcomm_dlc *d, bdaddr_t *src, bdaddr_t *dst, d->addr = __addr(s->initiator, dlci); d->priority = 7; - d->state = BT_CONFIG; + d->state = BT_CONFIG; rfcomm_dlc_link(s, d); + d->out = 1; + d->mtu = s->mtu; d->cfc = (s->cfc == RFCOMM_CFC_UNKNOWN) ? 0 : s->cfc; - if (s->state == BT_CONNECTED) - rfcomm_send_pn(s, 1, d); + if (s->state == BT_CONNECTED) { + if (rfcomm_check_link_mode(d)) + set_bit(RFCOMM_AUTH_PENDING, &d->flags); + else + rfcomm_send_pn(s, 1, d); + } + rfcomm_dlc_set_timer(d, RFCOMM_CONN_TIMEOUT); + return 0; } @@ -1146,21 +1167,6 @@ static int rfcomm_recv_disc(struct rfcomm_session *s, u8 dlci) return 0; } -static inline int rfcomm_check_link_mode(struct rfcomm_dlc *d) -{ - struct sock *sk = d->session->sock->sk; - - if (d->link_mode & (RFCOMM_LM_ENCRYPT | RFCOMM_LM_SECURE)) { - if (!hci_conn_encrypt(l2cap_pi(sk)->conn->hcon)) - return 1; - } else if (d->link_mode & RFCOMM_LM_AUTH) { - if (!hci_conn_auth(l2cap_pi(sk)->conn->hcon)) - return 1; - } - - return 0; -} - static void rfcomm_dlc_accept(struct rfcomm_dlc *d) { struct sock *sk = d->session->sock->sk; @@ -1205,10 +1211,8 @@ static int rfcomm_recv_sabm(struct rfcomm_session *s, u8 dlci) if (rfcomm_check_link_mode(d)) { set_bit(RFCOMM_AUTH_PENDING, &d->flags); rfcomm_dlc_set_timer(d, RFCOMM_AUTH_TIMEOUT); - return 0; - } - - rfcomm_dlc_accept(d); + } else + rfcomm_dlc_accept(d); } return 0; } @@ -1223,10 +1227,8 @@ static int rfcomm_recv_sabm(struct rfcomm_session *s, u8 dlci) if (rfcomm_check_link_mode(d)) { set_bit(RFCOMM_AUTH_PENDING, &d->flags); rfcomm_dlc_set_timer(d, RFCOMM_AUTH_TIMEOUT); - return 0; - } - - rfcomm_dlc_accept(d); + } else + rfcomm_dlc_accept(d); } else { rfcomm_send_dm(s, dlci); } @@ -1459,8 +1461,12 @@ static int rfcomm_recv_msc(struct rfcomm_session *s, int cr, struct sk_buff *skb clear_bit(RFCOMM_TX_THROTTLED, &d->flags); rfcomm_dlc_lock(d); + + d->remote_v24_sig = msc->v24_sig; + if (d->modem_status) d->modem_status(d, msc->v24_sig); + rfcomm_dlc_unlock(d); rfcomm_send_msc(s, 0, dlci, msc->v24_sig); @@ -1636,7 +1642,11 @@ static void rfcomm_process_connect(struct rfcomm_session *s) d = list_entry(p, struct rfcomm_dlc, list); if (d->state == BT_CONFIG) { d->mtu = s->mtu; - rfcomm_send_pn(s, 1, d); + if (rfcomm_check_link_mode(d)) { + set_bit(RFCOMM_AUTH_PENDING, &d->flags); + rfcomm_dlc_set_timer(d, RFCOMM_AUTH_TIMEOUT); + } else + rfcomm_send_pn(s, 1, d); } } } @@ -1709,7 +1719,11 @@ static inline void rfcomm_process_dlcs(struct rfcomm_session *s) if (test_and_clear_bit(RFCOMM_AUTH_ACCEPT, &d->flags)) { rfcomm_dlc_clear_timer(d); - rfcomm_dlc_accept(d); + if (d->out) { + rfcomm_send_pn(s, 1, d); + rfcomm_dlc_set_timer(d, RFCOMM_CONN_TIMEOUT); + } else + rfcomm_dlc_accept(d); if (d->link_mode & RFCOMM_LM_SECURE) { struct sock *sk = s->sock->sk; hci_conn_change_link_key(l2cap_pi(sk)->conn->hcon); @@ -1717,7 +1731,10 @@ static inline void rfcomm_process_dlcs(struct rfcomm_session *s) continue; } else if (test_and_clear_bit(RFCOMM_AUTH_REJECT, &d->flags)) { rfcomm_dlc_clear_timer(d); - rfcomm_send_dm(s, d->dlci); + if (!d->out) + rfcomm_send_dm(s, d->dlci); + else + d->state = BT_CLOSED; __rfcomm_dlc_close(d, ECONNREFUSED); continue; } @@ -1726,7 +1743,7 @@ static inline void rfcomm_process_dlcs(struct rfcomm_session *s) continue; if ((d->state == BT_CONNECTED || d->state == BT_DISCONN) && - d->mscex == RFCOMM_MSCEX_OK) + d->mscex == RFCOMM_MSCEX_OK) rfcomm_process_tx(d); } } @@ -1954,7 +1971,8 @@ static void rfcomm_auth_cfm(struct hci_conn *conn, u8 status) list_for_each_safe(p, n, &s->dlcs) { d = list_entry(p, struct rfcomm_dlc, list); - if (d->link_mode & (RFCOMM_LM_ENCRYPT | RFCOMM_LM_SECURE)) + if ((d->link_mode & (RFCOMM_LM_ENCRYPT | RFCOMM_LM_SECURE)) && + !(conn->link_mode & HCI_LM_ENCRYPT) && !status) continue; if (!test_and_clear_bit(RFCOMM_AUTH_PENDING, &d->flags)) @@ -1988,6 +2006,14 @@ static void rfcomm_encrypt_cfm(struct hci_conn *conn, u8 status, u8 encrypt) list_for_each_safe(p, n, &s->dlcs) { d = list_entry(p, struct rfcomm_dlc, list); + if ((d->link_mode & (RFCOMM_LM_ENCRYPT | RFCOMM_LM_SECURE)) && + (d->state == BT_CONNECTED || + d->state == BT_CONFIG) && + !status && encrypt == 0x00) { + __rfcomm_dlc_close(d, ECONNREFUSED); + continue; + } + if (!test_and_clear_bit(RFCOMM_AUTH_PENDING, &d->flags)) continue; @@ -2089,7 +2115,7 @@ MODULE_PARM_DESC(channel_mtu, "Default MTU for the RFCOMM channel"); module_param(l2cap_mtu, uint, 0644); MODULE_PARM_DESC(l2cap_mtu, "Default MTU for the L2CAP connection"); -MODULE_AUTHOR("Maxim Krasnyansky <maxk@qualcomm.com>, Marcel Holtmann <marcel@holtmann.org>"); +MODULE_AUTHOR("Marcel Holtmann <marcel@holtmann.org>"); MODULE_DESCRIPTION("Bluetooth RFCOMM ver " VERSION); MODULE_VERSION(VERSION); MODULE_LICENSE("GPL"); diff --git a/net/bluetooth/rfcomm/sock.c b/net/bluetooth/rfcomm/sock.c index 5083adcbfae5..8a972b6ba85f 100644 --- a/net/bluetooth/rfcomm/sock.c +++ b/net/bluetooth/rfcomm/sock.c @@ -23,8 +23,6 @@ /* * RFCOMM sockets. - * - * $Id: sock.c,v 1.24 2002/10/03 01:00:34 maxk Exp $ */ #include <linux/module.h> @@ -309,13 +307,13 @@ static struct sock *rfcomm_sock_alloc(struct net *net, struct socket *sock, int sk->sk_destruct = rfcomm_sock_destruct; sk->sk_sndtimeo = RFCOMM_CONN_TIMEOUT; - sk->sk_sndbuf = RFCOMM_MAX_CREDITS * RFCOMM_DEFAULT_MTU * 10; - sk->sk_rcvbuf = RFCOMM_MAX_CREDITS * RFCOMM_DEFAULT_MTU * 10; + sk->sk_sndbuf = RFCOMM_MAX_CREDITS * RFCOMM_DEFAULT_MTU * 10; + sk->sk_rcvbuf = RFCOMM_MAX_CREDITS * RFCOMM_DEFAULT_MTU * 10; sock_reset_flag(sk, SOCK_ZAPPED); sk->sk_protocol = proto; - sk->sk_state = BT_OPEN; + sk->sk_state = BT_OPEN; bt_sock_link(&rfcomm_sk_list, sk); @@ -413,6 +411,8 @@ static int rfcomm_sock_connect(struct socket *sock, struct sockaddr *addr, int a bacpy(&bt_sk(sk)->dst, &sa->rc_bdaddr); rfcomm_pi(sk)->channel = sa->rc_channel; + d->link_mode = rfcomm_pi(sk)->link_mode; + err = rfcomm_dlc_open(d, &bt_sk(sk)->src, &sa->rc_bdaddr, sa->rc_channel); if (!err) err = bt_sock_wait_state(sk, BT_CONNECTED, @@ -688,6 +688,8 @@ static int rfcomm_sock_recvmsg(struct kiocb *iocb, struct socket *sock, copied += chunk; size -= chunk; + sock_recv_timestamp(msg, sk, skb); + if (!(flags & MSG_PEEK)) { atomic_sub(chunk, &sk->sk_rmem_alloc); @@ -793,15 +795,20 @@ static int rfcomm_sock_ioctl(struct socket *sock, unsigned int cmd, unsigned lon struct sock *sk = sock->sk; int err; - lock_sock(sk); + BT_DBG("sk %p cmd %x arg %lx", sk, cmd, arg); + + err = bt_sock_ioctl(sock, cmd, arg); + if (err == -ENOIOCTLCMD) { #ifdef CONFIG_BT_RFCOMM_TTY - err = rfcomm_dev_ioctl(sk, cmd, (void __user *)arg); + lock_sock(sk); + err = rfcomm_dev_ioctl(sk, cmd, (void __user *) arg); + release_sock(sk); #else - err = -EOPNOTSUPP; + err = -EOPNOTSUPP; #endif + } - release_sock(sk); return err; } diff --git a/net/bluetooth/rfcomm/tty.c b/net/bluetooth/rfcomm/tty.c index c9191871c1e0..d3340dd52bcf 100644 --- a/net/bluetooth/rfcomm/tty.c +++ b/net/bluetooth/rfcomm/tty.c @@ -23,8 +23,6 @@ /* * RFCOMM TTY. - * - * $Id: tty.c,v 1.24 2002/10/03 01:54:38 holtmann Exp $ */ #include <linux/module.h> @@ -77,6 +75,8 @@ struct rfcomm_dev { struct device *tty_dev; atomic_t wmem_alloc; + + struct sk_buff_head pending; }; static LIST_HEAD(rfcomm_dev_list); @@ -264,13 +264,34 @@ static int rfcomm_dev_add(struct rfcomm_dev_req *req, struct rfcomm_dlc *dlc) init_waitqueue_head(&dev->wait); tasklet_init(&dev->wakeup_task, rfcomm_tty_wakeup, (unsigned long) dev); + skb_queue_head_init(&dev->pending); + rfcomm_dlc_lock(dlc); + + if (req->flags & (1 << RFCOMM_REUSE_DLC)) { + struct sock *sk = dlc->owner; + struct sk_buff *skb; + + BUG_ON(!sk); + + rfcomm_dlc_throttle(dlc); + + while ((skb = skb_dequeue(&sk->sk_receive_queue))) { + skb_orphan(skb); + skb_queue_tail(&dev->pending, skb); + atomic_sub(skb->len, &sk->sk_rmem_alloc); + } + } + dlc->data_ready = rfcomm_dev_data_ready; dlc->state_change = rfcomm_dev_state_change; dlc->modem_status = rfcomm_dev_modem_status; dlc->owner = dev; dev->dlc = dlc; + + rfcomm_dev_modem_status(dlc, dlc->remote_v24_sig); + rfcomm_dlc_unlock(dlc); /* It's safe to call __module_get() here because socket already @@ -539,11 +560,16 @@ static void rfcomm_dev_data_ready(struct rfcomm_dlc *dlc, struct sk_buff *skb) struct rfcomm_dev *dev = dlc->owner; struct tty_struct *tty; - if (!dev || !(tty = dev->tty)) { + if (!dev) { kfree_skb(skb); return; } + if (!(tty = dev->tty) || !skb_queue_empty(&dev->pending)) { + skb_queue_tail(&dev->pending, skb); + return; + } + BT_DBG("dlc %p tty %p len %d", dlc, tty, skb->len); tty_insert_flip_string(tty, skb->data, skb->len); @@ -617,14 +643,31 @@ static void rfcomm_tty_wakeup(unsigned long arg) return; BT_DBG("dev %p tty %p", dev, tty); + tty_wakeup(tty); +} - if (test_bit(TTY_DO_WRITE_WAKEUP, &tty->flags) && tty->ldisc.write_wakeup) - (tty->ldisc.write_wakeup)(tty); +static void rfcomm_tty_copy_pending(struct rfcomm_dev *dev) +{ + struct tty_struct *tty = dev->tty; + struct sk_buff *skb; + int inserted = 0; - wake_up_interruptible(&tty->write_wait); -#ifdef SERIAL_HAVE_POLL_WAIT - wake_up_interruptible(&tty->poll_wait); -#endif + if (!tty) + return; + + BT_DBG("dev %p tty %p", dev, tty); + + rfcomm_dlc_lock(dev->dlc); + + while ((skb = skb_dequeue(&dev->pending))) { + inserted += tty_insert_flip_string(tty, skb->data, skb->len); + kfree_skb(skb); + } + + rfcomm_dlc_unlock(dev->dlc); + + if (inserted > 0) + tty_flip_buffer_push(tty); } static int rfcomm_tty_open(struct tty_struct *tty, struct file *filp) @@ -691,6 +734,10 @@ static int rfcomm_tty_open(struct tty_struct *tty, struct file *filp) if (err == 0) device_move(dev->tty_dev, rfcomm_get_device(dev)); + rfcomm_tty_copy_pending(dev); + + rfcomm_dlc_unthrottle(dev->dlc); + return err; } @@ -1005,9 +1052,7 @@ static void rfcomm_tty_flush_buffer(struct tty_struct *tty) return; skb_queue_purge(&dev->dlc->tx_queue); - - if (test_bit(TTY_DO_WRITE_WAKEUP, &tty->flags) && tty->ldisc.write_wakeup) - tty->ldisc.write_wakeup(tty); + tty_wakeup(tty); } static void rfcomm_tty_send_xchar(struct tty_struct *tty, char ch) @@ -1123,6 +1168,7 @@ int rfcomm_init_ttys(void) rfcomm_tty_driver->flags = TTY_DRIVER_REAL_RAW | TTY_DRIVER_DYNAMIC_DEV; rfcomm_tty_driver->init_termios = tty_std_termios; rfcomm_tty_driver->init_termios.c_cflag = B9600 | CS8 | CREAD | HUPCL | CLOCAL; + rfcomm_tty_driver->init_termios.c_lflag &= ~ICANON; tty_set_operations(rfcomm_tty_driver, &rfcomm_ops); if (tty_register_driver(rfcomm_tty_driver)) { diff --git a/net/bluetooth/sco.c b/net/bluetooth/sco.c index b0d487e2db20..0cc91e6da76d 100644 --- a/net/bluetooth/sco.c +++ b/net/bluetooth/sco.c @@ -53,7 +53,9 @@ #define BT_DBG(D...) #endif -#define VERSION "0.5" +#define VERSION "0.6" + +static int disable_esco = 0; static const struct proto_ops sco_sock_ops; @@ -193,9 +195,12 @@ static int sco_connect(struct sock *sk) err = -ENOMEM; - type = lmp_esco_capable(hdev) ? ESCO_LINK : SCO_LINK; + if (lmp_esco_capable(hdev) && !disable_esco) + type = ESCO_LINK; + else + type = SCO_LINK; - hcon = hci_connect(hdev, type, dst); + hcon = hci_connect(hdev, type, dst, HCI_AT_NO_BONDING); if (!hcon) goto done; @@ -921,7 +926,7 @@ static const struct proto_ops sco_sock_ops = { .sendmsg = sco_sock_sendmsg, .recvmsg = bt_sock_recvmsg, .poll = bt_sock_poll, - .ioctl = sock_no_ioctl, + .ioctl = bt_sock_ioctl, .mmap = sock_no_mmap, .socketpair = sock_no_socketpair, .shutdown = sock_no_shutdown, @@ -994,7 +999,10 @@ static void __exit sco_exit(void) module_init(sco_init); module_exit(sco_exit); -MODULE_AUTHOR("Maxim Krasnyansky <maxk@qualcomm.com>, Marcel Holtmann <marcel@holtmann.org>"); +module_param(disable_esco, bool, 0644); +MODULE_PARM_DESC(disable_esco, "Disable eSCO connection creation"); + +MODULE_AUTHOR("Marcel Holtmann <marcel@holtmann.org>"); MODULE_DESCRIPTION("Bluetooth SCO ver " VERSION); MODULE_VERSION(VERSION); MODULE_LICENSE("GPL"); diff --git a/net/bridge/Kconfig b/net/bridge/Kconfig index 12265aff7099..e143ca678881 100644 --- a/net/bridge/Kconfig +++ b/net/bridge/Kconfig @@ -5,6 +5,7 @@ config BRIDGE tristate "802.1d Ethernet Bridging" select LLC + select STP ---help--- If you say Y here, then your Linux box will be able to act as an Ethernet bridge, which means that the different Ethernet segments it diff --git a/net/bridge/br.c b/net/bridge/br.c index 8f3c58e5f7a5..4d2c1f1cb524 100644 --- a/net/bridge/br.c +++ b/net/bridge/br.c @@ -5,8 +5,6 @@ * Authors: * Lennert Buytenhek <buytenh@gnu.org> * - * $Id: br.c,v 1.47 2001/12/24 00:56:41 davem Exp $ - * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public License * as published by the Free Software Foundation; either version @@ -20,39 +18,50 @@ #include <linux/init.h> #include <linux/llc.h> #include <net/llc.h> +#include <net/stp.h> #include "br_private.h" int (*br_should_route_hook)(struct sk_buff *skb); -static struct llc_sap *br_stp_sap; +static const struct stp_proto br_stp_proto = { + .rcv = br_stp_rcv, +}; + +static struct pernet_operations br_net_ops = { + .exit = br_net_exit, +}; static int __init br_init(void) { int err; - br_stp_sap = llc_sap_open(LLC_SAP_BSPAN, br_stp_rcv); - if (!br_stp_sap) { + err = stp_proto_register(&br_stp_proto); + if (err < 0) { printk(KERN_ERR "bridge: can't register sap for STP\n"); - return -EADDRINUSE; + return err; } err = br_fdb_init(); if (err) goto err_out; - err = br_netfilter_init(); + err = register_pernet_subsys(&br_net_ops); if (err) goto err_out1; - err = register_netdevice_notifier(&br_device_notifier); + err = br_netfilter_init(); if (err) goto err_out2; - err = br_netlink_init(); + err = register_netdevice_notifier(&br_device_notifier); if (err) goto err_out3; + err = br_netlink_init(); + if (err) + goto err_out4; + brioctl_set(br_ioctl_deviceless_stub); br_handle_frame_hook = br_handle_frame; @@ -60,31 +69,32 @@ static int __init br_init(void) br_fdb_put_hook = br_fdb_put; return 0; -err_out3: +err_out4: unregister_netdevice_notifier(&br_device_notifier); -err_out2: +err_out3: br_netfilter_fini(); +err_out2: + unregister_pernet_subsys(&br_net_ops); err_out1: br_fdb_fini(); err_out: - llc_sap_put(br_stp_sap); + stp_proto_unregister(&br_stp_proto); return err; } static void __exit br_deinit(void) { - rcu_assign_pointer(br_stp_sap->rcv_func, NULL); + stp_proto_unregister(&br_stp_proto); br_netlink_fini(); unregister_netdevice_notifier(&br_device_notifier); brioctl_set(NULL); - br_cleanup_bridges(); + unregister_pernet_subsys(&br_net_ops); synchronize_net(); br_netfilter_fini(); - llc_sap_put(br_stp_sap); br_fdb_get_hook = NULL; br_fdb_put_hook = NULL; diff --git a/net/bridge/br_device.c b/net/bridge/br_device.c index bf7787395fe0..22ba8632196f 100644 --- a/net/bridge/br_device.c +++ b/net/bridge/br_device.c @@ -5,8 +5,6 @@ * Authors: * Lennert Buytenhek <buytenh@gnu.org> * - * $Id: br_device.c,v 1.6 2001/12/24 00:59:55 davem Exp $ - * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public License * as published by the Free Software Foundation; either version @@ -21,12 +19,6 @@ #include <asm/uaccess.h> #include "br_private.h" -static struct net_device_stats *br_dev_get_stats(struct net_device *dev) -{ - struct net_bridge *br = netdev_priv(dev); - return &br->statistics; -} - /* net device transmit always called with no BH (preempt_disabled) */ int br_dev_xmit(struct sk_buff *skb, struct net_device *dev) { @@ -34,8 +26,8 @@ int br_dev_xmit(struct sk_buff *skb, struct net_device *dev) const unsigned char *dest = skb->data; struct net_bridge_fdb_entry *dst; - br->statistics.tx_packets++; - br->statistics.tx_bytes += skb->len; + dev->stats.tx_packets++; + dev->stats.tx_bytes += skb->len; skb_reset_mac_header(skb); skb_pull(skb, ETH_HLEN); @@ -76,10 +68,17 @@ static int br_dev_stop(struct net_device *dev) static int br_change_mtu(struct net_device *dev, int new_mtu) { - if (new_mtu < 68 || new_mtu > br_min_mtu(netdev_priv(dev))) + struct net_bridge *br = netdev_priv(dev); + if (new_mtu < 68 || new_mtu > br_min_mtu(br)) return -EINVAL; dev->mtu = new_mtu; + +#ifdef CONFIG_BRIDGE_NETFILTER + /* remember the MTU in the rtable for PMTU */ + br->fake_rtable.u.dst.metrics[RTAX_MTU - 1] = new_mtu; +#endif + return 0; } @@ -95,6 +94,7 @@ static int br_set_mac_address(struct net_device *dev, void *p) spin_lock_bh(&br->lock); memcpy(dev->dev_addr, addr->sa_data, ETH_ALEN); br_stp_change_bridge_id(br, addr->sa_data); + br->flags |= BR_SET_MAC_ADDR; spin_unlock_bh(&br->lock); return 0; @@ -148,11 +148,16 @@ static int br_set_tx_csum(struct net_device *dev, u32 data) } static struct ethtool_ops br_ethtool_ops = { - .get_drvinfo = br_getinfo, - .get_link = ethtool_op_get_link, - .set_sg = br_set_sg, - .set_tx_csum = br_set_tx_csum, - .set_tso = br_set_tso, + .get_drvinfo = br_getinfo, + .get_link = ethtool_op_get_link, + .get_tx_csum = ethtool_op_get_tx_csum, + .set_tx_csum = br_set_tx_csum, + .get_sg = ethtool_op_get_sg, + .set_sg = br_set_sg, + .get_tso = ethtool_op_get_tso, + .set_tso = br_set_tso, + .get_ufo = ethtool_op_get_ufo, + .get_flags = ethtool_op_get_flags, }; void br_dev_setup(struct net_device *dev) @@ -161,7 +166,6 @@ void br_dev_setup(struct net_device *dev) ether_setup(dev); dev->do_ioctl = br_dev_ioctl; - dev->get_stats = br_dev_get_stats; dev->hard_start_xmit = br_dev_xmit; dev->open = br_dev_open; dev->set_multicast_list = br_dev_set_multicast_list; @@ -174,5 +178,6 @@ void br_dev_setup(struct net_device *dev) dev->priv_flags = IFF_EBRIDGE; dev->features = NETIF_F_SG | NETIF_F_FRAGLIST | NETIF_F_HIGHDMA | - NETIF_F_GSO_MASK | NETIF_F_NO_CSUM | NETIF_F_LLTX; + NETIF_F_GSO_MASK | NETIF_F_NO_CSUM | NETIF_F_LLTX | + NETIF_F_NETNS_LOCAL; } diff --git a/net/bridge/br_fdb.c b/net/bridge/br_fdb.c index 72c5976a5ce3..a48f5efdb6bf 100644 --- a/net/bridge/br_fdb.c +++ b/net/bridge/br_fdb.c @@ -5,8 +5,6 @@ * Authors: * Lennert Buytenhek <buytenh@gnu.org> * - * $Id: br_fdb.c,v 1.6 2002/01/17 00:57:07 davem Exp $ - * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public License * as published by the Free Software Foundation; either version @@ -15,6 +13,7 @@ #include <linux/kernel.h> #include <linux/init.h> +#include <linux/rculist.h> #include <linux/spinlock.h> #include <linux/times.h> #include <linux/netdevice.h> diff --git a/net/bridge/br_forward.c b/net/bridge/br_forward.c index bdd7c35c3c7b..bdd9ccea17ce 100644 --- a/net/bridge/br_forward.c +++ b/net/bridge/br_forward.c @@ -5,8 +5,6 @@ * Authors: * Lennert Buytenhek <buytenh@gnu.org> * - * $Id: br_forward.c,v 1.4 2001/08/14 22:05:57 davem Exp $ - * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public License * as published by the Free Software Foundation; either version @@ -91,7 +89,7 @@ void br_deliver(const struct net_bridge_port *to, struct sk_buff *skb) /* called with rcu_read_lock */ void br_forward(const struct net_bridge_port *to, struct sk_buff *skb) { - if (should_deliver(to, skb)) { + if (!skb_warn_if_lro(skb) && should_deliver(to, skb)) { __br_forward(to, skb); return; } @@ -115,7 +113,7 @@ static void br_flood(struct net_bridge *br, struct sk_buff *skb, struct sk_buff *skb2; if ((skb2 = skb_clone(skb, GFP_ATOMIC)) == NULL) { - br->statistics.tx_dropped++; + br->dev->stats.tx_dropped++; kfree_skb(skb); return; } diff --git a/net/bridge/br_if.c b/net/bridge/br_if.c index c2397f503b0f..573e20f7dba4 100644 --- a/net/bridge/br_if.c +++ b/net/bridge/br_if.c @@ -5,8 +5,6 @@ * Authors: * Lennert Buytenhek <buytenh@gnu.org> * - * $Id: br_if.c,v 1.7 2001/12/24 00:59:55 davem Exp $ - * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public License * as published by the Free Software Foundation; either version @@ -170,7 +168,7 @@ static void del_br(struct net_bridge *br) unregister_netdevice(br->dev); } -static struct net_device *new_bridge_dev(const char *name) +static struct net_device *new_bridge_dev(struct net *net, const char *name) { struct net_bridge *br; struct net_device *dev; @@ -180,6 +178,7 @@ static struct net_device *new_bridge_dev(const char *name) if (!dev) return NULL; + dev_net_set(dev, net); br = netdev_priv(dev); br->dev = dev; @@ -204,6 +203,9 @@ static struct net_device *new_bridge_dev(const char *name) br->topology_change = 0; br->topology_change_detected = 0; br->ageing_time = 300 * HZ; + + br_netfilter_rtable_init(br); + INIT_LIST_HEAD(&br->age_list); br_stp_timer_init(br); @@ -261,12 +263,12 @@ static struct net_bridge_port *new_nbp(struct net_bridge *br, return p; } -int br_add_bridge(const char *name) +int br_add_bridge(struct net *net, const char *name) { struct net_device *dev; int ret; - dev = new_bridge_dev(name); + dev = new_bridge_dev(net, name); if (!dev) return -ENOMEM; @@ -293,13 +295,13 @@ out_free: goto out; } -int br_del_bridge(const char *name) +int br_del_bridge(struct net *net, const char *name) { struct net_device *dev; int ret = 0; rtnl_lock(); - dev = __dev_get_by_name(&init_net, name); + dev = __dev_get_by_name(net, name); if (dev == NULL) ret = -ENXIO; /* Could not find device */ @@ -375,6 +377,10 @@ int br_add_if(struct net_bridge *br, struct net_device *dev) if (IS_ERR(p)) return PTR_ERR(p); + err = dev_set_promiscuity(dev, 1); + if (err) + goto put_back; + err = kobject_init_and_add(&p->kobj, &brport_ktype, &(dev->dev.kobj), SYSFS_BRIDGE_PORT_ATTR); if (err) @@ -389,7 +395,7 @@ int br_add_if(struct net_bridge *br, struct net_device *dev) goto err2; rcu_assign_pointer(dev->br_port, p); - dev_set_promiscuity(dev, 1); + dev_disable_lro(dev); list_add_rcu(&p->list, &br->port_list); @@ -413,12 +419,12 @@ err2: br_fdb_delete_by_port(br, p, 1); err1: kobject_del(&p->kobj); - goto put_back; err0: kobject_put(&p->kobj); - + dev_set_promiscuity(dev, -1); put_back: dev_put(dev); + kfree(p); return err; } @@ -440,14 +446,18 @@ int br_del_if(struct net_bridge *br, struct net_device *dev) return 0; } -void __exit br_cleanup_bridges(void) +void br_net_exit(struct net *net) { - struct net_device *dev, *nxt; + struct net_device *dev; rtnl_lock(); - for_each_netdev_safe(&init_net, dev, nxt) - if (dev->priv_flags & IFF_EBRIDGE) +restart: + for_each_netdev(net, dev) { + if (dev->priv_flags & IFF_EBRIDGE) { del_br(dev->priv); + goto restart; + } + } rtnl_unlock(); } diff --git a/net/bridge/br_input.c b/net/bridge/br_input.c index 255c00f60ce7..30b88777c3df 100644 --- a/net/bridge/br_input.c +++ b/net/bridge/br_input.c @@ -5,8 +5,6 @@ * Authors: * Lennert Buytenhek <buytenh@gnu.org> * - * $Id: br_input.c,v 1.10 2001/12/24 04:50:20 davem Exp $ - * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public License * as published by the Free Software Foundation; either version @@ -24,13 +22,13 @@ const u8 br_group_address[ETH_ALEN] = { 0x01, 0x80, 0xc2, 0x00, 0x00, 0x00 }; static void br_pass_frame_up(struct net_bridge *br, struct sk_buff *skb) { - struct net_device *indev; + struct net_device *indev, *brdev = br->dev; - br->statistics.rx_packets++; - br->statistics.rx_bytes += skb->len; + brdev->stats.rx_packets++; + brdev->stats.rx_bytes += skb->len; indev = skb->dev; - skb->dev = br->dev; + skb->dev = brdev; NF_HOOK(PF_BRIDGE, NF_BR_LOCAL_IN, skb, indev, NULL, netif_receive_skb); @@ -64,7 +62,7 @@ int br_handle_frame_finish(struct sk_buff *skb) dst = NULL; if (is_multicast_ether_addr(dest)) { - br->statistics.multicast++; + br->dev->stats.multicast++; skb2 = skb; } else if ((dst = __br_fdb_get(br, dest)) && dst->is_local) { skb2 = skb; @@ -136,14 +134,11 @@ struct sk_buff *br_handle_frame(struct net_bridge_port *p, struct sk_buff *skb) if (skb->protocol == htons(ETH_P_PAUSE)) goto drop; - /* Process STP BPDU's through normal netif_receive_skb() path */ - if (p->br->stp_enabled != BR_NO_STP) { - if (NF_HOOK(PF_BRIDGE, NF_BR_LOCAL_IN, skb, skb->dev, - NULL, br_handle_local_finish)) - return NULL; - else - return skb; - } + if (NF_HOOK(PF_BRIDGE, NF_BR_LOCAL_IN, skb, skb->dev, + NULL, br_handle_local_finish)) + return NULL; /* frame consumed by filter */ + else + return skb; /* continue processing */ } switch (p->state) { diff --git a/net/bridge/br_ioctl.c b/net/bridge/br_ioctl.c index 0655a5f07f58..6a6433daaf27 100644 --- a/net/bridge/br_ioctl.c +++ b/net/bridge/br_ioctl.c @@ -5,8 +5,6 @@ * Authors: * Lennert Buytenhek <buytenh@gnu.org> * - * $Id: br_ioctl.c,v 1.4 2000/11/08 05:16:40 davem Exp $ - * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public License * as published by the Free Software Foundation; either version @@ -23,12 +21,12 @@ #include "br_private.h" /* called with RTNL */ -static int get_bridge_ifindices(int *indices, int num) +static int get_bridge_ifindices(struct net *net, int *indices, int num) { struct net_device *dev; int i = 0; - for_each_netdev(&init_net, dev) { + for_each_netdev(net, dev) { if (i >= num) break; if (dev->priv_flags & IFF_EBRIDGE) @@ -91,7 +89,7 @@ static int add_del_if(struct net_bridge *br, int ifindex, int isadd) if (!capable(CAP_NET_ADMIN)) return -EPERM; - dev = dev_get_by_index(&init_net, ifindex); + dev = dev_get_by_index(dev_net(br->dev), ifindex); if (dev == NULL) return -EINVAL; @@ -190,15 +188,21 @@ static int old_dev_ioctl(struct net_device *dev, struct ifreq *rq, int cmd) return 0; case BRCTL_SET_BRIDGE_HELLO_TIME: + { + unsigned long t = clock_t_to_jiffies(args[1]); if (!capable(CAP_NET_ADMIN)) return -EPERM; + if (t < HZ) + return -EINVAL; + spin_lock_bh(&br->lock); - br->bridge_hello_time = clock_t_to_jiffies(args[1]); + br->bridge_hello_time = t; if (br_is_root_bridge(br)) br->hello_time = br->bridge_hello_time; spin_unlock_bh(&br->lock); return 0; + } case BRCTL_SET_BRIDGE_MAX_AGE: if (!capable(CAP_NET_ADMIN)) @@ -311,7 +315,7 @@ static int old_dev_ioctl(struct net_device *dev, struct ifreq *rq, int cmd) return -EOPNOTSUPP; } -static int old_deviceless(void __user *uarg) +static int old_deviceless(struct net *net, void __user *uarg) { unsigned long args[3]; @@ -333,7 +337,7 @@ static int old_deviceless(void __user *uarg) if (indices == NULL) return -ENOMEM; - args[2] = get_bridge_ifindices(indices, args[2]); + args[2] = get_bridge_ifindices(net, indices, args[2]); ret = copy_to_user((void __user *)args[1], indices, args[2]*sizeof(int)) ? -EFAULT : args[2]; @@ -356,9 +360,9 @@ static int old_deviceless(void __user *uarg) buf[IFNAMSIZ-1] = 0; if (args[0] == BRCTL_ADD_BRIDGE) - return br_add_bridge(buf); + return br_add_bridge(net, buf); - return br_del_bridge(buf); + return br_del_bridge(net, buf); } } @@ -370,7 +374,7 @@ int br_ioctl_deviceless_stub(struct net *net, unsigned int cmd, void __user *uar switch (cmd) { case SIOCGIFBR: case SIOCSIFBR: - return old_deviceless(uarg); + return old_deviceless(net, uarg); case SIOCBRADDBR: case SIOCBRDELBR: @@ -385,9 +389,9 @@ int br_ioctl_deviceless_stub(struct net *net, unsigned int cmd, void __user *uar buf[IFNAMSIZ-1] = 0; if (cmd == SIOCBRADDBR) - return br_add_bridge(buf); + return br_add_bridge(net, buf); - return br_del_bridge(buf); + return br_del_bridge(net, buf); } } return -EOPNOTSUPP; diff --git a/net/bridge/br_netfilter.c b/net/bridge/br_netfilter.c index bb90cd7bace3..a4abed5b4c44 100644 --- a/net/bridge/br_netfilter.c +++ b/net/bridge/br_netfilter.c @@ -101,33 +101,30 @@ static inline __be16 pppoe_proto(const struct sk_buff *skb) pppoe_proto(skb) == htons(PPP_IPV6) && \ brnf_filter_pppoe_tagged) -/* We need these fake structures to make netfilter happy -- - * lots of places assume that skb->dst != NULL, which isn't - * all that unreasonable. - * +/* + * Initialize bogus route table used to keep netfilter happy. * Currently, we fill in the PMTU entry because netfilter * refragmentation needs it, and the rt_flags entry because * ipt_REJECT needs it. Future netfilter modules might - * require us to fill additional fields. */ -static struct net_device __fake_net_device = { - .hard_header_len = ETH_HLEN, -#ifdef CONFIG_NET_NS - .nd_net = &init_net, -#endif -}; + * require us to fill additional fields. + */ +void br_netfilter_rtable_init(struct net_bridge *br) +{ + struct rtable *rt = &br->fake_rtable; -static struct rtable __fake_rtable = { - .u = { - .dst = { - .__refcnt = ATOMIC_INIT(1), - .dev = &__fake_net_device, - .path = &__fake_rtable.u.dst, - .metrics = {[RTAX_MTU - 1] = 1500}, - .flags = DST_NOXFRM, - } - }, - .rt_flags = 0, -}; + atomic_set(&rt->u.dst.__refcnt, 1); + rt->u.dst.dev = br->dev; + rt->u.dst.path = &rt->u.dst; + rt->u.dst.metrics[RTAX_MTU - 1] = 1500; + rt->u.dst.flags = DST_NOXFRM; +} + +static inline struct rtable *bridge_parent_rtable(const struct net_device *dev) +{ + struct net_bridge_port *port = rcu_dereference(dev->br_port); + + return port ? &port->br->fake_rtable : NULL; +} static inline struct net_device *bridge_parent(const struct net_device *dev) { @@ -226,8 +223,12 @@ static int br_nf_pre_routing_finish_ipv6(struct sk_buff *skb) } nf_bridge->mask ^= BRNF_NF_BRIDGE_PREROUTING; - skb->rtable = &__fake_rtable; - dst_hold(&__fake_rtable.u.dst); + skb->rtable = bridge_parent_rtable(nf_bridge->physindev); + if (!skb->rtable) { + kfree_skb(skb); + return 0; + } + dst_hold(&skb->rtable->u.dst); skb->dev = nf_bridge->physindev; nf_bridge_push_encap_header(skb); @@ -391,8 +392,12 @@ bridged_dnat: skb->pkt_type = PACKET_HOST; } } else { - skb->rtable = &__fake_rtable; - dst_hold(&__fake_rtable.u.dst); + skb->rtable = bridge_parent_rtable(nf_bridge->physindev); + if (!skb->rtable) { + kfree_skb(skb); + return 0; + } + dst_hold(&skb->rtable->u.dst); } skb->dev = nf_bridge->physindev; @@ -611,8 +616,8 @@ static unsigned int br_nf_local_in(unsigned int hook, struct sk_buff *skb, const struct net_device *out, int (*okfn)(struct sk_buff *)) { - if (skb->rtable == &__fake_rtable) { - dst_release(&__fake_rtable.u.dst); + if (skb->rtable && skb->rtable == bridge_parent_rtable(in)) { + dst_release(&skb->rtable->u.dst); skb->rtable = NULL; } @@ -652,7 +657,7 @@ static unsigned int br_nf_forward_ip(unsigned int hook, struct sk_buff *skb, { struct nf_bridge_info *nf_bridge; struct net_device *parent; - int pf; + u_int8_t pf; if (!skb->nf_bridge) return NF_ACCEPT; @@ -786,7 +791,7 @@ static unsigned int br_nf_post_routing(unsigned int hook, struct sk_buff *skb, { struct nf_bridge_info *nf_bridge = skb->nf_bridge; struct net_device *realoutdev = bridge_parent(skb->dev); - int pf; + u_int8_t pf; #ifdef CONFIG_NETFILTER_DEBUG /* Be very paranoid. This probably won't happen anymore, but let's diff --git a/net/bridge/br_netlink.c b/net/bridge/br_netlink.c index f155e6ce8a21..ba7be195803c 100644 --- a/net/bridge/br_netlink.c +++ b/net/bridge/br_netlink.c @@ -82,6 +82,7 @@ nla_put_failure: */ void br_ifinfo_notify(int event, struct net_bridge_port *port) { + struct net *net = dev_net(port->dev); struct sk_buff *skb; int err = -ENOBUFS; @@ -97,10 +98,10 @@ void br_ifinfo_notify(int event, struct net_bridge_port *port) kfree_skb(skb); goto errout; } - err = rtnl_notify(skb, &init_net,0, RTNLGRP_LINK, NULL, GFP_ATOMIC); + err = rtnl_notify(skb, net, 0, RTNLGRP_LINK, NULL, GFP_ATOMIC); errout: if (err < 0) - rtnl_set_sk_err(&init_net, RTNLGRP_LINK, err); + rtnl_set_sk_err(net, RTNLGRP_LINK, err); } /* @@ -112,11 +113,8 @@ static int br_dump_ifinfo(struct sk_buff *skb, struct netlink_callback *cb) struct net_device *dev; int idx; - if (net != &init_net) - return 0; - idx = 0; - for_each_netdev(&init_net, dev) { + for_each_netdev(net, dev) { /* not a bridge port */ if (dev->br_port == NULL || idx < cb->args[0]) goto skip; @@ -147,9 +145,6 @@ static int br_rtm_setlink(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg) struct net_bridge_port *p; u8 new_state; - if (net != &init_net) - return -EINVAL; - if (nlmsg_len(nlh) < sizeof(*ifm)) return -EINVAL; @@ -165,7 +160,7 @@ static int br_rtm_setlink(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg) if (new_state > BR_STATE_BLOCKING) return -EINVAL; - dev = __dev_get_by_index(&init_net, ifm->ifi_index); + dev = __dev_get_by_index(net, ifm->ifi_index); if (!dev) return -ENODEV; diff --git a/net/bridge/br_notify.c b/net/bridge/br_notify.c index 00644a544e3c..763a3ec292e5 100644 --- a/net/bridge/br_notify.c +++ b/net/bridge/br_notify.c @@ -5,8 +5,6 @@ * Authors: * Lennert Buytenhek <buytenh@gnu.org> * - * $Id: br_notify.c,v 1.2 2000/02/21 15:51:34 davem Exp $ - * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public License * as published by the Free Software Foundation; either version @@ -37,9 +35,6 @@ static int br_device_event(struct notifier_block *unused, unsigned long event, v struct net_bridge_port *p = dev->br_port; struct net_bridge *br; - if (dev_net(dev) != &init_net) - return NOTIFY_DONE; - /* not a port of a bridge */ if (p == NULL) return NOTIFY_DONE; diff --git a/net/bridge/br_private.h b/net/bridge/br_private.h index c11b554fd109..b6c3b71974dc 100644 --- a/net/bridge/br_private.h +++ b/net/bridge/br_private.h @@ -4,8 +4,6 @@ * Authors: * Lennert Buytenhek <buytenh@gnu.org> * - * $Id: br_private.h,v 1.7 2001/12/24 00:59:55 davem Exp $ - * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public License * as published by the Free Software Foundation; either version @@ -17,6 +15,7 @@ #include <linux/netdevice.h> #include <linux/if_bridge.h> +#include <net/route.h> #define BR_HASH_BITS 8 #define BR_HASH_SIZE (1 << BR_HASH_BITS) @@ -90,11 +89,15 @@ struct net_bridge spinlock_t lock; struct list_head port_list; struct net_device *dev; - struct net_device_stats statistics; spinlock_t hash_lock; struct hlist_head hash[BR_HASH_SIZE]; struct list_head age_list; unsigned long feature_mask; +#ifdef CONFIG_BRIDGE_NETFILTER + struct rtable fake_rtable; +#endif + unsigned long flags; +#define BR_SET_MAC_ADDR 0x00000001 /* STP */ bridge_id designated_root; @@ -175,9 +178,9 @@ extern void br_flood_forward(struct net_bridge *br, struct sk_buff *skb); /* br_if.c */ extern void br_port_carrier_check(struct net_bridge_port *p); -extern int br_add_bridge(const char *name); -extern int br_del_bridge(const char *name); -extern void br_cleanup_bridges(void); +extern int br_add_bridge(struct net *net, const char *name); +extern int br_del_bridge(struct net *net, const char *name); +extern void br_net_exit(struct net *net); extern int br_add_if(struct net_bridge *br, struct net_device *dev); extern int br_del_if(struct net_bridge *br, @@ -198,9 +201,11 @@ extern int br_ioctl_deviceless_stub(struct net *net, unsigned int cmd, void __us #ifdef CONFIG_BRIDGE_NETFILTER extern int br_netfilter_init(void); extern void br_netfilter_fini(void); +extern void br_netfilter_rtable_init(struct net_bridge *); #else #define br_netfilter_init() (0) #define br_netfilter_fini() do { } while(0) +#define br_netfilter_rtable_init(x) #endif /* br_stp.c */ @@ -227,8 +232,9 @@ extern void br_stp_set_path_cost(struct net_bridge_port *p, extern ssize_t br_show_bridge_id(char *buf, const struct bridge_id *id); /* br_stp_bpdu.c */ -extern int br_stp_rcv(struct sk_buff *skb, struct net_device *dev, - struct packet_type *pt, struct net_device *orig_dev); +struct stp_proto; +extern void br_stp_rcv(const struct stp_proto *proto, struct sk_buff *skb, + struct net_device *dev); /* br_stp_timer.c */ extern void br_stp_timer_init(struct net_bridge *br); diff --git a/net/bridge/br_private_stp.h b/net/bridge/br_private_stp.h index e29f01ac1adf..8b650f7fbfa0 100644 --- a/net/bridge/br_private_stp.h +++ b/net/bridge/br_private_stp.h @@ -4,8 +4,6 @@ * Authors: * Lennert Buytenhek <buytenh@gnu.org> * - * $Id: br_private_stp.h,v 1.3 2001/02/05 06:03:47 davem Exp $ - * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public License * as published by the Free Software Foundation; either version diff --git a/net/bridge/br_stp.c b/net/bridge/br_stp.c index e38034aa56f5..6e63ec3f1fcf 100644 --- a/net/bridge/br_stp.c +++ b/net/bridge/br_stp.c @@ -5,14 +5,13 @@ * Authors: * Lennert Buytenhek <buytenh@gnu.org> * - * $Id: br_stp.c,v 1.4 2000/06/19 10:13:35 davem Exp $ - * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public License * as published by the Free Software Foundation; either version * 2 of the License, or (at your option) any later version. */ #include <linux/kernel.h> +#include <linux/rculist.h> #include "br_private.h" #include "br_private_stp.h" @@ -369,14 +368,25 @@ static void br_make_blocking(struct net_bridge_port *p) /* called under bridge lock */ static void br_make_forwarding(struct net_bridge_port *p) { - if (p->state == BR_STATE_BLOCKING) { - if (p->br->stp_enabled == BR_KERNEL_STP) - p->state = BR_STATE_LISTENING; - else - p->state = BR_STATE_LEARNING; + struct net_bridge *br = p->br; - br_log_state(p); - mod_timer(&p->forward_delay_timer, jiffies + p->br->forward_delay); } + if (p->state != BR_STATE_BLOCKING) + return; + + if (br->forward_delay == 0) { + p->state = BR_STATE_FORWARDING; + br_topology_change_detection(br); + del_timer(&p->forward_delay_timer); + } + else if (p->br->stp_enabled == BR_KERNEL_STP) + p->state = BR_STATE_LISTENING; + else + p->state = BR_STATE_LEARNING; + + br_log_state(p); + + if (br->forward_delay != 0) + mod_timer(&p->forward_delay_timer, jiffies + br->forward_delay); } /* called under bridge lock */ diff --git a/net/bridge/br_stp_bpdu.c b/net/bridge/br_stp_bpdu.c index ddeb6e5d45d6..81ae40b3f655 100644 --- a/net/bridge/br_stp_bpdu.c +++ b/net/bridge/br_stp_bpdu.c @@ -5,8 +5,6 @@ * Authors: * Lennert Buytenhek <buytenh@gnu.org> * - * $Id: br_stp_bpdu.c,v 1.3 2001/11/10 02:35:25 davem Exp $ - * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public License * as published by the Free Software Foundation; either version @@ -20,6 +18,7 @@ #include <net/net_namespace.h> #include <net/llc.h> #include <net/llc_pdu.h> +#include <net/stp.h> #include <asm/unaligned.h> #include "br_private.h" @@ -133,26 +132,17 @@ void br_send_tcn_bpdu(struct net_bridge_port *p) * * NO locks, but rcu_read_lock (preempt_disabled) */ -int br_stp_rcv(struct sk_buff *skb, struct net_device *dev, - struct packet_type *pt, struct net_device *orig_dev) +void br_stp_rcv(const struct stp_proto *proto, struct sk_buff *skb, + struct net_device *dev) { - const struct llc_pdu_un *pdu = llc_pdu_un_hdr(skb); const unsigned char *dest = eth_hdr(skb)->h_dest; struct net_bridge_port *p = rcu_dereference(dev->br_port); struct net_bridge *br; const unsigned char *buf; - if (dev_net(dev) != &init_net) - goto err; - if (!p) goto err; - if (pdu->ssap != LLC_SAP_BSPAN - || pdu->dsap != LLC_SAP_BSPAN - || pdu->ctrl_1 != LLC_PDU_TYPE_U) - goto err; - if (!pskb_may_pull(skb, 4)) goto err; @@ -226,5 +216,4 @@ int br_stp_rcv(struct sk_buff *skb, struct net_device *dev, spin_unlock(&br->lock); err: kfree_skb(skb); - return 0; } diff --git a/net/bridge/br_stp_if.c b/net/bridge/br_stp_if.c index 1a430eccec9b..9a52ac5b4525 100644 --- a/net/bridge/br_stp_if.c +++ b/net/bridge/br_stp_if.c @@ -5,8 +5,6 @@ * Authors: * Lennert Buytenhek <buytenh@gnu.org> * - * $Id: br_stp_if.c,v 1.4 2001/04/14 21:14:39 davem Exp $ - * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public License * as published by the Free Software Foundation; either version @@ -216,6 +214,10 @@ void br_stp_recalculate_bridge_id(struct net_bridge *br) const unsigned char *addr = br_mac_zero; struct net_bridge_port *p; + /* user has chosen a value so keep it */ + if (br->flags & BR_SET_MAC_ADDR) + return; + list_for_each_entry(p, &br->port_list, list) { if (addr == br_mac_zero || memcmp(p->dev->dev_addr, addr, ETH_ALEN) < 0) diff --git a/net/bridge/br_stp_timer.c b/net/bridge/br_stp_timer.c index 77f5255e6915..772a140bfdf0 100644 --- a/net/bridge/br_stp_timer.c +++ b/net/bridge/br_stp_timer.c @@ -5,8 +5,6 @@ * Authors: * Lennert Buytenhek <buytenh@gnu.org> * - * $Id: br_stp_timer.c,v 1.3 2000/05/05 02:17:17 davem Exp $ - * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public License * as published by the Free Software Foundation; either version diff --git a/net/bridge/br_sysfs_br.c b/net/bridge/br_sysfs_br.c index 27d6a511c8c1..158dee8b4965 100644 --- a/net/bridge/br_sysfs_br.c +++ b/net/bridge/br_sysfs_br.c @@ -29,11 +29,12 @@ */ static ssize_t store_bridge_parm(struct device *d, const char *buf, size_t len, - void (*set)(struct net_bridge *, unsigned long)) + int (*set)(struct net_bridge *, unsigned long)) { struct net_bridge *br = to_bridge(d); char *endp; unsigned long val; + int err; if (!capable(CAP_NET_ADMIN)) return -EPERM; @@ -43,9 +44,9 @@ static ssize_t store_bridge_parm(struct device *d, return -EINVAL; spin_lock_bh(&br->lock); - (*set)(br, val); + err = (*set)(br, val); spin_unlock_bh(&br->lock); - return len; + return err ? err : len; } @@ -56,12 +57,13 @@ static ssize_t show_forward_delay(struct device *d, return sprintf(buf, "%lu\n", jiffies_to_clock_t(br->forward_delay)); } -static void set_forward_delay(struct net_bridge *br, unsigned long val) +static int set_forward_delay(struct net_bridge *br, unsigned long val) { unsigned long delay = clock_t_to_jiffies(val); br->forward_delay = delay; if (br_is_root_bridge(br)) br->bridge_forward_delay = delay; + return 0; } static ssize_t store_forward_delay(struct device *d, @@ -80,12 +82,17 @@ static ssize_t show_hello_time(struct device *d, struct device_attribute *attr, jiffies_to_clock_t(to_bridge(d)->hello_time)); } -static void set_hello_time(struct net_bridge *br, unsigned long val) +static int set_hello_time(struct net_bridge *br, unsigned long val) { unsigned long t = clock_t_to_jiffies(val); + + if (t < HZ) + return -EINVAL; + br->hello_time = t; if (br_is_root_bridge(br)) br->bridge_hello_time = t; + return 0; } static ssize_t store_hello_time(struct device *d, @@ -104,12 +111,13 @@ static ssize_t show_max_age(struct device *d, struct device_attribute *attr, jiffies_to_clock_t(to_bridge(d)->max_age)); } -static void set_max_age(struct net_bridge *br, unsigned long val) +static int set_max_age(struct net_bridge *br, unsigned long val) { unsigned long t = clock_t_to_jiffies(val); br->max_age = t; if (br_is_root_bridge(br)) br->bridge_max_age = t; + return 0; } static ssize_t store_max_age(struct device *d, struct device_attribute *attr, @@ -126,9 +134,10 @@ static ssize_t show_ageing_time(struct device *d, return sprintf(buf, "%lu\n", jiffies_to_clock_t(br->ageing_time)); } -static void set_ageing_time(struct net_bridge *br, unsigned long val) +static int set_ageing_time(struct net_bridge *br, unsigned long val) { br->ageing_time = clock_t_to_jiffies(val); + return 0; } static ssize_t store_ageing_time(struct device *d, @@ -180,9 +189,10 @@ static ssize_t show_priority(struct device *d, struct device_attribute *attr, (br->bridge_id.prio[0] << 8) | br->bridge_id.prio[1]); } -static void set_priority(struct net_bridge *br, unsigned long val) +static int set_priority(struct net_bridge *br, unsigned long val) { br_stp_set_bridge_priority(br, (u16) val); + return 0; } static ssize_t store_priority(struct device *d, struct device_attribute *attr, diff --git a/net/bridge/netfilter/Kconfig b/net/bridge/netfilter/Kconfig index 7beeefa0f9c0..366d3e9d51f8 100644 --- a/net/bridge/netfilter/Kconfig +++ b/net/bridge/netfilter/Kconfig @@ -2,21 +2,21 @@ # Bridge netfilter configuration # -menu "Bridge: Netfilter Configuration" - depends on BRIDGE && BRIDGE_NETFILTER - -config BRIDGE_NF_EBTABLES +menuconfig BRIDGE_NF_EBTABLES tristate "Ethernet Bridge tables (ebtables) support" + select NETFILTER_XTABLES help ebtables is a general, extensible frame/packet identification framework. Say 'Y' or 'M' here if you want to do Ethernet filtering/NAT/brouting on the Ethernet bridge. + +if BRIDGE_NF_EBTABLES + # # tables # config BRIDGE_EBT_BROUTE tristate "ebt: broute table support" - depends on BRIDGE_NF_EBTABLES help The ebtables broute table is used to define rules that decide between bridging and routing frames, giving Linux the functionality of a @@ -27,7 +27,6 @@ config BRIDGE_EBT_BROUTE config BRIDGE_EBT_T_FILTER tristate "ebt: filter table support" - depends on BRIDGE_NF_EBTABLES help The ebtables filter table is used to define frame filtering rules at local input, forwarding and local output. See the man page for @@ -37,7 +36,6 @@ config BRIDGE_EBT_T_FILTER config BRIDGE_EBT_T_NAT tristate "ebt: nat table support" - depends on BRIDGE_NF_EBTABLES help The ebtables nat table is used to define rules that alter the MAC source address (MAC SNAT) or the MAC destination address (MAC DNAT). @@ -49,7 +47,6 @@ config BRIDGE_EBT_T_NAT # config BRIDGE_EBT_802_3 tristate "ebt: 802.3 filter support" - depends on BRIDGE_NF_EBTABLES help This option adds matching support for 802.3 Ethernet frames. @@ -57,7 +54,6 @@ config BRIDGE_EBT_802_3 config BRIDGE_EBT_AMONG tristate "ebt: among filter support" - depends on BRIDGE_NF_EBTABLES help This option adds the among match, which allows matching the MAC source and/or destination address on a list of addresses. Optionally, @@ -67,7 +63,6 @@ config BRIDGE_EBT_AMONG config BRIDGE_EBT_ARP tristate "ebt: ARP filter support" - depends on BRIDGE_NF_EBTABLES help This option adds the ARP match, which allows ARP and RARP header field filtering. @@ -76,16 +71,23 @@ config BRIDGE_EBT_ARP config BRIDGE_EBT_IP tristate "ebt: IP filter support" - depends on BRIDGE_NF_EBTABLES help This option adds the IP match, which allows basic IP header field filtering. To compile it as a module, choose M here. If unsure, say N. +config BRIDGE_EBT_IP6 + tristate "ebt: IP6 filter support" + depends on BRIDGE_NF_EBTABLES && IPV6 + help + This option adds the IP6 match, which allows basic IPV6 header field + filtering. + + To compile it as a module, choose M here. If unsure, say N. + config BRIDGE_EBT_LIMIT tristate "ebt: limit match support" - depends on BRIDGE_NF_EBTABLES help This option adds the limit match, which allows you to control the rate at which a rule can be matched. This match is the @@ -96,7 +98,6 @@ config BRIDGE_EBT_LIMIT config BRIDGE_EBT_MARK tristate "ebt: mark filter support" - depends on BRIDGE_NF_EBTABLES help This option adds the mark match, which allows matching frames based on the 'nfmark' value in the frame. This can be set by the mark target. @@ -107,7 +108,6 @@ config BRIDGE_EBT_MARK config BRIDGE_EBT_PKTTYPE tristate "ebt: packet type filter support" - depends on BRIDGE_NF_EBTABLES help This option adds the packet type match, which allows matching on the type of packet based on its Ethernet "class" (as determined by @@ -118,7 +118,6 @@ config BRIDGE_EBT_PKTTYPE config BRIDGE_EBT_STP tristate "ebt: STP filter support" - depends on BRIDGE_NF_EBTABLES help This option adds the Spanning Tree Protocol match, which allows STP header field filtering. @@ -127,7 +126,6 @@ config BRIDGE_EBT_STP config BRIDGE_EBT_VLAN tristate "ebt: 802.1Q VLAN filter support" - depends on BRIDGE_NF_EBTABLES help This option adds the 802.1Q vlan match, which allows the filtering of 802.1Q vlan fields. @@ -147,7 +145,6 @@ config BRIDGE_EBT_ARPREPLY config BRIDGE_EBT_DNAT tristate "ebt: dnat target support" - depends on BRIDGE_NF_EBTABLES help This option adds the MAC DNAT target, which allows altering the MAC destination address of frames. @@ -156,7 +153,6 @@ config BRIDGE_EBT_DNAT config BRIDGE_EBT_MARK_T tristate "ebt: mark target support" - depends on BRIDGE_NF_EBTABLES help This option adds the mark target, which allows marking frames by setting the 'nfmark' value in the frame. @@ -167,7 +163,6 @@ config BRIDGE_EBT_MARK_T config BRIDGE_EBT_REDIRECT tristate "ebt: redirect target support" - depends on BRIDGE_NF_EBTABLES help This option adds the MAC redirect target, which allows altering the MAC destination address of a frame to that of the device it arrived on. @@ -176,7 +171,6 @@ config BRIDGE_EBT_REDIRECT config BRIDGE_EBT_SNAT tristate "ebt: snat target support" - depends on BRIDGE_NF_EBTABLES help This option adds the MAC SNAT target, which allows altering the MAC source address of frames. @@ -187,7 +181,6 @@ config BRIDGE_EBT_SNAT # config BRIDGE_EBT_LOG tristate "ebt: log support" - depends on BRIDGE_NF_EBTABLES help This option adds the log watcher, that you can use in any rule in any ebtables table. It records info about the frame header @@ -197,7 +190,6 @@ config BRIDGE_EBT_LOG config BRIDGE_EBT_ULOG tristate "ebt: ulog support (OBSOLETE)" - depends on BRIDGE_NF_EBTABLES help This option enables the old bridge-specific "ebt_ulog" implementation which has been obsoleted by the new "nfnetlink_log" code (see @@ -214,16 +206,15 @@ config BRIDGE_EBT_ULOG config BRIDGE_EBT_NFLOG tristate "ebt: nflog support" - depends on BRIDGE_NF_EBTABLES help This option enables the nflog watcher, which allows to LOG messages through the netfilter logging API, which can use either the old LOG target, the old ULOG target or nfnetlink_log as backend. - This option adds the ulog watcher, that you can use in any rule + This option adds the nflog watcher, that you can use in any rule in any ebtables table. To compile it as a module, choose M here. If unsure, say N. -endmenu +endif # BRIDGE_NF_EBTABLES diff --git a/net/bridge/netfilter/Makefile b/net/bridge/netfilter/Makefile index 83715d73a503..0718699540b0 100644 --- a/net/bridge/netfilter/Makefile +++ b/net/bridge/netfilter/Makefile @@ -14,6 +14,7 @@ obj-$(CONFIG_BRIDGE_EBT_802_3) += ebt_802_3.o obj-$(CONFIG_BRIDGE_EBT_AMONG) += ebt_among.o obj-$(CONFIG_BRIDGE_EBT_ARP) += ebt_arp.o obj-$(CONFIG_BRIDGE_EBT_IP) += ebt_ip.o +obj-$(CONFIG_BRIDGE_EBT_IP6) += ebt_ip6.o obj-$(CONFIG_BRIDGE_EBT_LIMIT) += ebt_limit.o obj-$(CONFIG_BRIDGE_EBT_MARK) += ebt_mark_m.o obj-$(CONFIG_BRIDGE_EBT_PKTTYPE) += ebt_pkttype.o diff --git a/net/bridge/netfilter/ebt_802_3.c b/net/bridge/netfilter/ebt_802_3.c index 98534025360f..bd91dc58d49b 100644 --- a/net/bridge/netfilter/ebt_802_3.c +++ b/net/bridge/netfilter/ebt_802_3.c @@ -7,64 +7,63 @@ * May 2003 * */ - +#include <linux/module.h> +#include <linux/netfilter/x_tables.h> #include <linux/netfilter_bridge/ebtables.h> #include <linux/netfilter_bridge/ebt_802_3.h> -#include <linux/module.h> -static int ebt_filter_802_3(const struct sk_buff *skb, const struct net_device *in, - const struct net_device *out, const void *data, unsigned int datalen) +static bool +ebt_802_3_mt(const struct sk_buff *skb, const struct xt_match_param *par) { - const struct ebt_802_3_info *info = data; + const struct ebt_802_3_info *info = par->matchinfo; const struct ebt_802_3_hdr *hdr = ebt_802_3_hdr(skb); __be16 type = hdr->llc.ui.ctrl & IS_UI ? hdr->llc.ui.type : hdr->llc.ni.type; if (info->bitmask & EBT_802_3_SAP) { if (FWINV(info->sap != hdr->llc.ui.ssap, EBT_802_3_SAP)) - return EBT_NOMATCH; + return false; if (FWINV(info->sap != hdr->llc.ui.dsap, EBT_802_3_SAP)) - return EBT_NOMATCH; + return false; } if (info->bitmask & EBT_802_3_TYPE) { if (!(hdr->llc.ui.dsap == CHECK_TYPE && hdr->llc.ui.ssap == CHECK_TYPE)) - return EBT_NOMATCH; + return false; if (FWINV(info->type != type, EBT_802_3_TYPE)) - return EBT_NOMATCH; + return false; } - return EBT_MATCH; + return true; } -static struct ebt_match filter_802_3; -static int ebt_802_3_check(const char *tablename, unsigned int hookmask, - const struct ebt_entry *e, void *data, unsigned int datalen) +static bool ebt_802_3_mt_check(const struct xt_mtchk_param *par) { - const struct ebt_802_3_info *info = data; + const struct ebt_802_3_info *info = par->matchinfo; - if (datalen < sizeof(struct ebt_802_3_info)) - return -EINVAL; if (info->bitmask & ~EBT_802_3_MASK || info->invflags & ~EBT_802_3_MASK) - return -EINVAL; + return false; - return 0; + return true; } -static struct ebt_match filter_802_3 __read_mostly = { - .name = EBT_802_3_MATCH, - .match = ebt_filter_802_3, - .check = ebt_802_3_check, +static struct xt_match ebt_802_3_mt_reg __read_mostly = { + .name = "802_3", + .revision = 0, + .family = NFPROTO_BRIDGE, + .match = ebt_802_3_mt, + .checkentry = ebt_802_3_mt_check, + .matchsize = XT_ALIGN(sizeof(struct ebt_802_3_info)), .me = THIS_MODULE, }; static int __init ebt_802_3_init(void) { - return ebt_register_match(&filter_802_3); + return xt_register_match(&ebt_802_3_mt_reg); } static void __exit ebt_802_3_fini(void) { - ebt_unregister_match(&filter_802_3); + xt_unregister_match(&ebt_802_3_mt_reg); } module_init(ebt_802_3_init); diff --git a/net/bridge/netfilter/ebt_among.c b/net/bridge/netfilter/ebt_among.c index 70b6dca5ea75..b595f091f35b 100644 --- a/net/bridge/netfilter/ebt_among.c +++ b/net/bridge/netfilter/ebt_among.c @@ -7,15 +7,15 @@ * August, 2003 * */ - -#include <linux/netfilter_bridge/ebtables.h> -#include <linux/netfilter_bridge/ebt_among.h> #include <linux/ip.h> #include <linux/if_arp.h> #include <linux/module.h> +#include <linux/netfilter/x_tables.h> +#include <linux/netfilter_bridge/ebtables.h> +#include <linux/netfilter_bridge/ebt_among.h> -static int ebt_mac_wormhash_contains(const struct ebt_mac_wormhash *wh, - const char *mac, __be32 ip) +static bool ebt_mac_wormhash_contains(const struct ebt_mac_wormhash *wh, + const char *mac, __be32 ip) { /* You may be puzzled as to how this code works. * Some tricks were used, refer to @@ -33,23 +33,19 @@ static int ebt_mac_wormhash_contains(const struct ebt_mac_wormhash *wh, if (ip) { for (i = start; i < limit; i++) { p = &wh->pool[i]; - if (cmp[1] == p->cmp[1] && cmp[0] == p->cmp[0]) { - if (p->ip == 0 || p->ip == ip) { - return 1; - } - } + if (cmp[1] == p->cmp[1] && cmp[0] == p->cmp[0]) + if (p->ip == 0 || p->ip == ip) + return true; } } else { for (i = start; i < limit; i++) { p = &wh->pool[i]; - if (cmp[1] == p->cmp[1] && cmp[0] == p->cmp[0]) { - if (p->ip == 0) { - return 1; - } - } + if (cmp[1] == p->cmp[1] && cmp[0] == p->cmp[0]) + if (p->ip == 0) + return true; } } - return 0; + return false; } static int ebt_mac_wormhash_check_integrity(const struct ebt_mac_wormhash @@ -131,12 +127,10 @@ static int get_ip_src(const struct sk_buff *skb, __be32 *addr) return 0; } -static int ebt_filter_among(const struct sk_buff *skb, - const struct net_device *in, - const struct net_device *out, const void *data, - unsigned int datalen) +static bool +ebt_among_mt(const struct sk_buff *skb, const struct xt_match_param *par) { - const struct ebt_among_info *info = data; + const struct ebt_among_info *info = par->matchinfo; const char *dmac, *smac; const struct ebt_mac_wormhash *wh_dst, *wh_src; __be32 dip = 0, sip = 0; @@ -147,41 +141,41 @@ static int ebt_filter_among(const struct sk_buff *skb, if (wh_src) { smac = eth_hdr(skb)->h_source; if (get_ip_src(skb, &sip)) - return EBT_NOMATCH; + return false; if (!(info->bitmask & EBT_AMONG_SRC_NEG)) { /* we match only if it contains */ if (!ebt_mac_wormhash_contains(wh_src, smac, sip)) - return EBT_NOMATCH; + return false; } else { /* we match only if it DOES NOT contain */ if (ebt_mac_wormhash_contains(wh_src, smac, sip)) - return EBT_NOMATCH; + return false; } } if (wh_dst) { dmac = eth_hdr(skb)->h_dest; if (get_ip_dst(skb, &dip)) - return EBT_NOMATCH; + return false; if (!(info->bitmask & EBT_AMONG_DST_NEG)) { /* we match only if it contains */ if (!ebt_mac_wormhash_contains(wh_dst, dmac, dip)) - return EBT_NOMATCH; + return false; } else { /* we match only if it DOES NOT contain */ if (ebt_mac_wormhash_contains(wh_dst, dmac, dip)) - return EBT_NOMATCH; + return false; } } - return EBT_MATCH; + return true; } -static int ebt_among_check(const char *tablename, unsigned int hookmask, - const struct ebt_entry *e, void *data, - unsigned int datalen) +static bool ebt_among_mt_check(const struct xt_mtchk_param *par) { - const struct ebt_among_info *info = data; + const struct ebt_among_info *info = par->matchinfo; + const struct ebt_entry_match *em = + container_of(par->matchinfo, const struct ebt_entry_match, data); int expected_length = sizeof(struct ebt_among_info); const struct ebt_mac_wormhash *wh_dst, *wh_src; int err; @@ -191,42 +185,45 @@ static int ebt_among_check(const char *tablename, unsigned int hookmask, expected_length += ebt_mac_wormhash_size(wh_dst); expected_length += ebt_mac_wormhash_size(wh_src); - if (datalen != EBT_ALIGN(expected_length)) { + if (em->match_size != EBT_ALIGN(expected_length)) { printk(KERN_WARNING "ebtables: among: wrong size: %d " "against expected %d, rounded to %Zd\n", - datalen, expected_length, + em->match_size, expected_length, EBT_ALIGN(expected_length)); - return -EINVAL; + return false; } if (wh_dst && (err = ebt_mac_wormhash_check_integrity(wh_dst))) { printk(KERN_WARNING "ebtables: among: dst integrity fail: %x\n", -err); - return -EINVAL; + return false; } if (wh_src && (err = ebt_mac_wormhash_check_integrity(wh_src))) { printk(KERN_WARNING "ebtables: among: src integrity fail: %x\n", -err); - return -EINVAL; + return false; } - return 0; + return true; } -static struct ebt_match filter_among __read_mostly = { - .name = EBT_AMONG_MATCH, - .match = ebt_filter_among, - .check = ebt_among_check, +static struct xt_match ebt_among_mt_reg __read_mostly = { + .name = "among", + .revision = 0, + .family = NFPROTO_BRIDGE, + .match = ebt_among_mt, + .checkentry = ebt_among_mt_check, + .matchsize = -1, /* special case */ .me = THIS_MODULE, }; static int __init ebt_among_init(void) { - return ebt_register_match(&filter_among); + return xt_register_match(&ebt_among_mt_reg); } static void __exit ebt_among_fini(void) { - ebt_unregister_match(&filter_among); + xt_unregister_match(&ebt_among_mt_reg); } module_init(ebt_among_init); diff --git a/net/bridge/netfilter/ebt_arp.c b/net/bridge/netfilter/ebt_arp.c index 7c535be75665..b7ad60419f9a 100644 --- a/net/bridge/netfilter/ebt_arp.c +++ b/net/bridge/netfilter/ebt_arp.c @@ -8,58 +8,58 @@ * April, 2002 * */ - -#include <linux/netfilter_bridge/ebtables.h> -#include <linux/netfilter_bridge/ebt_arp.h> #include <linux/if_arp.h> #include <linux/if_ether.h> #include <linux/module.h> +#include <linux/netfilter/x_tables.h> +#include <linux/netfilter_bridge/ebtables.h> +#include <linux/netfilter_bridge/ebt_arp.h> -static int ebt_filter_arp(const struct sk_buff *skb, const struct net_device *in, - const struct net_device *out, const void *data, unsigned int datalen) +static bool +ebt_arp_mt(const struct sk_buff *skb, const struct xt_match_param *par) { - const struct ebt_arp_info *info = data; + const struct ebt_arp_info *info = par->matchinfo; const struct arphdr *ah; struct arphdr _arph; ah = skb_header_pointer(skb, 0, sizeof(_arph), &_arph); if (ah == NULL) - return EBT_NOMATCH; + return false; if (info->bitmask & EBT_ARP_OPCODE && FWINV(info->opcode != ah->ar_op, EBT_ARP_OPCODE)) - return EBT_NOMATCH; + return false; if (info->bitmask & EBT_ARP_HTYPE && FWINV(info->htype != ah->ar_hrd, EBT_ARP_HTYPE)) - return EBT_NOMATCH; + return false; if (info->bitmask & EBT_ARP_PTYPE && FWINV(info->ptype != ah->ar_pro, EBT_ARP_PTYPE)) - return EBT_NOMATCH; + return false; if (info->bitmask & (EBT_ARP_SRC_IP | EBT_ARP_DST_IP | EBT_ARP_GRAT)) { const __be32 *sap, *dap; __be32 saddr, daddr; if (ah->ar_pln != sizeof(__be32) || ah->ar_pro != htons(ETH_P_IP)) - return EBT_NOMATCH; + return false; sap = skb_header_pointer(skb, sizeof(struct arphdr) + ah->ar_hln, sizeof(saddr), &saddr); if (sap == NULL) - return EBT_NOMATCH; + return false; dap = skb_header_pointer(skb, sizeof(struct arphdr) + 2*ah->ar_hln+sizeof(saddr), sizeof(daddr), &daddr); if (dap == NULL) - return EBT_NOMATCH; + return false; if (info->bitmask & EBT_ARP_SRC_IP && FWINV(info->saddr != (*sap & info->smsk), EBT_ARP_SRC_IP)) - return EBT_NOMATCH; + return false; if (info->bitmask & EBT_ARP_DST_IP && FWINV(info->daddr != (*dap & info->dmsk), EBT_ARP_DST_IP)) - return EBT_NOMATCH; + return false; if (info->bitmask & EBT_ARP_GRAT && FWINV(*dap != *sap, EBT_ARP_GRAT)) - return EBT_NOMATCH; + return false; } if (info->bitmask & (EBT_ARP_SRC_MAC | EBT_ARP_DST_MAC)) { @@ -68,18 +68,18 @@ static int ebt_filter_arp(const struct sk_buff *skb, const struct net_device *in uint8_t verdict, i; if (ah->ar_hln != ETH_ALEN || ah->ar_hrd != htons(ARPHRD_ETHER)) - return EBT_NOMATCH; + return false; if (info->bitmask & EBT_ARP_SRC_MAC) { mp = skb_header_pointer(skb, sizeof(struct arphdr), sizeof(_mac), &_mac); if (mp == NULL) - return EBT_NOMATCH; + return false; verdict = 0; for (i = 0; i < 6; i++) verdict |= (mp[i] ^ info->smaddr[i]) & info->smmsk[i]; if (FWINV(verdict != 0, EBT_ARP_SRC_MAC)) - return EBT_NOMATCH; + return false; } if (info->bitmask & EBT_ARP_DST_MAC) { @@ -87,50 +87,51 @@ static int ebt_filter_arp(const struct sk_buff *skb, const struct net_device *in ah->ar_hln + ah->ar_pln, sizeof(_mac), &_mac); if (mp == NULL) - return EBT_NOMATCH; + return false; verdict = 0; for (i = 0; i < 6; i++) verdict |= (mp[i] ^ info->dmaddr[i]) & info->dmmsk[i]; if (FWINV(verdict != 0, EBT_ARP_DST_MAC)) - return EBT_NOMATCH; + return false; } } - return EBT_MATCH; + return true; } -static int ebt_arp_check(const char *tablename, unsigned int hookmask, - const struct ebt_entry *e, void *data, unsigned int datalen) +static bool ebt_arp_mt_check(const struct xt_mtchk_param *par) { - const struct ebt_arp_info *info = data; + const struct ebt_arp_info *info = par->matchinfo; + const struct ebt_entry *e = par->entryinfo; - if (datalen != EBT_ALIGN(sizeof(struct ebt_arp_info))) - return -EINVAL; if ((e->ethproto != htons(ETH_P_ARP) && e->ethproto != htons(ETH_P_RARP)) || e->invflags & EBT_IPROTO) - return -EINVAL; + return false; if (info->bitmask & ~EBT_ARP_MASK || info->invflags & ~EBT_ARP_MASK) - return -EINVAL; - return 0; + return false; + return true; } -static struct ebt_match filter_arp __read_mostly = { - .name = EBT_ARP_MATCH, - .match = ebt_filter_arp, - .check = ebt_arp_check, +static struct xt_match ebt_arp_mt_reg __read_mostly = { + .name = "arp", + .revision = 0, + .family = NFPROTO_BRIDGE, + .match = ebt_arp_mt, + .checkentry = ebt_arp_mt_check, + .matchsize = XT_ALIGN(sizeof(struct ebt_arp_info)), .me = THIS_MODULE, }; static int __init ebt_arp_init(void) { - return ebt_register_match(&filter_arp); + return xt_register_match(&ebt_arp_mt_reg); } static void __exit ebt_arp_fini(void) { - ebt_unregister_match(&filter_arp); + xt_unregister_match(&ebt_arp_mt_reg); } module_init(ebt_arp_init); diff --git a/net/bridge/netfilter/ebt_arpreply.c b/net/bridge/netfilter/ebt_arpreply.c index 0c4279590fc7..76584cd72e57 100644 --- a/net/bridge/netfilter/ebt_arpreply.c +++ b/net/bridge/netfilter/ebt_arpreply.c @@ -8,18 +8,17 @@ * August, 2003 * */ - -#include <linux/netfilter_bridge/ebtables.h> -#include <linux/netfilter_bridge/ebt_arpreply.h> #include <linux/if_arp.h> #include <net/arp.h> #include <linux/module.h> +#include <linux/netfilter/x_tables.h> +#include <linux/netfilter_bridge/ebtables.h> +#include <linux/netfilter_bridge/ebt_arpreply.h> -static int ebt_target_reply(struct sk_buff *skb, unsigned int hooknr, - const struct net_device *in, const struct net_device *out, - const void *data, unsigned int datalen) +static unsigned int +ebt_arpreply_tg(struct sk_buff *skb, const struct xt_target_param *par) { - struct ebt_arpreply_info *info = (void *)data; + const struct ebt_arpreply_info *info = par->targinfo; const __be32 *siptr, *diptr; __be32 _sip, _dip; const struct arphdr *ap; @@ -52,45 +51,45 @@ static int ebt_target_reply(struct sk_buff *skb, unsigned int hooknr, if (diptr == NULL) return EBT_DROP; - arp_send(ARPOP_REPLY, ETH_P_ARP, *siptr, (struct net_device *)in, + arp_send(ARPOP_REPLY, ETH_P_ARP, *siptr, (struct net_device *)par->in, *diptr, shp, info->mac, shp); return info->target; } -static int ebt_target_reply_check(const char *tablename, unsigned int hookmask, - const struct ebt_entry *e, void *data, unsigned int datalen) +static bool ebt_arpreply_tg_check(const struct xt_tgchk_param *par) { - const struct ebt_arpreply_info *info = data; + const struct ebt_arpreply_info *info = par->targinfo; + const struct ebt_entry *e = par->entryinfo; - if (datalen != EBT_ALIGN(sizeof(struct ebt_arpreply_info))) - return -EINVAL; if (BASE_CHAIN && info->target == EBT_RETURN) - return -EINVAL; + return false; if (e->ethproto != htons(ETH_P_ARP) || e->invflags & EBT_IPROTO) - return -EINVAL; - CLEAR_BASE_CHAIN_BIT; - if (strcmp(tablename, "nat") || hookmask & ~(1 << NF_BR_PRE_ROUTING)) - return -EINVAL; - return 0; + return false; + return true; } -static struct ebt_target reply_target __read_mostly = { - .name = EBT_ARPREPLY_TARGET, - .target = ebt_target_reply, - .check = ebt_target_reply_check, +static struct xt_target ebt_arpreply_tg_reg __read_mostly = { + .name = "arpreply", + .revision = 0, + .family = NFPROTO_BRIDGE, + .table = "nat", + .hooks = (1 << NF_BR_NUMHOOKS) | (1 << NF_BR_PRE_ROUTING), + .target = ebt_arpreply_tg, + .checkentry = ebt_arpreply_tg_check, + .targetsize = XT_ALIGN(sizeof(struct ebt_arpreply_info)), .me = THIS_MODULE, }; static int __init ebt_arpreply_init(void) { - return ebt_register_target(&reply_target); + return xt_register_target(&ebt_arpreply_tg_reg); } static void __exit ebt_arpreply_fini(void) { - ebt_unregister_target(&reply_target); + xt_unregister_target(&ebt_arpreply_tg_reg); } module_init(ebt_arpreply_init); diff --git a/net/bridge/netfilter/ebt_dnat.c b/net/bridge/netfilter/ebt_dnat.c index ca64c1cc1b47..6b49ea9e31fb 100644 --- a/net/bridge/netfilter/ebt_dnat.c +++ b/net/bridge/netfilter/ebt_dnat.c @@ -7,18 +7,17 @@ * June, 2002 * */ - +#include <linux/module.h> +#include <net/sock.h> #include <linux/netfilter.h> +#include <linux/netfilter/x_tables.h> #include <linux/netfilter_bridge/ebtables.h> #include <linux/netfilter_bridge/ebt_nat.h> -#include <linux/module.h> -#include <net/sock.h> -static int ebt_target_dnat(struct sk_buff *skb, unsigned int hooknr, - const struct net_device *in, const struct net_device *out, - const void *data, unsigned int datalen) +static unsigned int +ebt_dnat_tg(struct sk_buff *skb, const struct xt_target_param *par) { - const struct ebt_nat_info *info = data; + const struct ebt_nat_info *info = par->targinfo; if (!skb_make_writable(skb, 0)) return EBT_DROP; @@ -27,40 +26,46 @@ static int ebt_target_dnat(struct sk_buff *skb, unsigned int hooknr, return info->target; } -static int ebt_target_dnat_check(const char *tablename, unsigned int hookmask, - const struct ebt_entry *e, void *data, unsigned int datalen) +static bool ebt_dnat_tg_check(const struct xt_tgchk_param *par) { - const struct ebt_nat_info *info = data; + const struct ebt_nat_info *info = par->targinfo; + unsigned int hook_mask; if (BASE_CHAIN && info->target == EBT_RETURN) - return -EINVAL; - CLEAR_BASE_CHAIN_BIT; - if ( (strcmp(tablename, "nat") || - (hookmask & ~((1 << NF_BR_PRE_ROUTING) | (1 << NF_BR_LOCAL_OUT)))) && - (strcmp(tablename, "broute") || hookmask & ~(1 << NF_BR_BROUTING)) ) - return -EINVAL; - if (datalen != EBT_ALIGN(sizeof(struct ebt_nat_info))) - return -EINVAL; + return false; + + hook_mask = par->hook_mask & ~(1 << NF_BR_NUMHOOKS); + if ((strcmp(par->table, "nat") != 0 || + (hook_mask & ~((1 << NF_BR_PRE_ROUTING) | + (1 << NF_BR_LOCAL_OUT)))) && + (strcmp(par->table, "broute") != 0 || + hook_mask & ~(1 << NF_BR_BROUTING))) + return false; if (INVALID_TARGET) - return -EINVAL; - return 0; + return false; + return true; } -static struct ebt_target dnat __read_mostly = { - .name = EBT_DNAT_TARGET, - .target = ebt_target_dnat, - .check = ebt_target_dnat_check, +static struct xt_target ebt_dnat_tg_reg __read_mostly = { + .name = "dnat", + .revision = 0, + .family = NFPROTO_BRIDGE, + .hooks = (1 << NF_BR_NUMHOOKS) | (1 << NF_BR_PRE_ROUTING) | + (1 << NF_BR_LOCAL_OUT) | (1 << NF_BR_BROUTING), + .target = ebt_dnat_tg, + .checkentry = ebt_dnat_tg_check, + .targetsize = XT_ALIGN(sizeof(struct ebt_nat_info)), .me = THIS_MODULE, }; static int __init ebt_dnat_init(void) { - return ebt_register_target(&dnat); + return xt_register_target(&ebt_dnat_tg_reg); } static void __exit ebt_dnat_fini(void) { - ebt_unregister_target(&dnat); + xt_unregister_target(&ebt_dnat_tg_reg); } module_init(ebt_dnat_init); diff --git a/net/bridge/netfilter/ebt_ip.c b/net/bridge/netfilter/ebt_ip.c index 65caa00dcf2a..d771bbfbcbe6 100644 --- a/net/bridge/netfilter/ebt_ip.c +++ b/net/bridge/netfilter/ebt_ip.c @@ -11,24 +11,23 @@ * Innominate Security Technologies AG <mhopf@innominate.com> * September, 2002 */ - -#include <linux/netfilter_bridge/ebtables.h> -#include <linux/netfilter_bridge/ebt_ip.h> #include <linux/ip.h> #include <net/ip.h> #include <linux/in.h> #include <linux/module.h> +#include <linux/netfilter/x_tables.h> +#include <linux/netfilter_bridge/ebtables.h> +#include <linux/netfilter_bridge/ebt_ip.h> struct tcpudphdr { __be16 src; __be16 dst; }; -static int ebt_filter_ip(const struct sk_buff *skb, const struct net_device *in, - const struct net_device *out, const void *data, - unsigned int datalen) +static bool +ebt_ip_mt(const struct sk_buff *skb, const struct xt_match_param *par) { - const struct ebt_ip_info *info = data; + const struct ebt_ip_info *info = par->matchinfo; const struct iphdr *ih; struct iphdr _iph; const struct tcpudphdr *pptr; @@ -36,92 +35,93 @@ static int ebt_filter_ip(const struct sk_buff *skb, const struct net_device *in, ih = skb_header_pointer(skb, 0, sizeof(_iph), &_iph); if (ih == NULL) - return EBT_NOMATCH; + return false; if (info->bitmask & EBT_IP_TOS && FWINV(info->tos != ih->tos, EBT_IP_TOS)) - return EBT_NOMATCH; + return false; if (info->bitmask & EBT_IP_SOURCE && FWINV((ih->saddr & info->smsk) != info->saddr, EBT_IP_SOURCE)) - return EBT_NOMATCH; + return false; if ((info->bitmask & EBT_IP_DEST) && FWINV((ih->daddr & info->dmsk) != info->daddr, EBT_IP_DEST)) - return EBT_NOMATCH; + return false; if (info->bitmask & EBT_IP_PROTO) { if (FWINV(info->protocol != ih->protocol, EBT_IP_PROTO)) - return EBT_NOMATCH; + return false; if (!(info->bitmask & EBT_IP_DPORT) && !(info->bitmask & EBT_IP_SPORT)) - return EBT_MATCH; + return true; if (ntohs(ih->frag_off) & IP_OFFSET) - return EBT_NOMATCH; + return false; pptr = skb_header_pointer(skb, ih->ihl*4, sizeof(_ports), &_ports); if (pptr == NULL) - return EBT_NOMATCH; + return false; if (info->bitmask & EBT_IP_DPORT) { u32 dst = ntohs(pptr->dst); if (FWINV(dst < info->dport[0] || dst > info->dport[1], EBT_IP_DPORT)) - return EBT_NOMATCH; + return false; } if (info->bitmask & EBT_IP_SPORT) { u32 src = ntohs(pptr->src); if (FWINV(src < info->sport[0] || src > info->sport[1], EBT_IP_SPORT)) - return EBT_NOMATCH; + return false; } } - return EBT_MATCH; + return true; } -static int ebt_ip_check(const char *tablename, unsigned int hookmask, - const struct ebt_entry *e, void *data, unsigned int datalen) +static bool ebt_ip_mt_check(const struct xt_mtchk_param *par) { - const struct ebt_ip_info *info = data; + const struct ebt_ip_info *info = par->matchinfo; + const struct ebt_entry *e = par->entryinfo; - if (datalen != EBT_ALIGN(sizeof(struct ebt_ip_info))) - return -EINVAL; if (e->ethproto != htons(ETH_P_IP) || e->invflags & EBT_IPROTO) - return -EINVAL; + return false; if (info->bitmask & ~EBT_IP_MASK || info->invflags & ~EBT_IP_MASK) - return -EINVAL; + return false; if (info->bitmask & (EBT_IP_DPORT | EBT_IP_SPORT)) { if (info->invflags & EBT_IP_PROTO) - return -EINVAL; + return false; if (info->protocol != IPPROTO_TCP && info->protocol != IPPROTO_UDP && info->protocol != IPPROTO_UDPLITE && info->protocol != IPPROTO_SCTP && info->protocol != IPPROTO_DCCP) - return -EINVAL; + return false; } if (info->bitmask & EBT_IP_DPORT && info->dport[0] > info->dport[1]) - return -EINVAL; + return false; if (info->bitmask & EBT_IP_SPORT && info->sport[0] > info->sport[1]) - return -EINVAL; - return 0; + return false; + return true; } -static struct ebt_match filter_ip __read_mostly = { - .name = EBT_IP_MATCH, - .match = ebt_filter_ip, - .check = ebt_ip_check, +static struct xt_match ebt_ip_mt_reg __read_mostly = { + .name = "ip", + .revision = 0, + .family = NFPROTO_BRIDGE, + .match = ebt_ip_mt, + .checkentry = ebt_ip_mt_check, + .matchsize = XT_ALIGN(sizeof(struct ebt_ip_info)), .me = THIS_MODULE, }; static int __init ebt_ip_init(void) { - return ebt_register_match(&filter_ip); + return xt_register_match(&ebt_ip_mt_reg); } static void __exit ebt_ip_fini(void) { - ebt_unregister_match(&filter_ip); + xt_unregister_match(&ebt_ip_mt_reg); } module_init(ebt_ip_init); diff --git a/net/bridge/netfilter/ebt_ip6.c b/net/bridge/netfilter/ebt_ip6.c new file mode 100644 index 000000000000..784a6573876c --- /dev/null +++ b/net/bridge/netfilter/ebt_ip6.c @@ -0,0 +1,142 @@ +/* + * ebt_ip6 + * + * Authors: + * Manohar Castelino <manohar.r.castelino@intel.com> + * Kuo-Lang Tseng <kuo-lang.tseng@intel.com> + * Jan Engelhardt <jengelh@computergmbh.de> + * + * Summary: + * This is just a modification of the IPv4 code written by + * Bart De Schuymer <bdschuym@pandora.be> + * with the changes required to support IPv6 + * + * Jan, 2008 + */ +#include <linux/ipv6.h> +#include <net/ipv6.h> +#include <linux/in.h> +#include <linux/module.h> +#include <net/dsfield.h> +#include <linux/netfilter/x_tables.h> +#include <linux/netfilter_bridge/ebtables.h> +#include <linux/netfilter_bridge/ebt_ip6.h> + +struct tcpudphdr { + __be16 src; + __be16 dst; +}; + +static bool +ebt_ip6_mt(const struct sk_buff *skb, const struct xt_match_param *par) +{ + const struct ebt_ip6_info *info = par->matchinfo; + const struct ipv6hdr *ih6; + struct ipv6hdr _ip6h; + const struct tcpudphdr *pptr; + struct tcpudphdr _ports; + struct in6_addr tmp_addr; + int i; + + ih6 = skb_header_pointer(skb, 0, sizeof(_ip6h), &_ip6h); + if (ih6 == NULL) + return false; + if (info->bitmask & EBT_IP6_TCLASS && + FWINV(info->tclass != ipv6_get_dsfield(ih6), EBT_IP6_TCLASS)) + return false; + for (i = 0; i < 4; i++) + tmp_addr.in6_u.u6_addr32[i] = ih6->saddr.in6_u.u6_addr32[i] & + info->smsk.in6_u.u6_addr32[i]; + if (info->bitmask & EBT_IP6_SOURCE && + FWINV((ipv6_addr_cmp(&tmp_addr, &info->saddr) != 0), + EBT_IP6_SOURCE)) + return false; + for (i = 0; i < 4; i++) + tmp_addr.in6_u.u6_addr32[i] = ih6->daddr.in6_u.u6_addr32[i] & + info->dmsk.in6_u.u6_addr32[i]; + if (info->bitmask & EBT_IP6_DEST && + FWINV((ipv6_addr_cmp(&tmp_addr, &info->daddr) != 0), EBT_IP6_DEST)) + return false; + if (info->bitmask & EBT_IP6_PROTO) { + uint8_t nexthdr = ih6->nexthdr; + int offset_ph; + + offset_ph = ipv6_skip_exthdr(skb, sizeof(_ip6h), &nexthdr); + if (offset_ph == -1) + return false; + if (FWINV(info->protocol != nexthdr, EBT_IP6_PROTO)) + return false; + if (!(info->bitmask & EBT_IP6_DPORT) && + !(info->bitmask & EBT_IP6_SPORT)) + return true; + pptr = skb_header_pointer(skb, offset_ph, sizeof(_ports), + &_ports); + if (pptr == NULL) + return false; + if (info->bitmask & EBT_IP6_DPORT) { + u32 dst = ntohs(pptr->dst); + if (FWINV(dst < info->dport[0] || + dst > info->dport[1], EBT_IP6_DPORT)) + return false; + } + if (info->bitmask & EBT_IP6_SPORT) { + u32 src = ntohs(pptr->src); + if (FWINV(src < info->sport[0] || + src > info->sport[1], EBT_IP6_SPORT)) + return false; + } + return true; + } + return true; +} + +static bool ebt_ip6_mt_check(const struct xt_mtchk_param *par) +{ + const struct ebt_entry *e = par->entryinfo; + struct ebt_ip6_info *info = par->matchinfo; + + if (e->ethproto != htons(ETH_P_IPV6) || e->invflags & EBT_IPROTO) + return false; + if (info->bitmask & ~EBT_IP6_MASK || info->invflags & ~EBT_IP6_MASK) + return false; + if (info->bitmask & (EBT_IP6_DPORT | EBT_IP6_SPORT)) { + if (info->invflags & EBT_IP6_PROTO) + return false; + if (info->protocol != IPPROTO_TCP && + info->protocol != IPPROTO_UDP && + info->protocol != IPPROTO_UDPLITE && + info->protocol != IPPROTO_SCTP && + info->protocol != IPPROTO_DCCP) + return false; + } + if (info->bitmask & EBT_IP6_DPORT && info->dport[0] > info->dport[1]) + return false; + if (info->bitmask & EBT_IP6_SPORT && info->sport[0] > info->sport[1]) + return false; + return true; +} + +static struct xt_match ebt_ip6_mt_reg __read_mostly = { + .name = "ip6", + .revision = 0, + .family = NFPROTO_BRIDGE, + .match = ebt_ip6_mt, + .checkentry = ebt_ip6_mt_check, + .matchsize = XT_ALIGN(sizeof(struct ebt_ip6_info)), + .me = THIS_MODULE, +}; + +static int __init ebt_ip6_init(void) +{ + return xt_register_match(&ebt_ip6_mt_reg); +} + +static void __exit ebt_ip6_fini(void) +{ + xt_unregister_match(&ebt_ip6_mt_reg); +} + +module_init(ebt_ip6_init); +module_exit(ebt_ip6_fini); +MODULE_DESCRIPTION("Ebtables: IPv6 protocol packet match"); +MODULE_LICENSE("GPL"); diff --git a/net/bridge/netfilter/ebt_limit.c b/net/bridge/netfilter/ebt_limit.c index 8cbdc01c253e..f7bd9192ff0c 100644 --- a/net/bridge/netfilter/ebt_limit.c +++ b/net/bridge/netfilter/ebt_limit.c @@ -10,13 +10,12 @@ * September, 2003 * */ - -#include <linux/netfilter_bridge/ebtables.h> -#include <linux/netfilter_bridge/ebt_limit.h> #include <linux/module.h> - #include <linux/netdevice.h> #include <linux/spinlock.h> +#include <linux/netfilter/x_tables.h> +#include <linux/netfilter_bridge/ebtables.h> +#include <linux/netfilter_bridge/ebt_limit.h> static DEFINE_SPINLOCK(limit_lock); @@ -31,11 +30,10 @@ static DEFINE_SPINLOCK(limit_lock); #define CREDITS_PER_JIFFY POW2_BELOW32(MAX_CPJ) -static int ebt_limit_match(const struct sk_buff *skb, - const struct net_device *in, const struct net_device *out, - const void *data, unsigned int datalen) +static bool +ebt_limit_mt(const struct sk_buff *skb, const struct xt_match_param *par) { - struct ebt_limit_info *info = (struct ebt_limit_info *)data; + struct ebt_limit_info *info = (void *)par->matchinfo; unsigned long now = jiffies; spin_lock_bh(&limit_lock); @@ -47,11 +45,11 @@ static int ebt_limit_match(const struct sk_buff *skb, /* We're not limited. */ info->credit -= info->cost; spin_unlock_bh(&limit_lock); - return EBT_MATCH; + return true; } spin_unlock_bh(&limit_lock); - return EBT_NOMATCH; + return false; } /* Precision saver. */ @@ -66,20 +64,16 @@ user2credits(u_int32_t user) return (user * HZ * CREDITS_PER_JIFFY) / EBT_LIMIT_SCALE; } -static int ebt_limit_check(const char *tablename, unsigned int hookmask, - const struct ebt_entry *e, void *data, unsigned int datalen) +static bool ebt_limit_mt_check(const struct xt_mtchk_param *par) { - struct ebt_limit_info *info = data; - - if (datalen != EBT_ALIGN(sizeof(struct ebt_limit_info))) - return -EINVAL; + struct ebt_limit_info *info = par->matchinfo; /* Check for overflow. */ if (info->burst == 0 || user2credits(info->avg * info->burst) < user2credits(info->avg)) { printk("Overflow in ebt_limit, try lower: %u/%u\n", info->avg, info->burst); - return -EINVAL; + return false; } /* User avg in seconds * EBT_LIMIT_SCALE: convert to jiffies * 128. */ @@ -87,24 +81,27 @@ static int ebt_limit_check(const char *tablename, unsigned int hookmask, info->credit = user2credits(info->avg * info->burst); info->credit_cap = user2credits(info->avg * info->burst); info->cost = user2credits(info->avg); - return 0; + return true; } -static struct ebt_match ebt_limit_reg __read_mostly = { - .name = EBT_LIMIT_MATCH, - .match = ebt_limit_match, - .check = ebt_limit_check, +static struct xt_match ebt_limit_mt_reg __read_mostly = { + .name = "limit", + .revision = 0, + .family = NFPROTO_BRIDGE, + .match = ebt_limit_mt, + .checkentry = ebt_limit_mt_check, + .matchsize = XT_ALIGN(sizeof(struct ebt_limit_info)), .me = THIS_MODULE, }; static int __init ebt_limit_init(void) { - return ebt_register_match(&ebt_limit_reg); + return xt_register_match(&ebt_limit_mt_reg); } static void __exit ebt_limit_fini(void) { - ebt_unregister_match(&ebt_limit_reg); + xt_unregister_match(&ebt_limit_mt_reg); } module_init(ebt_limit_init); diff --git a/net/bridge/netfilter/ebt_log.c b/net/bridge/netfilter/ebt_log.c index 0b209e4aad0a..3d33c608906a 100644 --- a/net/bridge/netfilter/ebt_log.c +++ b/net/bridge/netfilter/ebt_log.c @@ -8,32 +8,32 @@ * April, 2002 * */ - -#include <linux/netfilter_bridge/ebtables.h> -#include <linux/netfilter_bridge/ebt_log.h> -#include <linux/netfilter.h> #include <linux/module.h> #include <linux/ip.h> #include <linux/in.h> #include <linux/if_arp.h> #include <linux/spinlock.h> #include <net/netfilter/nf_log.h> +#include <linux/ipv6.h> +#include <net/ipv6.h> +#include <linux/in6.h> +#include <linux/netfilter/x_tables.h> +#include <linux/netfilter_bridge/ebtables.h> +#include <linux/netfilter_bridge/ebt_log.h> +#include <linux/netfilter.h> static DEFINE_SPINLOCK(ebt_log_lock); -static int ebt_log_check(const char *tablename, unsigned int hookmask, - const struct ebt_entry *e, void *data, unsigned int datalen) +static bool ebt_log_tg_check(const struct xt_tgchk_param *par) { - struct ebt_log_info *info = data; + struct ebt_log_info *info = par->targinfo; - if (datalen != EBT_ALIGN(sizeof(struct ebt_log_info))) - return -EINVAL; if (info->bitmask & ~EBT_LOG_MASK) - return -EINVAL; + return false; if (info->loglevel >= 8) - return -EINVAL; + return false; info->prefix[EBT_LOG_PREFIX_SIZE - 1] = '\0'; - return 0; + return true; } struct tcpudphdr @@ -58,9 +58,30 @@ static void print_MAC(const unsigned char *p) printk("%02x%c", *p, i == ETH_ALEN - 1 ? ' ':':'); } +static void +print_ports(const struct sk_buff *skb, uint8_t protocol, int offset) +{ + if (protocol == IPPROTO_TCP || + protocol == IPPROTO_UDP || + protocol == IPPROTO_UDPLITE || + protocol == IPPROTO_SCTP || + protocol == IPPROTO_DCCP) { + const struct tcpudphdr *pptr; + struct tcpudphdr _ports; + + pptr = skb_header_pointer(skb, offset, + sizeof(_ports), &_ports); + if (pptr == NULL) { + printk(" INCOMPLETE TCP/UDP header"); + return; + } + printk(" SPT=%u DPT=%u", ntohs(pptr->src), ntohs(pptr->dst)); + } +} + #define myNIPQUAD(a) a[0], a[1], a[2], a[3] static void -ebt_log_packet(unsigned int pf, unsigned int hooknum, +ebt_log_packet(u_int8_t pf, unsigned int hooknum, const struct sk_buff *skb, const struct net_device *in, const struct net_device *out, const struct nf_loginfo *loginfo, const char *prefix) @@ -95,25 +116,35 @@ ebt_log_packet(unsigned int pf, unsigned int hooknum, printk(" IP SRC=%u.%u.%u.%u IP DST=%u.%u.%u.%u, IP " "tos=0x%02X, IP proto=%d", NIPQUAD(ih->saddr), NIPQUAD(ih->daddr), ih->tos, ih->protocol); - if (ih->protocol == IPPROTO_TCP || - ih->protocol == IPPROTO_UDP || - ih->protocol == IPPROTO_UDPLITE || - ih->protocol == IPPROTO_SCTP || - ih->protocol == IPPROTO_DCCP) { - const struct tcpudphdr *pptr; - struct tcpudphdr _ports; - - pptr = skb_header_pointer(skb, ih->ihl*4, - sizeof(_ports), &_ports); - if (pptr == NULL) { - printk(" INCOMPLETE TCP/UDP header"); - goto out; - } - printk(" SPT=%u DPT=%u", ntohs(pptr->src), - ntohs(pptr->dst)); + print_ports(skb, ih->protocol, ih->ihl*4); + goto out; + } + +#if defined(CONFIG_BRIDGE_EBT_IP6) || defined(CONFIG_BRIDGE_EBT_IP6_MODULE) + if ((bitmask & EBT_LOG_IP6) && eth_hdr(skb)->h_proto == + htons(ETH_P_IPV6)) { + const struct ipv6hdr *ih; + struct ipv6hdr _iph; + uint8_t nexthdr; + int offset_ph; + + ih = skb_header_pointer(skb, 0, sizeof(_iph), &_iph); + if (ih == NULL) { + printk(" INCOMPLETE IPv6 header"); + goto out; } + printk(" IPv6 SRC=%x:%x:%x:%x:%x:%x:%x:%x " + "IPv6 DST=%x:%x:%x:%x:%x:%x:%x:%x, IPv6 " + "priority=0x%01X, Next Header=%d", NIP6(ih->saddr), + NIP6(ih->daddr), ih->priority, ih->nexthdr); + nexthdr = ih->nexthdr; + offset_ph = ipv6_skip_exthdr(skb, sizeof(_iph), &nexthdr); + if (offset_ph == -1) + goto out; + print_ports(skb, nexthdr, offset_ph); goto out; } +#endif if ((bitmask & EBT_LOG_ARP) && ((eth_hdr(skb)->h_proto == htons(ETH_P_ARP)) || @@ -160,11 +191,10 @@ out: } -static void ebt_log(const struct sk_buff *skb, unsigned int hooknr, - const struct net_device *in, const struct net_device *out, - const void *data, unsigned int datalen) +static unsigned int +ebt_log_tg(struct sk_buff *skb, const struct xt_target_param *par) { - const struct ebt_log_info *info = data; + const struct ebt_log_info *info = par->targinfo; struct nf_loginfo li; li.type = NF_LOG_TYPE_LOG; @@ -172,18 +202,21 @@ static void ebt_log(const struct sk_buff *skb, unsigned int hooknr, li.u.log.logflags = info->bitmask; if (info->bitmask & EBT_LOG_NFLOG) - nf_log_packet(PF_BRIDGE, hooknr, skb, in, out, &li, - "%s", info->prefix); + nf_log_packet(NFPROTO_BRIDGE, par->hooknum, skb, par->in, + par->out, &li, "%s", info->prefix); else - ebt_log_packet(PF_BRIDGE, hooknr, skb, in, out, &li, - info->prefix); + ebt_log_packet(NFPROTO_BRIDGE, par->hooknum, skb, par->in, + par->out, &li, info->prefix); + return EBT_CONTINUE; } -static struct ebt_watcher log = -{ - .name = EBT_LOG_WATCHER, - .watcher = ebt_log, - .check = ebt_log_check, +static struct xt_target ebt_log_tg_reg __read_mostly = { + .name = "log", + .revision = 0, + .family = NFPROTO_BRIDGE, + .target = ebt_log_tg, + .checkentry = ebt_log_tg_check, + .targetsize = XT_ALIGN(sizeof(struct ebt_log_info)), .me = THIS_MODULE, }; @@ -197,17 +230,17 @@ static int __init ebt_log_init(void) { int ret; - ret = ebt_register_watcher(&log); + ret = xt_register_target(&ebt_log_tg_reg); if (ret < 0) return ret; - nf_log_register(PF_BRIDGE, &ebt_log_logger); + nf_log_register(NFPROTO_BRIDGE, &ebt_log_logger); return 0; } static void __exit ebt_log_fini(void) { nf_log_unregister(&ebt_log_logger); - ebt_unregister_watcher(&log); + xt_unregister_target(&ebt_log_tg_reg); } module_init(ebt_log_init); diff --git a/net/bridge/netfilter/ebt_mark.c b/net/bridge/netfilter/ebt_mark.c index 36723f47db0a..2fee7e8e2e93 100644 --- a/net/bridge/netfilter/ebt_mark.c +++ b/net/bridge/netfilter/ebt_mark.c @@ -13,15 +13,15 @@ * Marking a frame doesn't really change anything in the frame anyway. */ +#include <linux/module.h> +#include <linux/netfilter/x_tables.h> #include <linux/netfilter_bridge/ebtables.h> #include <linux/netfilter_bridge/ebt_mark_t.h> -#include <linux/module.h> -static int ebt_target_mark(struct sk_buff *skb, unsigned int hooknr, - const struct net_device *in, const struct net_device *out, - const void *data, unsigned int datalen) +static unsigned int +ebt_mark_tg(struct sk_buff *skb, const struct xt_target_param *par) { - const struct ebt_mark_t_info *info = data; + const struct ebt_mark_t_info *info = par->targinfo; int action = info->target & -16; if (action == MARK_SET_VALUE) @@ -36,42 +36,41 @@ static int ebt_target_mark(struct sk_buff *skb, unsigned int hooknr, return info->target | ~EBT_VERDICT_BITS; } -static int ebt_target_mark_check(const char *tablename, unsigned int hookmask, - const struct ebt_entry *e, void *data, unsigned int datalen) +static bool ebt_mark_tg_check(const struct xt_tgchk_param *par) { - const struct ebt_mark_t_info *info = data; + const struct ebt_mark_t_info *info = par->targinfo; int tmp; - if (datalen != EBT_ALIGN(sizeof(struct ebt_mark_t_info))) - return -EINVAL; tmp = info->target | ~EBT_VERDICT_BITS; if (BASE_CHAIN && tmp == EBT_RETURN) - return -EINVAL; - CLEAR_BASE_CHAIN_BIT; + return false; if (tmp < -NUM_STANDARD_TARGETS || tmp >= 0) - return -EINVAL; + return false; tmp = info->target & ~EBT_VERDICT_BITS; if (tmp != MARK_SET_VALUE && tmp != MARK_OR_VALUE && tmp != MARK_AND_VALUE && tmp != MARK_XOR_VALUE) - return -EINVAL; - return 0; + return false; + return true; } -static struct ebt_target mark_target __read_mostly = { - .name = EBT_MARK_TARGET, - .target = ebt_target_mark, - .check = ebt_target_mark_check, +static struct xt_target ebt_mark_tg_reg __read_mostly = { + .name = "mark", + .revision = 0, + .family = NFPROTO_BRIDGE, + .target = ebt_mark_tg, + .checkentry = ebt_mark_tg_check, + .targetsize = XT_ALIGN(sizeof(struct ebt_mark_t_info)), .me = THIS_MODULE, }; static int __init ebt_mark_init(void) { - return ebt_register_target(&mark_target); + return xt_register_target(&ebt_mark_tg_reg); } static void __exit ebt_mark_fini(void) { - ebt_unregister_target(&mark_target); + xt_unregister_target(&ebt_mark_tg_reg); } module_init(ebt_mark_init); diff --git a/net/bridge/netfilter/ebt_mark_m.c b/net/bridge/netfilter/ebt_mark_m.c index 9b0a4543861f..ea570f214b1d 100644 --- a/net/bridge/netfilter/ebt_mark_m.c +++ b/net/bridge/netfilter/ebt_mark_m.c @@ -7,53 +7,52 @@ * July, 2002 * */ - +#include <linux/module.h> +#include <linux/netfilter/x_tables.h> #include <linux/netfilter_bridge/ebtables.h> #include <linux/netfilter_bridge/ebt_mark_m.h> -#include <linux/module.h> -static int ebt_filter_mark(const struct sk_buff *skb, - const struct net_device *in, const struct net_device *out, const void *data, - unsigned int datalen) +static bool +ebt_mark_mt(const struct sk_buff *skb, const struct xt_match_param *par) { - const struct ebt_mark_m_info *info = data; + const struct ebt_mark_m_info *info = par->matchinfo; if (info->bitmask & EBT_MARK_OR) - return !(!!(skb->mark & info->mask) ^ info->invert); - return !(((skb->mark & info->mask) == info->mark) ^ info->invert); + return !!(skb->mark & info->mask) ^ info->invert; + return ((skb->mark & info->mask) == info->mark) ^ info->invert; } -static int ebt_mark_check(const char *tablename, unsigned int hookmask, - const struct ebt_entry *e, void *data, unsigned int datalen) +static bool ebt_mark_mt_check(const struct xt_mtchk_param *par) { - const struct ebt_mark_m_info *info = data; + const struct ebt_mark_m_info *info = par->matchinfo; - if (datalen != EBT_ALIGN(sizeof(struct ebt_mark_m_info))) - return -EINVAL; if (info->bitmask & ~EBT_MARK_MASK) - return -EINVAL; + return false; if ((info->bitmask & EBT_MARK_OR) && (info->bitmask & EBT_MARK_AND)) - return -EINVAL; + return false; if (!info->bitmask) - return -EINVAL; - return 0; + return false; + return true; } -static struct ebt_match filter_mark __read_mostly = { - .name = EBT_MARK_MATCH, - .match = ebt_filter_mark, - .check = ebt_mark_check, +static struct xt_match ebt_mark_mt_reg __read_mostly = { + .name = "mark_m", + .revision = 0, + .family = NFPROTO_BRIDGE, + .match = ebt_mark_mt, + .checkentry = ebt_mark_mt_check, + .matchsize = XT_ALIGN(sizeof(struct ebt_mark_m_info)), .me = THIS_MODULE, }; static int __init ebt_mark_m_init(void) { - return ebt_register_match(&filter_mark); + return xt_register_match(&ebt_mark_mt_reg); } static void __exit ebt_mark_m_fini(void) { - ebt_unregister_match(&filter_mark); + xt_unregister_match(&ebt_mark_mt_reg); } module_init(ebt_mark_m_init); diff --git a/net/bridge/netfilter/ebt_nflog.c b/net/bridge/netfilter/ebt_nflog.c index 8e799aa9e560..2a63d996dd4e 100644 --- a/net/bridge/netfilter/ebt_nflog.c +++ b/net/bridge/netfilter/ebt_nflog.c @@ -14,17 +14,15 @@ #include <linux/module.h> #include <linux/spinlock.h> +#include <linux/netfilter/x_tables.h> #include <linux/netfilter_bridge/ebtables.h> #include <linux/netfilter_bridge/ebt_nflog.h> #include <net/netfilter/nf_log.h> -static void ebt_nflog(const struct sk_buff *skb, - unsigned int hooknr, - const struct net_device *in, - const struct net_device *out, - const void *data, unsigned int datalen) +static unsigned int +ebt_nflog_tg(struct sk_buff *skb, const struct xt_target_param *par) { - struct ebt_nflog_info *info = (struct ebt_nflog_info *)data; + const struct ebt_nflog_info *info = par->targinfo; struct nf_loginfo li; li.type = NF_LOG_TYPE_ULOG; @@ -32,39 +30,39 @@ static void ebt_nflog(const struct sk_buff *skb, li.u.ulog.group = info->group; li.u.ulog.qthreshold = info->threshold; - nf_log_packet(PF_BRIDGE, hooknr, skb, in, out, &li, "%s", info->prefix); + nf_log_packet(PF_BRIDGE, par->hooknum, skb, par->in, par->out, + &li, "%s", info->prefix); + return EBT_CONTINUE; } -static int ebt_nflog_check(const char *tablename, - unsigned int hookmask, - const struct ebt_entry *e, - void *data, unsigned int datalen) +static bool ebt_nflog_tg_check(const struct xt_tgchk_param *par) { - struct ebt_nflog_info *info = (struct ebt_nflog_info *)data; + struct ebt_nflog_info *info = par->targinfo; - if (datalen != EBT_ALIGN(sizeof(struct ebt_nflog_info))) - return -EINVAL; if (info->flags & ~EBT_NFLOG_MASK) - return -EINVAL; + return false; info->prefix[EBT_NFLOG_PREFIX_SIZE - 1] = '\0'; - return 0; + return true; } -static struct ebt_watcher nflog __read_mostly = { - .name = EBT_NFLOG_WATCHER, - .watcher = ebt_nflog, - .check = ebt_nflog_check, - .me = THIS_MODULE, +static struct xt_target ebt_nflog_tg_reg __read_mostly = { + .name = "nflog", + .revision = 0, + .family = NFPROTO_BRIDGE, + .target = ebt_nflog_tg, + .checkentry = ebt_nflog_tg_check, + .targetsize = XT_ALIGN(sizeof(struct ebt_nflog_info)), + .me = THIS_MODULE, }; static int __init ebt_nflog_init(void) { - return ebt_register_watcher(&nflog); + return xt_register_target(&ebt_nflog_tg_reg); } static void __exit ebt_nflog_fini(void) { - ebt_unregister_watcher(&nflog); + xt_unregister_target(&ebt_nflog_tg_reg); } module_init(ebt_nflog_init); diff --git a/net/bridge/netfilter/ebt_pkttype.c b/net/bridge/netfilter/ebt_pkttype.c index 676db32df3d1..883e96e2a542 100644 --- a/net/bridge/netfilter/ebt_pkttype.c +++ b/net/bridge/netfilter/ebt_pkttype.c @@ -7,50 +7,47 @@ * April, 2003 * */ - +#include <linux/module.h> +#include <linux/netfilter/x_tables.h> #include <linux/netfilter_bridge/ebtables.h> #include <linux/netfilter_bridge/ebt_pkttype.h> -#include <linux/module.h> -static int ebt_filter_pkttype(const struct sk_buff *skb, - const struct net_device *in, - const struct net_device *out, - const void *data, - unsigned int datalen) +static bool +ebt_pkttype_mt(const struct sk_buff *skb, const struct xt_match_param *par) { - const struct ebt_pkttype_info *info = data; + const struct ebt_pkttype_info *info = par->matchinfo; - return (skb->pkt_type != info->pkt_type) ^ info->invert; + return (skb->pkt_type == info->pkt_type) ^ info->invert; } -static int ebt_pkttype_check(const char *tablename, unsigned int hookmask, - const struct ebt_entry *e, void *data, unsigned int datalen) +static bool ebt_pkttype_mt_check(const struct xt_mtchk_param *par) { - const struct ebt_pkttype_info *info = data; + const struct ebt_pkttype_info *info = par->matchinfo; - if (datalen != EBT_ALIGN(sizeof(struct ebt_pkttype_info))) - return -EINVAL; if (info->invert != 0 && info->invert != 1) - return -EINVAL; + return false; /* Allow any pkt_type value */ - return 0; + return true; } -static struct ebt_match filter_pkttype __read_mostly = { - .name = EBT_PKTTYPE_MATCH, - .match = ebt_filter_pkttype, - .check = ebt_pkttype_check, +static struct xt_match ebt_pkttype_mt_reg __read_mostly = { + .name = "pkttype", + .revision = 0, + .family = NFPROTO_BRIDGE, + .match = ebt_pkttype_mt, + .checkentry = ebt_pkttype_mt_check, + .matchsize = XT_ALIGN(sizeof(struct ebt_pkttype_info)), .me = THIS_MODULE, }; static int __init ebt_pkttype_init(void) { - return ebt_register_match(&filter_pkttype); + return xt_register_match(&ebt_pkttype_mt_reg); } static void __exit ebt_pkttype_fini(void) { - ebt_unregister_match(&filter_pkttype); + xt_unregister_match(&ebt_pkttype_mt_reg); } module_init(ebt_pkttype_init); diff --git a/net/bridge/netfilter/ebt_redirect.c b/net/bridge/netfilter/ebt_redirect.c index b8afe850cf1e..c8a49f7a57ba 100644 --- a/net/bridge/netfilter/ebt_redirect.c +++ b/net/bridge/netfilter/ebt_redirect.c @@ -7,65 +7,70 @@ * April, 2002 * */ - -#include <linux/netfilter.h> -#include <linux/netfilter_bridge/ebtables.h> -#include <linux/netfilter_bridge/ebt_redirect.h> #include <linux/module.h> #include <net/sock.h> #include "../br_private.h" +#include <linux/netfilter.h> +#include <linux/netfilter/x_tables.h> +#include <linux/netfilter_bridge/ebtables.h> +#include <linux/netfilter_bridge/ebt_redirect.h> -static int ebt_target_redirect(struct sk_buff *skb, unsigned int hooknr, - const struct net_device *in, const struct net_device *out, - const void *data, unsigned int datalen) +static unsigned int +ebt_redirect_tg(struct sk_buff *skb, const struct xt_target_param *par) { - const struct ebt_redirect_info *info = data; + const struct ebt_redirect_info *info = par->targinfo; if (!skb_make_writable(skb, 0)) return EBT_DROP; - if (hooknr != NF_BR_BROUTING) + if (par->hooknum != NF_BR_BROUTING) memcpy(eth_hdr(skb)->h_dest, - in->br_port->br->dev->dev_addr, ETH_ALEN); + par->in->br_port->br->dev->dev_addr, ETH_ALEN); else - memcpy(eth_hdr(skb)->h_dest, in->dev_addr, ETH_ALEN); + memcpy(eth_hdr(skb)->h_dest, par->in->dev_addr, ETH_ALEN); skb->pkt_type = PACKET_HOST; return info->target; } -static int ebt_target_redirect_check(const char *tablename, unsigned int hookmask, - const struct ebt_entry *e, void *data, unsigned int datalen) +static bool ebt_redirect_tg_check(const struct xt_tgchk_param *par) { - const struct ebt_redirect_info *info = data; + const struct ebt_redirect_info *info = par->targinfo; + unsigned int hook_mask; - if (datalen != EBT_ALIGN(sizeof(struct ebt_redirect_info))) - return -EINVAL; if (BASE_CHAIN && info->target == EBT_RETURN) - return -EINVAL; - CLEAR_BASE_CHAIN_BIT; - if ( (strcmp(tablename, "nat") || hookmask & ~(1 << NF_BR_PRE_ROUTING)) && - (strcmp(tablename, "broute") || hookmask & ~(1 << NF_BR_BROUTING)) ) - return -EINVAL; + return false; + + hook_mask = par->hook_mask & ~(1 << NF_BR_NUMHOOKS); + if ((strcmp(par->table, "nat") != 0 || + hook_mask & ~(1 << NF_BR_PRE_ROUTING)) && + (strcmp(par->table, "broute") != 0 || + hook_mask & ~(1 << NF_BR_BROUTING))) + return false; if (INVALID_TARGET) - return -EINVAL; - return 0; + return false; + return true; } -static struct ebt_target redirect_target __read_mostly = { - .name = EBT_REDIRECT_TARGET, - .target = ebt_target_redirect, - .check = ebt_target_redirect_check, +static struct xt_target ebt_redirect_tg_reg __read_mostly = { + .name = "redirect", + .revision = 0, + .family = NFPROTO_BRIDGE, + .hooks = (1 << NF_BR_NUMHOOKS) | (1 << NF_BR_PRE_ROUTING) | + (1 << NF_BR_BROUTING), + .target = ebt_redirect_tg, + .checkentry = ebt_redirect_tg_check, + .targetsize = XT_ALIGN(sizeof(struct ebt_redirect_info)), .me = THIS_MODULE, }; static int __init ebt_redirect_init(void) { - return ebt_register_target(&redirect_target); + return xt_register_target(&ebt_redirect_tg_reg); } static void __exit ebt_redirect_fini(void) { - ebt_unregister_target(&redirect_target); + xt_unregister_target(&ebt_redirect_tg_reg); } module_init(ebt_redirect_init); diff --git a/net/bridge/netfilter/ebt_snat.c b/net/bridge/netfilter/ebt_snat.c index 5425333dda03..8d04d4c302bd 100644 --- a/net/bridge/netfilter/ebt_snat.c +++ b/net/bridge/netfilter/ebt_snat.c @@ -7,20 +7,19 @@ * June, 2002 * */ - -#include <linux/netfilter.h> -#include <linux/netfilter_bridge/ebtables.h> -#include <linux/netfilter_bridge/ebt_nat.h> #include <linux/module.h> #include <net/sock.h> #include <linux/if_arp.h> #include <net/arp.h> +#include <linux/netfilter.h> +#include <linux/netfilter/x_tables.h> +#include <linux/netfilter_bridge/ebtables.h> +#include <linux/netfilter_bridge/ebt_nat.h> -static int ebt_target_snat(struct sk_buff *skb, unsigned int hooknr, - const struct net_device *in, const struct net_device *out, - const void *data, unsigned int datalen) +static unsigned int +ebt_snat_tg(struct sk_buff *skb, const struct xt_target_param *par) { - const struct ebt_nat_info *info = data; + const struct ebt_nat_info *info = par->targinfo; if (!skb_make_writable(skb, 0)) return EBT_DROP; @@ -43,46 +42,43 @@ out: return info->target | ~EBT_VERDICT_BITS; } -static int ebt_target_snat_check(const char *tablename, unsigned int hookmask, - const struct ebt_entry *e, void *data, unsigned int datalen) +static bool ebt_snat_tg_check(const struct xt_tgchk_param *par) { - const struct ebt_nat_info *info = data; + const struct ebt_nat_info *info = par->targinfo; int tmp; - if (datalen != EBT_ALIGN(sizeof(struct ebt_nat_info))) - return -EINVAL; tmp = info->target | ~EBT_VERDICT_BITS; if (BASE_CHAIN && tmp == EBT_RETURN) - return -EINVAL; - CLEAR_BASE_CHAIN_BIT; - if (strcmp(tablename, "nat")) - return -EINVAL; - if (hookmask & ~(1 << NF_BR_POST_ROUTING)) - return -EINVAL; + return false; if (tmp < -NUM_STANDARD_TARGETS || tmp >= 0) - return -EINVAL; + return false; tmp = info->target | EBT_VERDICT_BITS; if ((tmp & ~NAT_ARP_BIT) != ~NAT_ARP_BIT) - return -EINVAL; - return 0; + return false; + return true; } -static struct ebt_target snat __read_mostly = { - .name = EBT_SNAT_TARGET, - .target = ebt_target_snat, - .check = ebt_target_snat_check, +static struct xt_target ebt_snat_tg_reg __read_mostly = { + .name = "snat", + .revision = 0, + .family = NFPROTO_BRIDGE, + .table = "nat", + .hooks = (1 << NF_BR_NUMHOOKS) | (1 << NF_BR_POST_ROUTING), + .target = ebt_snat_tg, + .checkentry = ebt_snat_tg_check, + .targetsize = XT_ALIGN(sizeof(struct ebt_nat_info)), .me = THIS_MODULE, }; static int __init ebt_snat_init(void) { - return ebt_register_target(&snat); + return xt_register_target(&ebt_snat_tg_reg); } static void __exit ebt_snat_fini(void) { - ebt_unregister_target(&snat); + xt_unregister_target(&ebt_snat_tg_reg); } module_init(ebt_snat_init); diff --git a/net/bridge/netfilter/ebt_stp.c b/net/bridge/netfilter/ebt_stp.c index 40f36d37607d..48527e621626 100644 --- a/net/bridge/netfilter/ebt_stp.c +++ b/net/bridge/netfilter/ebt_stp.c @@ -7,11 +7,11 @@ * * July, 2003 */ - -#include <linux/netfilter_bridge/ebtables.h> -#include <linux/netfilter_bridge/ebt_stp.h> #include <linux/etherdevice.h> #include <linux/module.h> +#include <linux/netfilter/x_tables.h> +#include <linux/netfilter_bridge/ebtables.h> +#include <linux/netfilter_bridge/ebt_stp.h> #define BPDU_TYPE_CONFIG 0 #define BPDU_TYPE_TCN 0x80 @@ -40,7 +40,7 @@ struct stp_config_pdu { #define NR16(p) (p[0] << 8 | p[1]) #define NR32(p) ((p[0] << 24) | (p[1] << 16) | (p[2] << 8) | p[3]) -static int ebt_filter_config(const struct ebt_stp_info *info, +static bool ebt_filter_config(const struct ebt_stp_info *info, const struct stp_config_pdu *stpc) { const struct ebt_stp_config_info *c; @@ -51,12 +51,12 @@ static int ebt_filter_config(const struct ebt_stp_info *info, c = &info->config; if ((info->bitmask & EBT_STP_FLAGS) && FWINV(c->flags != stpc->flags, EBT_STP_FLAGS)) - return EBT_NOMATCH; + return false; if (info->bitmask & EBT_STP_ROOTPRIO) { v16 = NR16(stpc->root); if (FWINV(v16 < c->root_priol || v16 > c->root_priou, EBT_STP_ROOTPRIO)) - return EBT_NOMATCH; + return false; } if (info->bitmask & EBT_STP_ROOTADDR) { verdict = 0; @@ -64,19 +64,19 @@ static int ebt_filter_config(const struct ebt_stp_info *info, verdict |= (stpc->root[2+i] ^ c->root_addr[i]) & c->root_addrmsk[i]; if (FWINV(verdict != 0, EBT_STP_ROOTADDR)) - return EBT_NOMATCH; + return false; } if (info->bitmask & EBT_STP_ROOTCOST) { v32 = NR32(stpc->root_cost); if (FWINV(v32 < c->root_costl || v32 > c->root_costu, EBT_STP_ROOTCOST)) - return EBT_NOMATCH; + return false; } if (info->bitmask & EBT_STP_SENDERPRIO) { v16 = NR16(stpc->sender); if (FWINV(v16 < c->sender_priol || v16 > c->sender_priou, EBT_STP_SENDERPRIO)) - return EBT_NOMATCH; + return false; } if (info->bitmask & EBT_STP_SENDERADDR) { verdict = 0; @@ -84,60 +84,60 @@ static int ebt_filter_config(const struct ebt_stp_info *info, verdict |= (stpc->sender[2+i] ^ c->sender_addr[i]) & c->sender_addrmsk[i]; if (FWINV(verdict != 0, EBT_STP_SENDERADDR)) - return EBT_NOMATCH; + return false; } if (info->bitmask & EBT_STP_PORT) { v16 = NR16(stpc->port); if (FWINV(v16 < c->portl || v16 > c->portu, EBT_STP_PORT)) - return EBT_NOMATCH; + return false; } if (info->bitmask & EBT_STP_MSGAGE) { v16 = NR16(stpc->msg_age); if (FWINV(v16 < c->msg_agel || v16 > c->msg_ageu, EBT_STP_MSGAGE)) - return EBT_NOMATCH; + return false; } if (info->bitmask & EBT_STP_MAXAGE) { v16 = NR16(stpc->max_age); if (FWINV(v16 < c->max_agel || v16 > c->max_ageu, EBT_STP_MAXAGE)) - return EBT_NOMATCH; + return false; } if (info->bitmask & EBT_STP_HELLOTIME) { v16 = NR16(stpc->hello_time); if (FWINV(v16 < c->hello_timel || v16 > c->hello_timeu, EBT_STP_HELLOTIME)) - return EBT_NOMATCH; + return false; } if (info->bitmask & EBT_STP_FWDD) { v16 = NR16(stpc->forward_delay); if (FWINV(v16 < c->forward_delayl || v16 > c->forward_delayu, EBT_STP_FWDD)) - return EBT_NOMATCH; + return false; } - return EBT_MATCH; + return true; } -static int ebt_filter_stp(const struct sk_buff *skb, const struct net_device *in, - const struct net_device *out, const void *data, unsigned int datalen) +static bool +ebt_stp_mt(const struct sk_buff *skb, const struct xt_match_param *par) { - const struct ebt_stp_info *info = data; + const struct ebt_stp_info *info = par->matchinfo; const struct stp_header *sp; struct stp_header _stph; const uint8_t header[6] = {0x42, 0x42, 0x03, 0x00, 0x00, 0x00}; sp = skb_header_pointer(skb, 0, sizeof(_stph), &_stph); if (sp == NULL) - return EBT_NOMATCH; + return false; /* The stp code only considers these */ if (memcmp(sp, header, sizeof(header))) - return EBT_NOMATCH; + return false; if (info->bitmask & EBT_STP_TYPE && FWINV(info->type != sp->type, EBT_STP_TYPE)) - return EBT_NOMATCH; + return false; if (sp->type == BPDU_TYPE_CONFIG && info->bitmask & EBT_STP_CONFIG_MASK) { @@ -147,48 +147,48 @@ static int ebt_filter_stp(const struct sk_buff *skb, const struct net_device *in st = skb_header_pointer(skb, sizeof(_stph), sizeof(_stpc), &_stpc); if (st == NULL) - return EBT_NOMATCH; + return false; return ebt_filter_config(info, st); } - return EBT_MATCH; + return true; } -static int ebt_stp_check(const char *tablename, unsigned int hookmask, - const struct ebt_entry *e, void *data, unsigned int datalen) +static bool ebt_stp_mt_check(const struct xt_mtchk_param *par) { - const struct ebt_stp_info *info = data; - const unsigned int len = EBT_ALIGN(sizeof(struct ebt_stp_info)); + const struct ebt_stp_info *info = par->matchinfo; const uint8_t bridge_ula[6] = {0x01, 0x80, 0xc2, 0x00, 0x00, 0x00}; const uint8_t msk[6] = {0xff, 0xff, 0xff, 0xff, 0xff, 0xff}; + const struct ebt_entry *e = par->entryinfo; if (info->bitmask & ~EBT_STP_MASK || info->invflags & ~EBT_STP_MASK || !(info->bitmask & EBT_STP_MASK)) - return -EINVAL; - if (datalen != len) - return -EINVAL; + return false; /* Make sure the match only receives stp frames */ if (compare_ether_addr(e->destmac, bridge_ula) || compare_ether_addr(e->destmsk, msk) || !(e->bitmask & EBT_DESTMAC)) - return -EINVAL; + return false; - return 0; + return true; } -static struct ebt_match filter_stp __read_mostly = { - .name = EBT_STP_MATCH, - .match = ebt_filter_stp, - .check = ebt_stp_check, +static struct xt_match ebt_stp_mt_reg __read_mostly = { + .name = "stp", + .revision = 0, + .family = NFPROTO_BRIDGE, + .match = ebt_stp_mt, + .checkentry = ebt_stp_mt_check, + .matchsize = XT_ALIGN(sizeof(struct ebt_stp_info)), .me = THIS_MODULE, }; static int __init ebt_stp_init(void) { - return ebt_register_match(&filter_stp); + return xt_register_match(&ebt_stp_mt_reg); } static void __exit ebt_stp_fini(void) { - ebt_unregister_match(&filter_stp); + xt_unregister_match(&ebt_stp_mt_reg); } module_init(ebt_stp_init); diff --git a/net/bridge/netfilter/ebt_ulog.c b/net/bridge/netfilter/ebt_ulog.c index 2d4c9ef909fc..2c6d6823e703 100644 --- a/net/bridge/netfilter/ebt_ulog.c +++ b/net/bridge/netfilter/ebt_ulog.c @@ -36,6 +36,7 @@ #include <linux/timer.h> #include <linux/netlink.h> #include <linux/netdevice.h> +#include <linux/netfilter/x_tables.h> #include <linux/netfilter_bridge/ebtables.h> #include <linux/netfilter_bridge/ebt_ulog.h> #include <net/netfilter/nf_log.h> @@ -223,7 +224,7 @@ alloc_failure: } /* this function is registered with the netfilter core */ -static void ebt_log_packet(unsigned int pf, unsigned int hooknum, +static void ebt_log_packet(u_int8_t pf, unsigned int hooknum, const struct sk_buff *skb, const struct net_device *in, const struct net_device *out, const struct nf_loginfo *li, const char *prefix) @@ -245,24 +246,20 @@ static void ebt_log_packet(unsigned int pf, unsigned int hooknum, ebt_ulog_packet(hooknum, skb, in, out, &loginfo, prefix); } -static void ebt_ulog(const struct sk_buff *skb, unsigned int hooknr, - const struct net_device *in, const struct net_device *out, - const void *data, unsigned int datalen) +static unsigned int +ebt_ulog_tg(struct sk_buff *skb, const struct xt_target_param *par) { - const struct ebt_ulog_info *uloginfo = data; - - ebt_ulog_packet(hooknr, skb, in, out, uloginfo, NULL); + ebt_ulog_packet(par->hooknum, skb, par->in, par->out, + par->targinfo, NULL); + return EBT_CONTINUE; } - -static int ebt_ulog_check(const char *tablename, unsigned int hookmask, - const struct ebt_entry *e, void *data, unsigned int datalen) +static bool ebt_ulog_tg_check(const struct xt_tgchk_param *par) { - struct ebt_ulog_info *uloginfo = data; + struct ebt_ulog_info *uloginfo = par->targinfo; - if (datalen != EBT_ALIGN(sizeof(struct ebt_ulog_info)) || - uloginfo->nlgroup > 31) - return -EINVAL; + if (uloginfo->nlgroup > 31) + return false; uloginfo->prefix[EBT_ULOG_PREFIX_LEN - 1] = '\0'; @@ -272,27 +269,31 @@ static int ebt_ulog_check(const char *tablename, unsigned int hookmask, return 0; } -static struct ebt_watcher ulog __read_mostly = { - .name = EBT_ULOG_WATCHER, - .watcher = ebt_ulog, - .check = ebt_ulog_check, +static struct xt_target ebt_ulog_tg_reg __read_mostly = { + .name = "ulog", + .revision = 0, + .family = NFPROTO_BRIDGE, + .target = ebt_ulog_tg, + .checkentry = ebt_ulog_tg_check, + .targetsize = XT_ALIGN(sizeof(struct ebt_ulog_info)), .me = THIS_MODULE, }; static const struct nf_logger ebt_ulog_logger = { - .name = EBT_ULOG_WATCHER, + .name = "ulog", .logfn = &ebt_log_packet, .me = THIS_MODULE, }; static int __init ebt_ulog_init(void) { - int i, ret = 0; + bool ret = true; + int i; if (nlbufsiz >= 128*1024) { printk(KERN_NOTICE "ebt_ulog: Netlink buffer has to be <= 128kB," " please try a smaller nlbufsiz parameter.\n"); - return -EINVAL; + return false; } /* initialize ulog_buffers */ @@ -304,13 +305,16 @@ static int __init ebt_ulog_init(void) ebtulognl = netlink_kernel_create(&init_net, NETLINK_NFLOG, EBT_ULOG_MAXNLGROUPS, NULL, NULL, THIS_MODULE); - if (!ebtulognl) - ret = -ENOMEM; - else if ((ret = ebt_register_watcher(&ulog))) + if (!ebtulognl) { + printk(KERN_WARNING KBUILD_MODNAME ": out of memory trying to " + "call netlink_kernel_create\n"); + ret = false; + } else if (xt_register_target(&ebt_ulog_tg_reg) != 0) { netlink_kernel_release(ebtulognl); + } - if (ret == 0) - nf_log_register(PF_BRIDGE, &ebt_ulog_logger); + if (ret) + nf_log_register(NFPROTO_BRIDGE, &ebt_ulog_logger); return ret; } @@ -321,7 +325,7 @@ static void __exit ebt_ulog_fini(void) int i; nf_log_unregister(&ebt_ulog_logger); - ebt_unregister_watcher(&ulog); + xt_unregister_target(&ebt_ulog_tg_reg); for (i = 0; i < EBT_ULOG_MAXNLGROUPS; i++) { ub = &ulog_buffers[i]; if (timer_pending(&ub->timer)) diff --git a/net/bridge/netfilter/ebt_vlan.c b/net/bridge/netfilter/ebt_vlan.c index ab60b0dade80..3dddd489328e 100644 --- a/net/bridge/netfilter/ebt_vlan.c +++ b/net/bridge/netfilter/ebt_vlan.c @@ -22,6 +22,7 @@ #include <linux/if_vlan.h> #include <linux/module.h> #include <linux/moduleparam.h> +#include <linux/netfilter/x_tables.h> #include <linux/netfilter_bridge/ebtables.h> #include <linux/netfilter_bridge/ebt_vlan.h> @@ -37,15 +38,12 @@ MODULE_LICENSE("GPL"); #define DEBUG_MSG(args...) if (debug) printk (KERN_DEBUG "ebt_vlan: " args) #define GET_BITMASK(_BIT_MASK_) info->bitmask & _BIT_MASK_ -#define EXIT_ON_MISMATCH(_MATCH_,_MASK_) {if (!((info->_MATCH_ == _MATCH_)^!!(info->invflags & _MASK_))) return EBT_NOMATCH;} +#define EXIT_ON_MISMATCH(_MATCH_,_MASK_) {if (!((info->_MATCH_ == _MATCH_)^!!(info->invflags & _MASK_))) return false; } -static int -ebt_filter_vlan(const struct sk_buff *skb, - const struct net_device *in, - const struct net_device *out, - const void *data, unsigned int datalen) +static bool +ebt_vlan_mt(const struct sk_buff *skb, const struct xt_match_param *par) { - const struct ebt_vlan_info *info = data; + const struct ebt_vlan_info *info = par->matchinfo; const struct vlan_hdr *fp; struct vlan_hdr _frame; @@ -57,7 +55,7 @@ ebt_filter_vlan(const struct sk_buff *skb, fp = skb_header_pointer(skb, 0, sizeof(_frame), &_frame); if (fp == NULL) - return EBT_NOMATCH; + return false; /* Tag Control Information (TCI) consists of the following elements: * - User_priority. The user_priority field is three bits in length, @@ -83,30 +81,20 @@ ebt_filter_vlan(const struct sk_buff *skb, if (GET_BITMASK(EBT_VLAN_ENCAP)) EXIT_ON_MISMATCH(encap, EBT_VLAN_ENCAP); - return EBT_MATCH; + return true; } -static int -ebt_check_vlan(const char *tablename, - unsigned int hooknr, - const struct ebt_entry *e, void *data, unsigned int datalen) +static bool ebt_vlan_mt_check(const struct xt_mtchk_param *par) { - struct ebt_vlan_info *info = data; - - /* Parameters buffer overflow check */ - if (datalen != EBT_ALIGN(sizeof(struct ebt_vlan_info))) { - DEBUG_MSG - ("passed size %d is not eq to ebt_vlan_info (%Zd)\n", - datalen, sizeof(struct ebt_vlan_info)); - return -EINVAL; - } + struct ebt_vlan_info *info = par->matchinfo; + const struct ebt_entry *e = par->entryinfo; /* Is it 802.1Q frame checked? */ if (e->ethproto != htons(ETH_P_8021Q)) { DEBUG_MSG ("passed entry proto %2.4X is not 802.1Q (8100)\n", (unsigned short) ntohs(e->ethproto)); - return -EINVAL; + return false; } /* Check for bitmask range @@ -114,14 +102,14 @@ ebt_check_vlan(const char *tablename, if (info->bitmask & ~EBT_VLAN_MASK) { DEBUG_MSG("bitmask %2X is out of mask (%2X)\n", info->bitmask, EBT_VLAN_MASK); - return -EINVAL; + return false; } /* Check for inversion flags range */ if (info->invflags & ~EBT_VLAN_MASK) { DEBUG_MSG("inversion flags %2X is out of mask (%2X)\n", info->invflags, EBT_VLAN_MASK); - return -EINVAL; + return false; } /* Reserved VLAN ID (VID) values @@ -136,7 +124,7 @@ ebt_check_vlan(const char *tablename, DEBUG_MSG ("id %d is out of range (1-4096)\n", info->id); - return -EINVAL; + return false; } /* Note: This is valid VLAN-tagged frame point. * Any value of user_priority are acceptable, @@ -151,7 +139,7 @@ ebt_check_vlan(const char *tablename, if ((unsigned char) info->prio > 7) { DEBUG_MSG("prio %d is out of range (0-7)\n", info->prio); - return -EINVAL; + return false; } } /* Check for encapsulated proto range - it is possible to be @@ -162,17 +150,20 @@ ebt_check_vlan(const char *tablename, DEBUG_MSG ("encap frame length %d is less than minimal\n", ntohs(info->encap)); - return -EINVAL; + return false; } } - return 0; + return true; } -static struct ebt_match filter_vlan __read_mostly = { - .name = EBT_VLAN_MATCH, - .match = ebt_filter_vlan, - .check = ebt_check_vlan, +static struct xt_match ebt_vlan_mt_reg __read_mostly = { + .name = "vlan", + .revision = 0, + .family = NFPROTO_BRIDGE, + .match = ebt_vlan_mt, + .checkentry = ebt_vlan_mt_check, + .matchsize = XT_ALIGN(sizeof(struct ebt_vlan_info)), .me = THIS_MODULE, }; @@ -181,12 +172,12 @@ static int __init ebt_vlan_init(void) DEBUG_MSG("ebtables 802.1Q extension module v" MODULE_VERS "\n"); DEBUG_MSG("module debug=%d\n", !!debug); - return ebt_register_match(&filter_vlan); + return xt_register_match(&ebt_vlan_mt_reg); } static void __exit ebt_vlan_fini(void) { - ebt_unregister_match(&filter_vlan); + xt_unregister_match(&ebt_vlan_mt_reg); } module_init(ebt_vlan_init); diff --git a/net/bridge/netfilter/ebtable_filter.c b/net/bridge/netfilter/ebtable_filter.c index 690bc3ab186c..1a58af51a2e2 100644 --- a/net/bridge/netfilter/ebtable_filter.c +++ b/net/bridge/netfilter/ebtable_filter.c @@ -93,28 +93,20 @@ static struct nf_hook_ops ebt_ops_filter[] __read_mostly = { static int __init ebtable_filter_init(void) { - int i, j, ret; + int ret; ret = ebt_register_table(&frame_filter); if (ret < 0) return ret; - for (i = 0; i < ARRAY_SIZE(ebt_ops_filter); i++) - if ((ret = nf_register_hook(&ebt_ops_filter[i])) < 0) - goto cleanup; - return ret; -cleanup: - for (j = 0; j < i; j++) - nf_unregister_hook(&ebt_ops_filter[j]); - ebt_unregister_table(&frame_filter); + ret = nf_register_hooks(ebt_ops_filter, ARRAY_SIZE(ebt_ops_filter)); + if (ret < 0) + ebt_unregister_table(&frame_filter); return ret; } static void __exit ebtable_filter_fini(void) { - int i; - - for (i = 0; i < ARRAY_SIZE(ebt_ops_filter); i++) - nf_unregister_hook(&ebt_ops_filter[i]); + nf_unregister_hooks(ebt_ops_filter, ARRAY_SIZE(ebt_ops_filter)); ebt_unregister_table(&frame_filter); } diff --git a/net/bridge/netfilter/ebtable_nat.c b/net/bridge/netfilter/ebtable_nat.c index 5b495fe2d0b6..f60c1e78e575 100644 --- a/net/bridge/netfilter/ebtable_nat.c +++ b/net/bridge/netfilter/ebtable_nat.c @@ -100,28 +100,20 @@ static struct nf_hook_ops ebt_ops_nat[] __read_mostly = { static int __init ebtable_nat_init(void) { - int i, ret, j; + int ret; ret = ebt_register_table(&frame_nat); if (ret < 0) return ret; - for (i = 0; i < ARRAY_SIZE(ebt_ops_nat); i++) - if ((ret = nf_register_hook(&ebt_ops_nat[i])) < 0) - goto cleanup; - return ret; -cleanup: - for (j = 0; j < i; j++) - nf_unregister_hook(&ebt_ops_nat[j]); - ebt_unregister_table(&frame_nat); + ret = nf_register_hooks(ebt_ops_nat, ARRAY_SIZE(ebt_ops_nat)); + if (ret < 0) + ebt_unregister_table(&frame_nat); return ret; } static void __exit ebtable_nat_fini(void) { - int i; - - for (i = 0; i < ARRAY_SIZE(ebt_ops_nat); i++) - nf_unregister_hook(&ebt_ops_nat[i]); + nf_unregister_hooks(ebt_ops_nat, ARRAY_SIZE(ebt_ops_nat)); ebt_unregister_table(&frame_nat); } diff --git a/net/bridge/netfilter/ebtables.c b/net/bridge/netfilter/ebtables.c index 32afff859e4a..5bb88eb0aad4 100644 --- a/net/bridge/netfilter/ebtables.c +++ b/net/bridge/netfilter/ebtables.c @@ -19,6 +19,7 @@ #include <linux/kmod.h> #include <linux/module.h> #include <linux/vmalloc.h> +#include <linux/netfilter/x_tables.h> #include <linux/netfilter_bridge/ebtables.h> #include <linux/spinlock.h> #include <linux/mutex.h> @@ -55,29 +56,31 @@ static DEFINE_MUTEX(ebt_mutex); static LIST_HEAD(ebt_tables); -static LIST_HEAD(ebt_targets); -static LIST_HEAD(ebt_matches); -static LIST_HEAD(ebt_watchers); -static struct ebt_target ebt_standard_target = -{ {NULL, NULL}, EBT_STANDARD_TARGET, NULL, NULL, NULL, NULL}; +static struct xt_target ebt_standard_target = { + .name = "standard", + .revision = 0, + .family = NFPROTO_BRIDGE, + .targetsize = sizeof(int), +}; -static inline int ebt_do_watcher (struct ebt_entry_watcher *w, - const struct sk_buff *skb, unsigned int hooknr, const struct net_device *in, - const struct net_device *out) +static inline int +ebt_do_watcher(const struct ebt_entry_watcher *w, struct sk_buff *skb, + struct xt_target_param *par) { - w->u.watcher->watcher(skb, hooknr, in, out, w->data, - w->watcher_size); + par->target = w->u.watcher; + par->targinfo = w->data; + w->u.watcher->target(skb, par); /* watchers don't give a verdict */ return 0; } static inline int ebt_do_match (struct ebt_entry_match *m, - const struct sk_buff *skb, const struct net_device *in, - const struct net_device *out) + const struct sk_buff *skb, struct xt_match_param *par) { - return m->u.match->match(skb, in, out, m->data, - m->match_size); + par->match = m->u.match; + par->matchinfo = m->data; + return m->u.match->match(skb, par); } static inline int ebt_dev_check(char *entry, const struct net_device *device) @@ -153,6 +156,15 @@ unsigned int ebt_do_table (unsigned int hook, struct sk_buff *skb, struct ebt_entries *chaininfo; char *base; struct ebt_table_info *private; + bool hotdrop = false; + struct xt_match_param mtpar; + struct xt_target_param tgpar; + + mtpar.family = tgpar.family = NFPROTO_BRIDGE; + mtpar.in = tgpar.in = in; + mtpar.out = tgpar.out = out; + mtpar.hotdrop = &hotdrop; + tgpar.hooknum = hook; read_lock_bh(&table->lock); private = table->private; @@ -173,8 +185,12 @@ unsigned int ebt_do_table (unsigned int hook, struct sk_buff *skb, if (ebt_basic_match(point, eth_hdr(skb), in, out)) goto letscontinue; - if (EBT_MATCH_ITERATE(point, ebt_do_match, skb, in, out) != 0) + if (EBT_MATCH_ITERATE(point, ebt_do_match, skb, &mtpar) != 0) goto letscontinue; + if (hotdrop) { + read_unlock_bh(&table->lock); + return NF_DROP; + } /* increase counter */ (*(counter_base + i)).pcnt++; @@ -182,17 +198,18 @@ unsigned int ebt_do_table (unsigned int hook, struct sk_buff *skb, /* these should only watch: not modify, nor tell us what to do with the packet */ - EBT_WATCHER_ITERATE(point, ebt_do_watcher, skb, hook, in, - out); + EBT_WATCHER_ITERATE(point, ebt_do_watcher, skb, &tgpar); t = (struct ebt_entry_target *) (((char *)point) + point->target_offset); /* standard target */ if (!t->u.target->target) verdict = ((struct ebt_standard_target *)t)->verdict; - else - verdict = t->u.target->target(skb, hook, - in, out, t->data, t->target_size); + else { + tgpar.target = t->u.target; + tgpar.targinfo = t->data; + verdict = t->u.target->target(skb, &tgpar); + } if (verdict == EBT_ACCEPT) { read_unlock_bh(&table->lock); return NF_ACCEPT; @@ -312,80 +329,71 @@ find_table_lock(const char *name, int *error, struct mutex *mutex) return find_inlist_lock(&ebt_tables, name, "ebtable_", error, mutex); } -static inline struct ebt_match * -find_match_lock(const char *name, int *error, struct mutex *mutex) -{ - return find_inlist_lock(&ebt_matches, name, "ebt_", error, mutex); -} - -static inline struct ebt_watcher * -find_watcher_lock(const char *name, int *error, struct mutex *mutex) -{ - return find_inlist_lock(&ebt_watchers, name, "ebt_", error, mutex); -} - -static inline struct ebt_target * -find_target_lock(const char *name, int *error, struct mutex *mutex) -{ - return find_inlist_lock(&ebt_targets, name, "ebt_", error, mutex); -} - static inline int -ebt_check_match(struct ebt_entry_match *m, struct ebt_entry *e, - const char *name, unsigned int hookmask, unsigned int *cnt) +ebt_check_match(struct ebt_entry_match *m, struct xt_mtchk_param *par, + unsigned int *cnt) { - struct ebt_match *match; + const struct ebt_entry *e = par->entryinfo; + struct xt_match *match; size_t left = ((char *)e + e->watchers_offset) - (char *)m; int ret; if (left < sizeof(struct ebt_entry_match) || left - sizeof(struct ebt_entry_match) < m->match_size) return -EINVAL; - match = find_match_lock(m->u.name, &ret, &ebt_mutex); - if (!match) - return ret; - m->u.match = match; - if (!try_module_get(match->me)) { - mutex_unlock(&ebt_mutex); + + match = try_then_request_module(xt_find_match(NFPROTO_BRIDGE, + m->u.name, 0), "ebt_%s", m->u.name); + if (IS_ERR(match)) + return PTR_ERR(match); + if (match == NULL) return -ENOENT; - } - mutex_unlock(&ebt_mutex); - if (match->check && - match->check(name, hookmask, e, m->data, m->match_size) != 0) { - BUGPRINT("match->check failed\n"); + m->u.match = match; + + par->match = match; + par->matchinfo = m->data; + ret = xt_check_match(par, m->match_size, + e->ethproto, e->invflags & EBT_IPROTO); + if (ret < 0) { module_put(match->me); - return -EINVAL; + return ret; } + (*cnt)++; return 0; } static inline int -ebt_check_watcher(struct ebt_entry_watcher *w, struct ebt_entry *e, - const char *name, unsigned int hookmask, unsigned int *cnt) +ebt_check_watcher(struct ebt_entry_watcher *w, struct xt_tgchk_param *par, + unsigned int *cnt) { - struct ebt_watcher *watcher; + const struct ebt_entry *e = par->entryinfo; + struct xt_target *watcher; size_t left = ((char *)e + e->target_offset) - (char *)w; int ret; if (left < sizeof(struct ebt_entry_watcher) || left - sizeof(struct ebt_entry_watcher) < w->watcher_size) return -EINVAL; - watcher = find_watcher_lock(w->u.name, &ret, &ebt_mutex); - if (!watcher) - return ret; - w->u.watcher = watcher; - if (!try_module_get(watcher->me)) { - mutex_unlock(&ebt_mutex); + + watcher = try_then_request_module( + xt_find_target(NFPROTO_BRIDGE, w->u.name, 0), + "ebt_%s", w->u.name); + if (IS_ERR(watcher)) + return PTR_ERR(watcher); + if (watcher == NULL) return -ENOENT; - } - mutex_unlock(&ebt_mutex); - if (watcher->check && - watcher->check(name, hookmask, e, w->data, w->watcher_size) != 0) { - BUGPRINT("watcher->check failed\n"); + w->u.watcher = watcher; + + par->target = watcher; + par->targinfo = w->data; + ret = xt_check_target(par, w->watcher_size, + e->ethproto, e->invflags & EBT_IPROTO); + if (ret < 0) { module_put(watcher->me); - return -EINVAL; + return ret; } + (*cnt)++; return 0; } @@ -558,30 +566,41 @@ ebt_get_udc_positions(struct ebt_entry *e, struct ebt_table_info *newinfo, static inline int ebt_cleanup_match(struct ebt_entry_match *m, unsigned int *i) { + struct xt_mtdtor_param par; + if (i && (*i)-- == 0) return 1; - if (m->u.match->destroy) - m->u.match->destroy(m->data, m->match_size); - module_put(m->u.match->me); + par.match = m->u.match; + par.matchinfo = m->data; + par.family = NFPROTO_BRIDGE; + if (par.match->destroy != NULL) + par.match->destroy(&par); + module_put(par.match->me); return 0; } static inline int ebt_cleanup_watcher(struct ebt_entry_watcher *w, unsigned int *i) { + struct xt_tgdtor_param par; + if (i && (*i)-- == 0) return 1; - if (w->u.watcher->destroy) - w->u.watcher->destroy(w->data, w->watcher_size); - module_put(w->u.watcher->me); + par.target = w->u.watcher; + par.targinfo = w->data; + par.family = NFPROTO_BRIDGE; + if (par.target->destroy != NULL) + par.target->destroy(&par); + module_put(par.target->me); return 0; } static inline int ebt_cleanup_entry(struct ebt_entry *e, unsigned int *cnt) { + struct xt_tgdtor_param par; struct ebt_entry_target *t; if (e->bitmask == 0) @@ -592,10 +611,13 @@ ebt_cleanup_entry(struct ebt_entry *e, unsigned int *cnt) EBT_WATCHER_ITERATE(e, ebt_cleanup_watcher, NULL); EBT_MATCH_ITERATE(e, ebt_cleanup_match, NULL); t = (struct ebt_entry_target *)(((char *)e) + e->target_offset); - if (t->u.target->destroy) - t->u.target->destroy(t->data, t->target_size); - module_put(t->u.target->me); + par.target = t->u.target; + par.targinfo = t->data; + par.family = NFPROTO_BRIDGE; + if (par.target->destroy != NULL) + par.target->destroy(&par); + module_put(par.target->me); return 0; } @@ -605,10 +627,12 @@ ebt_check_entry(struct ebt_entry *e, struct ebt_table_info *newinfo, struct ebt_cl_stack *cl_s, unsigned int udc_cnt) { struct ebt_entry_target *t; - struct ebt_target *target; + struct xt_target *target; unsigned int i, j, hook = 0, hookmask = 0; size_t gap; int ret; + struct xt_mtchk_param mtpar; + struct xt_tgchk_param tgpar; /* don't mess with the struct ebt_entries */ if (e->bitmask == 0) @@ -649,24 +673,31 @@ ebt_check_entry(struct ebt_entry *e, struct ebt_table_info *newinfo, hookmask = cl_s[i - 1].hookmask; } i = 0; - ret = EBT_MATCH_ITERATE(e, ebt_check_match, e, name, hookmask, &i); + + mtpar.table = tgpar.table = name; + mtpar.entryinfo = tgpar.entryinfo = e; + mtpar.hook_mask = tgpar.hook_mask = hookmask; + mtpar.family = tgpar.family = NFPROTO_BRIDGE; + ret = EBT_MATCH_ITERATE(e, ebt_check_match, &mtpar, &i); if (ret != 0) goto cleanup_matches; j = 0; - ret = EBT_WATCHER_ITERATE(e, ebt_check_watcher, e, name, hookmask, &j); + ret = EBT_WATCHER_ITERATE(e, ebt_check_watcher, &tgpar, &j); if (ret != 0) goto cleanup_watchers; t = (struct ebt_entry_target *)(((char *)e) + e->target_offset); gap = e->next_offset - e->target_offset; - target = find_target_lock(t->u.name, &ret, &ebt_mutex); - if (!target) + + target = try_then_request_module( + xt_find_target(NFPROTO_BRIDGE, t->u.name, 0), + "ebt_%s", t->u.name); + if (IS_ERR(target)) { + ret = PTR_ERR(target); goto cleanup_watchers; - if (!try_module_get(target->me)) { - mutex_unlock(&ebt_mutex); + } else if (target == NULL) { ret = -ENOENT; goto cleanup_watchers; } - mutex_unlock(&ebt_mutex); t->u.target = target; if (t->u.target == &ebt_standard_target) { @@ -681,13 +712,20 @@ ebt_check_entry(struct ebt_entry *e, struct ebt_table_info *newinfo, ret = -EFAULT; goto cleanup_watchers; } - } else if (t->target_size > gap - sizeof(struct ebt_entry_target) || - (t->u.target->check && - t->u.target->check(name, hookmask, e, t->data, t->target_size) != 0)){ + } else if (t->target_size > gap - sizeof(struct ebt_entry_target)) { module_put(t->u.target->me); ret = -EFAULT; goto cleanup_watchers; } + + tgpar.target = target; + tgpar.targinfo = t->data; + ret = xt_check_target(&tgpar, t->target_size, + e->ethproto, e->invflags & EBT_IPROTO); + if (ret < 0) { + module_put(target->me); + goto cleanup_watchers; + } (*cnt)++; return 0; cleanup_watchers: @@ -1068,87 +1106,6 @@ free_newinfo: return ret; } -int ebt_register_target(struct ebt_target *target) -{ - struct ebt_target *t; - int ret; - - ret = mutex_lock_interruptible(&ebt_mutex); - if (ret != 0) - return ret; - list_for_each_entry(t, &ebt_targets, list) { - if (strcmp(t->name, target->name) == 0) { - mutex_unlock(&ebt_mutex); - return -EEXIST; - } - } - list_add(&target->list, &ebt_targets); - mutex_unlock(&ebt_mutex); - - return 0; -} - -void ebt_unregister_target(struct ebt_target *target) -{ - mutex_lock(&ebt_mutex); - list_del(&target->list); - mutex_unlock(&ebt_mutex); -} - -int ebt_register_match(struct ebt_match *match) -{ - struct ebt_match *m; - int ret; - - ret = mutex_lock_interruptible(&ebt_mutex); - if (ret != 0) - return ret; - list_for_each_entry(m, &ebt_matches, list) { - if (strcmp(m->name, match->name) == 0) { - mutex_unlock(&ebt_mutex); - return -EEXIST; - } - } - list_add(&match->list, &ebt_matches); - mutex_unlock(&ebt_mutex); - - return 0; -} - -void ebt_unregister_match(struct ebt_match *match) -{ - mutex_lock(&ebt_mutex); - list_del(&match->list); - mutex_unlock(&ebt_mutex); -} - -int ebt_register_watcher(struct ebt_watcher *watcher) -{ - struct ebt_watcher *w; - int ret; - - ret = mutex_lock_interruptible(&ebt_mutex); - if (ret != 0) - return ret; - list_for_each_entry(w, &ebt_watchers, list) { - if (strcmp(w->name, watcher->name) == 0) { - mutex_unlock(&ebt_mutex); - return -EEXIST; - } - } - list_add(&watcher->list, &ebt_watchers); - mutex_unlock(&ebt_mutex); - - return 0; -} - -void ebt_unregister_watcher(struct ebt_watcher *watcher) -{ - mutex_lock(&ebt_mutex); - list_del(&watcher->list); - mutex_unlock(&ebt_mutex); -} - int ebt_register_table(struct ebt_table *table) { struct ebt_table_info *newinfo; @@ -1518,11 +1475,14 @@ static int __init ebtables_init(void) { int ret; - mutex_lock(&ebt_mutex); - list_add(&ebt_standard_target.list, &ebt_targets); - mutex_unlock(&ebt_mutex); - if ((ret = nf_register_sockopt(&ebt_sockopts)) < 0) + ret = xt_register_target(&ebt_standard_target); + if (ret < 0) + return ret; + ret = nf_register_sockopt(&ebt_sockopts); + if (ret < 0) { + xt_unregister_target(&ebt_standard_target); return ret; + } printk(KERN_INFO "Ebtables v2.0 registered\n"); return 0; @@ -1531,17 +1491,12 @@ static int __init ebtables_init(void) static void __exit ebtables_fini(void) { nf_unregister_sockopt(&ebt_sockopts); + xt_unregister_target(&ebt_standard_target); printk(KERN_INFO "Ebtables v2.0 unregistered\n"); } EXPORT_SYMBOL(ebt_register_table); EXPORT_SYMBOL(ebt_unregister_table); -EXPORT_SYMBOL(ebt_register_match); -EXPORT_SYMBOL(ebt_unregister_match); -EXPORT_SYMBOL(ebt_register_watcher); -EXPORT_SYMBOL(ebt_unregister_watcher); -EXPORT_SYMBOL(ebt_register_target); -EXPORT_SYMBOL(ebt_unregister_target); EXPORT_SYMBOL(ebt_do_table); module_init(ebtables_init); module_exit(ebtables_fini); diff --git a/net/can/af_can.c b/net/can/af_can.c index 7e8ca2836452..8035fbf526ae 100644 --- a/net/can/af_can.c +++ b/net/can/af_can.c @@ -205,12 +205,19 @@ static int can_create(struct net *net, struct socket *sock, int protocol) * -ENOBUFS on full driver queue (see net_xmit_errno()) * -ENOMEM when local loopback failed at calling skb_clone() * -EPERM when trying to send on a non-CAN interface + * -EINVAL when the skb->data does not contain a valid CAN frame */ int can_send(struct sk_buff *skb, int loop) { struct sk_buff *newskb = NULL; + struct can_frame *cf = (struct can_frame *)skb->data; int err; + if (skb->len != sizeof(struct can_frame) || cf->can_dlc > 8) { + kfree_skb(skb); + return -EINVAL; + } + if (skb->dev->type != ARPHRD_CAN) { kfree_skb(skb); return -EPERM; @@ -605,13 +612,16 @@ static int can_rcv(struct sk_buff *skb, struct net_device *dev, struct packet_type *pt, struct net_device *orig_dev) { struct dev_rcv_lists *d; + struct can_frame *cf = (struct can_frame *)skb->data; int matches; - if (dev->type != ARPHRD_CAN || dev_net(dev) != &init_net) { + if (dev->type != ARPHRD_CAN || !net_eq(dev_net(dev), &init_net)) { kfree_skb(skb); return 0; } + BUG_ON(skb->len != sizeof(struct can_frame) || cf->can_dlc > 8); + /* update statistics */ can_stats.rx_frames++; can_stats.rx_frames_delta++; @@ -718,7 +728,7 @@ static int can_notifier(struct notifier_block *nb, unsigned long msg, struct net_device *dev = (struct net_device *)data; struct dev_rcv_lists *d; - if (dev_net(dev) != &init_net) + if (!net_eq(dev_net(dev), &init_net)) return NOTIFY_DONE; if (dev->type != ARPHRD_CAN) diff --git a/net/can/bcm.c b/net/can/bcm.c index d9a3a9d13bed..d0dd382001e2 100644 --- a/net/can/bcm.c +++ b/net/can/bcm.c @@ -298,7 +298,7 @@ static void bcm_send_to_user(struct bcm_op *op, struct bcm_msg_head *head, if (head->nframes) { /* can_frames starting here */ - firstframe = (struct can_frame *) skb_tail_pointer(skb); + firstframe = (struct can_frame *)skb_tail_pointer(skb); memcpy(skb_put(skb, datalen), frames, datalen); @@ -826,6 +826,10 @@ static int bcm_tx_setup(struct bcm_msg_head *msg_head, struct msghdr *msg, for (i = 0; i < msg_head->nframes; i++) { err = memcpy_fromiovec((u8 *)&op->frames[i], msg->msg_iov, CFSIZ); + + if (op->frames[i].can_dlc > 8) + err = -EINVAL; + if (err < 0) return err; @@ -858,6 +862,10 @@ static int bcm_tx_setup(struct bcm_msg_head *msg_head, struct msghdr *msg, for (i = 0; i < msg_head->nframes; i++) { err = memcpy_fromiovec((u8 *)&op->frames[i], msg->msg_iov, CFSIZ); + + if (op->frames[i].can_dlc > 8) + err = -EINVAL; + if (err < 0) { if (op->frames != &op->sframe) kfree(op->frames); @@ -1164,9 +1172,12 @@ static int bcm_tx_send(struct msghdr *msg, int ifindex, struct sock *sk) skb->dev = dev; skb->sk = sk; - can_send(skb, 1); /* send with loopback */ + err = can_send(skb, 1); /* send with loopback */ dev_put(dev); + if (err) + return err; + return CFSIZ + MHSIZ; } @@ -1185,6 +1196,10 @@ static int bcm_sendmsg(struct kiocb *iocb, struct socket *sock, if (!bo->bound) return -ENOTCONN; + /* check for valid message length from userspace */ + if (size < MHSIZ || (size - MHSIZ) % CFSIZ) + return -EINVAL; + /* check for alternative ifindex for this bcm_op */ if (!ifindex && msg->msg_name) { @@ -1259,8 +1274,8 @@ static int bcm_sendmsg(struct kiocb *iocb, struct socket *sock, break; case TX_SEND: - /* we need at least one can_frame */ - if (msg_head.nframes < 1) + /* we need exactly one can_frame behind the msg head */ + if ((msg_head.nframes != 1) || (size != CFSIZ + MHSIZ)) ret = -EINVAL; else ret = bcm_tx_send(msg, ifindex, sk); @@ -1288,7 +1303,7 @@ static int bcm_notifier(struct notifier_block *nb, unsigned long msg, struct bcm_op *op; int notify_enodev = 0; - if (dev_net(dev) != &init_net) + if (!net_eq(dev_net(dev), &init_net)) return NOTIFY_DONE; if (dev->type != ARPHRD_CAN) diff --git a/net/can/raw.c b/net/can/raw.c index 69877b8e7e9c..6e0663faaf9f 100644 --- a/net/can/raw.c +++ b/net/can/raw.c @@ -210,7 +210,7 @@ static int raw_notifier(struct notifier_block *nb, struct raw_sock *ro = container_of(nb, struct raw_sock, notifier); struct sock *sk = &ro->sk; - if (dev_net(dev) != &init_net) + if (!net_eq(dev_net(dev), &init_net)) return NOTIFY_DONE; if (dev->type != ARPHRD_CAN) @@ -632,6 +632,9 @@ static int raw_sendmsg(struct kiocb *iocb, struct socket *sock, } else ifindex = ro->ifindex; + if (size != sizeof(struct can_frame)) + return -EINVAL; + dev = dev_get_by_index(&init_net, ifindex); if (!dev) return -ENXIO; diff --git a/net/compat.c b/net/compat.c index c823f6f290cb..67fb6a3834a3 100644 --- a/net/compat.c +++ b/net/compat.c @@ -75,7 +75,7 @@ int get_compat_msghdr(struct msghdr *kmsg, struct compat_msghdr __user *umsg) /* I've named the args so it is easy to tell whose space the pointers are in. */ int verify_compat_iovec(struct msghdr *kern_msg, struct iovec *kern_iov, - char *kern_address, int mode) + struct sockaddr *kern_address, int mode) { int tot_len; @@ -722,9 +722,10 @@ EXPORT_SYMBOL(compat_mc_getsockopt); /* Argument list sizes for compat_sys_socketcall */ #define AL(x) ((x) * sizeof(u32)) -static unsigned char nas[18]={AL(0),AL(3),AL(3),AL(3),AL(2),AL(3), +static unsigned char nas[19]={AL(0),AL(3),AL(3),AL(3),AL(2),AL(3), AL(3),AL(3),AL(4),AL(4),AL(4),AL(6), - AL(6),AL(2),AL(5),AL(5),AL(3),AL(3)}; + AL(6),AL(2),AL(5),AL(5),AL(3),AL(3), + AL(6)}; #undef AL asmlinkage long compat_sys_sendmsg(int fd, struct compat_msghdr __user *msg, unsigned flags) @@ -737,13 +738,52 @@ asmlinkage long compat_sys_recvmsg(int fd, struct compat_msghdr __user *msg, uns return sys_recvmsg(fd, (struct msghdr __user *)msg, flags | MSG_CMSG_COMPAT); } +asmlinkage long compat_sys_paccept(int fd, struct sockaddr __user *upeer_sockaddr, + int __user *upeer_addrlen, + const compat_sigset_t __user *sigmask, + compat_size_t sigsetsize, int flags) +{ + compat_sigset_t ss32; + sigset_t ksigmask, sigsaved; + int ret; + + if (sigmask) { + if (sigsetsize != sizeof(compat_sigset_t)) + return -EINVAL; + if (copy_from_user(&ss32, sigmask, sizeof(ss32))) + return -EFAULT; + sigset_from_compat(&ksigmask, &ss32); + + sigdelsetmask(&ksigmask, sigmask(SIGKILL)|sigmask(SIGSTOP)); + sigprocmask(SIG_SETMASK, &ksigmask, &sigsaved); + } + + ret = do_accept(fd, upeer_sockaddr, upeer_addrlen, flags); + + if (ret == -ERESTARTNOHAND) { + /* + * Don't restore the signal mask yet. Let do_signal() deliver + * the signal on the way back to userspace, before the signal + * mask is restored. + */ + if (sigmask) { + memcpy(¤t->saved_sigmask, &sigsaved, + sizeof(sigsaved)); + set_restore_sigmask(); + } + } else if (sigmask) + sigprocmask(SIG_SETMASK, &sigsaved, NULL); + + return ret; +} + asmlinkage long compat_sys_socketcall(int call, u32 __user *args) { int ret; u32 a[6]; u32 a0, a1; - if (call < SYS_SOCKET || call > SYS_RECVMSG) + if (call < SYS_SOCKET || call > SYS_PACCEPT) return -EINVAL; if (copy_from_user(a, args, nas[call])) return -EFAULT; @@ -764,7 +804,7 @@ asmlinkage long compat_sys_socketcall(int call, u32 __user *args) ret = sys_listen(a0, a1); break; case SYS_ACCEPT: - ret = sys_accept(a0, compat_ptr(a1), compat_ptr(a[2])); + ret = do_accept(a0, compat_ptr(a1), compat_ptr(a[2]), 0); break; case SYS_GETSOCKNAME: ret = sys_getsockname(a0, compat_ptr(a1), compat_ptr(a[2])); @@ -804,6 +844,10 @@ asmlinkage long compat_sys_socketcall(int call, u32 __user *args) case SYS_RECVMSG: ret = compat_sys_recvmsg(a0, compat_ptr(a1), a[2]); break; + case SYS_PACCEPT: + ret = compat_sys_paccept(a0, compat_ptr(a1), compat_ptr(a[2]), + compat_ptr(a[3]), a[4], a[5]); + break; default: ret = -EINVAL; break; diff --git a/net/core/Makefile b/net/core/Makefile index b1332f6d0042..26a37cb31923 100644 --- a/net/core/Makefile +++ b/net/core/Makefile @@ -6,6 +6,7 @@ obj-y := sock.o request_sock.o skbuff.o iovec.o datagram.o stream.o scm.o \ gen_stats.o gen_estimator.o net_namespace.o obj-$(CONFIG_SYSCTL) += sysctl_net_core.o +obj-$(CONFIG_HAS_DMA) += skb_dma_map.o obj-y += dev.o ethtool.o dev_mcast.o dst.o netevent.o \ neighbour.o rtnetlink.o utils.o link_watch.o filter.o diff --git a/net/core/datagram.c b/net/core/datagram.c index 8a28fc93b724..ee631843c2f5 100644 --- a/net/core/datagram.c +++ b/net/core/datagram.c @@ -9,7 +9,7 @@ * identical recvmsg() code. So we share it here. The poll was * shared before but buried in udp.c so I moved it. * - * Authors: Alan Cox <alan@redhat.com>. (datagram_poll() from old + * Authors: Alan Cox <alan@lxorguk.ukuu.org.uk>. (datagram_poll() from old * udp.c code) * * Fixes: @@ -285,7 +285,7 @@ int skb_copy_datagram_iovec(const struct sk_buff *skb, int offset, for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) { int end; - BUG_TRAP(start <= offset + len); + WARN_ON(start > offset + len); end = start + skb_shinfo(skb)->frags[i].size; if ((copy = end - offset) > 0) { @@ -315,7 +315,7 @@ int skb_copy_datagram_iovec(const struct sk_buff *skb, int offset, for (; list; list = list->next) { int end; - BUG_TRAP(start <= offset + len); + WARN_ON(start > offset + len); end = start + list->len; if ((copy = end - offset) > 0) { @@ -339,6 +339,93 @@ fault: return -EFAULT; } +/** + * skb_copy_datagram_from_iovec - Copy a datagram from an iovec. + * @skb: buffer to copy + * @offset: offset in the buffer to start copying to + * @from: io vector to copy to + * @len: amount of data to copy to buffer from iovec + * + * Returns 0 or -EFAULT. + * Note: the iovec is modified during the copy. + */ +int skb_copy_datagram_from_iovec(struct sk_buff *skb, int offset, + struct iovec *from, int len) +{ + int start = skb_headlen(skb); + int i, copy = start - offset; + + /* Copy header. */ + if (copy > 0) { + if (copy > len) + copy = len; + if (memcpy_fromiovec(skb->data + offset, from, copy)) + goto fault; + if ((len -= copy) == 0) + return 0; + offset += copy; + } + + /* Copy paged appendix. Hmm... why does this look so complicated? */ + for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) { + int end; + + WARN_ON(start > offset + len); + + end = start + skb_shinfo(skb)->frags[i].size; + if ((copy = end - offset) > 0) { + int err; + u8 *vaddr; + skb_frag_t *frag = &skb_shinfo(skb)->frags[i]; + struct page *page = frag->page; + + if (copy > len) + copy = len; + vaddr = kmap(page); + err = memcpy_fromiovec(vaddr + frag->page_offset + + offset - start, from, copy); + kunmap(page); + if (err) + goto fault; + + if (!(len -= copy)) + return 0; + offset += copy; + } + start = end; + } + + if (skb_shinfo(skb)->frag_list) { + struct sk_buff *list = skb_shinfo(skb)->frag_list; + + for (; list; list = list->next) { + int end; + + WARN_ON(start > offset + len); + + end = start + list->len; + if ((copy = end - offset) > 0) { + if (copy > len) + copy = len; + if (skb_copy_datagram_from_iovec(list, + offset - start, + from, copy)) + goto fault; + if ((len -= copy) == 0) + return 0; + offset += copy; + } + start = end; + } + } + if (!len) + return 0; + +fault: + return -EFAULT; +} +EXPORT_SYMBOL(skb_copy_datagram_from_iovec); + static int skb_copy_and_csum_datagram(const struct sk_buff *skb, int offset, u8 __user *to, int len, __wsum *csump) @@ -366,7 +453,7 @@ static int skb_copy_and_csum_datagram(const struct sk_buff *skb, int offset, for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) { int end; - BUG_TRAP(start <= offset + len); + WARN_ON(start > offset + len); end = start + skb_shinfo(skb)->frags[i].size; if ((copy = end - offset) > 0) { @@ -402,7 +489,7 @@ static int skb_copy_and_csum_datagram(const struct sk_buff *skb, int offset, for (; list; list=list->next) { int end; - BUG_TRAP(start <= offset + len); + WARN_ON(start > offset + len); end = start + list->len; if ((copy = end - offset) > 0) { diff --git a/net/core/dev.c b/net/core/dev.c index c421a1f8f0b9..1408a083fe4e 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -90,6 +90,7 @@ #include <linux/if_ether.h> #include <linux/netdevice.h> #include <linux/etherdevice.h> +#include <linux/ethtool.h> #include <linux/notifier.h> #include <linux/skbuff.h> #include <net/net_namespace.h> @@ -120,6 +121,12 @@ #include <linux/ctype.h> #include <linux/if_arp.h> #include <linux/if_vlan.h> +#include <linux/ip.h> +#include <net/ip.h> +#include <linux/ipv6.h> +#include <linux/in.h> +#include <linux/jhash.h> +#include <linux/random.h> #include "net-sysfs.h" @@ -255,9 +262,9 @@ static RAW_NOTIFIER_HEAD(netdev_chain); DEFINE_PER_CPU(struct softnet_data, softnet_data); -#ifdef CONFIG_DEBUG_LOCK_ALLOC +#ifdef CONFIG_LOCKDEP /* - * register_netdevice() inits dev->_xmit_lock and sets lockdep class + * register_netdevice() inits txq->_xmit_lock and sets lockdep class * according to dev->type */ static const unsigned short netdev_lock_type[] = @@ -295,6 +302,7 @@ static const char *netdev_lock_name[] = "_xmit_NONE"}; static struct lock_class_key netdev_xmit_lock_key[ARRAY_SIZE(netdev_lock_type)]; +static struct lock_class_key netdev_addr_lock_key[ARRAY_SIZE(netdev_lock_type)]; static inline unsigned short netdev_lock_pos(unsigned short dev_type) { @@ -307,8 +315,8 @@ static inline unsigned short netdev_lock_pos(unsigned short dev_type) return ARRAY_SIZE(netdev_lock_type) - 1; } -static inline void netdev_set_lockdep_class(spinlock_t *lock, - unsigned short dev_type) +static inline void netdev_set_xmit_lockdep_class(spinlock_t *lock, + unsigned short dev_type) { int i; @@ -316,9 +324,22 @@ static inline void netdev_set_lockdep_class(spinlock_t *lock, lockdep_set_class_and_name(lock, &netdev_xmit_lock_key[i], netdev_lock_name[i]); } + +static inline void netdev_set_addr_lockdep_class(struct net_device *dev) +{ + int i; + + i = netdev_lock_pos(dev->type); + lockdep_set_class_and_name(&dev->addr_list_lock, + &netdev_addr_lock_key[i], + netdev_lock_name[i]); +} #else -static inline void netdev_set_lockdep_class(spinlock_t *lock, - unsigned short dev_type) +static inline void netdev_set_xmit_lockdep_class(spinlock_t *lock, + unsigned short dev_type) +{ +} +static inline void netdev_set_addr_lockdep_class(struct net_device *dev) { } #endif @@ -454,7 +475,7 @@ static int netdev_boot_setup_add(char *name, struct ifmap *map) for (i = 0; i < NETDEV_BOOT_SETUP_MAX; i++) { if (s[i].name[0] == '\0' || s[i].name[0] == ' ') { memset(s[i].name, 0, sizeof(s[i].name)); - strcpy(s[i].name, name); + strlcpy(s[i].name, name, IFNAMSIZ); memcpy(&s[i].map, map, sizeof(s[i].map)); break; } @@ -479,7 +500,7 @@ int netdev_boot_setup_check(struct net_device *dev) for (i = 0; i < NETDEV_BOOT_SETUP_MAX; i++) { if (s[i].name[0] != '\0' && s[i].name[0] != ' ' && - !strncmp(dev->name, s[i].name, strlen(s[i].name))) { + !strcmp(dev->name, s[i].name)) { dev->irq = s[i].map.irq; dev->base_addr = s[i].map.base_addr; dev->mem_start = s[i].map.mem_start; @@ -870,7 +891,7 @@ int dev_alloc_name(struct net_device *dev, const char *name) * Change name of a device, can pass format strings "eth%d". * for wildcarding. */ -int dev_change_name(struct net_device *dev, char *newname) +int dev_change_name(struct net_device *dev, const char *newname) { char oldname[IFNAMSIZ]; int err = 0; @@ -896,7 +917,6 @@ int dev_change_name(struct net_device *dev, char *newname) err = dev_alloc_name(dev, newname); if (err < 0) return err; - strcpy(newname, dev->name); } else if (__dev_get_by_name(net, newname)) return -EEXIST; @@ -934,6 +954,38 @@ rollback: } /** + * dev_set_alias - change ifalias of a device + * @dev: device + * @alias: name up to IFALIASZ + * @len: limit of bytes to copy from info + * + * Set ifalias for a device, + */ +int dev_set_alias(struct net_device *dev, const char *alias, size_t len) +{ + ASSERT_RTNL(); + + if (len >= IFALIASZ) + return -EINVAL; + + if (!len) { + if (dev->ifalias) { + kfree(dev->ifalias); + dev->ifalias = NULL; + } + return 0; + } + + dev->ifalias = krealloc(dev->ifalias, len+1, GFP_KERNEL); + if (!dev->ifalias) + return -ENOMEM; + + strlcpy(dev->ifalias, alias, len+1); + return len; +} + + +/** * netdev_features_change - device changes features * @dev: device to cause notification * @@ -961,6 +1013,12 @@ void netdev_state_change(struct net_device *dev) } } +void netdev_bonding_change(struct net_device *dev) +{ + call_netdevice_notifiers(NETDEV_BONDING_FAILOVER, dev); +} +EXPORT_SYMBOL(netdev_bonding_change); + /** * dev_load - load a network module * @net: the applicable net namespace @@ -1117,6 +1175,29 @@ int dev_close(struct net_device *dev) } +/** + * dev_disable_lro - disable Large Receive Offload on a device + * @dev: device + * + * Disable Large Receive Offload (LRO) on a net device. Must be + * called under RTNL. This is needed if received packets may be + * forwarded to another interface. + */ +void dev_disable_lro(struct net_device *dev) +{ + if (dev->ethtool_ops && dev->ethtool_ops->get_flags && + dev->ethtool_ops->set_flags) { + u32 flags = dev->ethtool_ops->get_flags(dev); + if (flags & ETH_FLAG_LRO) { + flags &= ~ETH_FLAG_LRO; + dev->ethtool_ops->set_flags(dev, flags); + } + } + WARN_ON(dev->features & NETIF_F_LRO); +} +EXPORT_SYMBOL(dev_disable_lro); + + static int dev_boot_phase = 1; /* @@ -1290,19 +1371,23 @@ static void dev_queue_xmit_nit(struct sk_buff *skb, struct net_device *dev) } -void __netif_schedule(struct net_device *dev) +static inline void __netif_reschedule(struct Qdisc *q) { - if (!test_and_set_bit(__LINK_STATE_SCHED, &dev->state)) { - unsigned long flags; - struct softnet_data *sd; + struct softnet_data *sd; + unsigned long flags; - local_irq_save(flags); - sd = &__get_cpu_var(softnet_data); - dev->next_sched = sd->output_queue; - sd->output_queue = dev; - raise_softirq_irqoff(NET_TX_SOFTIRQ); - local_irq_restore(flags); - } + local_irq_save(flags); + sd = &__get_cpu_var(softnet_data); + q->next_sched = sd->output_queue; + sd->output_queue = q; + raise_softirq_irqoff(NET_TX_SOFTIRQ); + local_irq_restore(flags); +} + +void __netif_schedule(struct Qdisc *q) +{ + if (!test_and_set_bit(__QDISC_STATE_SCHED, &q->state)) + __netif_reschedule(q); } EXPORT_SYMBOL(__netif_schedule); @@ -1566,7 +1651,8 @@ static int dev_gso_segment(struct sk_buff *skb) return 0; } -int dev_hard_start_xmit(struct sk_buff *skb, struct net_device *dev) +int dev_hard_start_xmit(struct sk_buff *skb, struct net_device *dev, + struct netdev_queue *txq) { if (likely(!skb->next)) { if (!list_empty(&ptype_all)) @@ -1595,9 +1681,7 @@ gso: skb->next = nskb; return rc; } - if (unlikely((netif_queue_stopped(dev) || - netif_subqueue_stopped(dev, skb)) && - skb->next)) + if (unlikely(netif_tx_queue_stopped(txq) && skb->next)) return NETDEV_TX_BUSY; } while (skb->next); @@ -1608,6 +1692,74 @@ out_kfree_skb: return 0; } +static u32 simple_tx_hashrnd; +static int simple_tx_hashrnd_initialized = 0; + +static u16 simple_tx_hash(struct net_device *dev, struct sk_buff *skb) +{ + u32 addr1, addr2, ports; + u32 hash, ihl; + u8 ip_proto = 0; + + if (unlikely(!simple_tx_hashrnd_initialized)) { + get_random_bytes(&simple_tx_hashrnd, 4); + simple_tx_hashrnd_initialized = 1; + } + + switch (skb->protocol) { + case htons(ETH_P_IP): + if (!(ip_hdr(skb)->frag_off & htons(IP_MF | IP_OFFSET))) + ip_proto = ip_hdr(skb)->protocol; + addr1 = ip_hdr(skb)->saddr; + addr2 = ip_hdr(skb)->daddr; + ihl = ip_hdr(skb)->ihl; + break; + case htons(ETH_P_IPV6): + ip_proto = ipv6_hdr(skb)->nexthdr; + addr1 = ipv6_hdr(skb)->saddr.s6_addr32[3]; + addr2 = ipv6_hdr(skb)->daddr.s6_addr32[3]; + ihl = (40 >> 2); + break; + default: + return 0; + } + + + switch (ip_proto) { + case IPPROTO_TCP: + case IPPROTO_UDP: + case IPPROTO_DCCP: + case IPPROTO_ESP: + case IPPROTO_AH: + case IPPROTO_SCTP: + case IPPROTO_UDPLITE: + ports = *((u32 *) (skb_network_header(skb) + (ihl * 4))); + break; + + default: + ports = 0; + break; + } + + hash = jhash_3words(addr1, addr2, ports, simple_tx_hashrnd); + + return (u16) (((u64) hash * dev->real_num_tx_queues) >> 32); +} + +static struct netdev_queue *dev_pick_tx(struct net_device *dev, + struct sk_buff *skb) +{ + u16 queue_index = 0; + + if (dev->select_queue) + queue_index = dev->select_queue(dev, skb); + else if (dev->real_num_tx_queues > 1) + queue_index = simple_tx_hash(dev, skb); + + skb_set_queue_mapping(skb, queue_index); + return netdev_get_tx_queue(dev, queue_index); +} + /** * dev_queue_xmit - transmit a buffer * @skb: buffer to transmit @@ -1633,10 +1785,10 @@ out_kfree_skb: * the BH enable code must have IRQs enabled so that it will not deadlock. * --BLG */ - int dev_queue_xmit(struct sk_buff *skb) { struct net_device *dev = skb->dev; + struct netdev_queue *txq; struct Qdisc *q; int rc = -ENOMEM; @@ -1669,44 +1821,32 @@ int dev_queue_xmit(struct sk_buff *skb) } gso: - spin_lock_prefetch(&dev->queue_lock); - /* Disable soft irqs for various locks below. Also * stops preemption for RCU. */ rcu_read_lock_bh(); - /* Updates of qdisc are serialized by queue_lock. - * The struct Qdisc which is pointed to by qdisc is now a - * rcu structure - it may be accessed without acquiring - * a lock (but the structure may be stale.) The freeing of the - * qdisc will be deferred until it's known that there are no - * more references to it. - * - * If the qdisc has an enqueue function, we still need to - * hold the queue_lock before calling it, since queue_lock - * also serializes access to the device queue. - */ + txq = dev_pick_tx(dev, skb); + q = rcu_dereference(txq->qdisc); - q = rcu_dereference(dev->qdisc); #ifdef CONFIG_NET_CLS_ACT skb->tc_verd = SET_TC_AT(skb->tc_verd,AT_EGRESS); #endif if (q->enqueue) { - /* Grab device queue */ - spin_lock(&dev->queue_lock); - q = dev->qdisc; - if (q->enqueue) { - /* reset queue_mapping to zero */ - skb_set_queue_mapping(skb, 0); - rc = q->enqueue(skb, q); - qdisc_run(dev); - spin_unlock(&dev->queue_lock); - - rc = rc == NET_XMIT_BYPASS ? NET_XMIT_SUCCESS : rc; - goto out; + spinlock_t *root_lock = qdisc_lock(q); + + spin_lock(root_lock); + + if (unlikely(test_bit(__QDISC_STATE_DEACTIVATED, &q->state))) { + kfree_skb(skb); + rc = NET_XMIT_DROP; + } else { + rc = qdisc_enqueue_root(skb, q); + qdisc_run(q); } - spin_unlock(&dev->queue_lock); + spin_unlock(root_lock); + + goto out; } /* The device has no queue. Common case for software devices: @@ -1724,19 +1864,18 @@ gso: if (dev->flags & IFF_UP) { int cpu = smp_processor_id(); /* ok because BHs are off */ - if (dev->xmit_lock_owner != cpu) { + if (txq->xmit_lock_owner != cpu) { - HARD_TX_LOCK(dev, cpu); + HARD_TX_LOCK(dev, txq, cpu); - if (!netif_queue_stopped(dev) && - !netif_subqueue_stopped(dev, skb)) { + if (!netif_tx_queue_stopped(txq)) { rc = 0; - if (!dev_hard_start_xmit(skb, dev)) { - HARD_TX_UNLOCK(dev); + if (!dev_hard_start_xmit(skb, dev, txq)) { + HARD_TX_UNLOCK(dev, txq); goto out; } } - HARD_TX_UNLOCK(dev); + HARD_TX_UNLOCK(dev, txq); if (net_ratelimit()) printk(KERN_CRIT "Virtual device %s asks to " "queue packet!\n", dev->name); @@ -1810,7 +1949,6 @@ int netif_rx(struct sk_buff *skb) if (queue->input_pkt_queue.qlen <= netdev_max_backlog) { if (queue->input_pkt_queue.qlen) { enqueue: - dev_hold(skb->dev); __skb_queue_tail(&queue->input_pkt_queue, skb); local_irq_restore(flags); return NET_RX_SUCCESS; @@ -1842,22 +1980,6 @@ int netif_rx_ni(struct sk_buff *skb) EXPORT_SYMBOL(netif_rx_ni); -static inline struct net_device *skb_bond(struct sk_buff *skb) -{ - struct net_device *dev = skb->dev; - - if (dev->master) { - if (skb_bond_should_drop(skb)) { - kfree_skb(skb); - return NULL; - } - skb->dev = dev->master; - } - - return dev; -} - - static void net_tx_action(struct softirq_action *h) { struct softnet_data *sd = &__get_cpu_var(softnet_data); @@ -1874,13 +1996,13 @@ static void net_tx_action(struct softirq_action *h) struct sk_buff *skb = clist; clist = clist->next; - BUG_TRAP(!atomic_read(&skb->users)); + WARN_ON(atomic_read(&skb->users)); __kfree_skb(skb); } } if (sd->output_queue) { - struct net_device *head; + struct Qdisc *head; local_irq_disable(); head = sd->output_queue; @@ -1888,17 +2010,27 @@ static void net_tx_action(struct softirq_action *h) local_irq_enable(); while (head) { - struct net_device *dev = head; - head = head->next_sched; + struct Qdisc *q = head; + spinlock_t *root_lock; - smp_mb__before_clear_bit(); - clear_bit(__LINK_STATE_SCHED, &dev->state); + head = head->next_sched; - if (spin_trylock(&dev->queue_lock)) { - qdisc_run(dev); - spin_unlock(&dev->queue_lock); + root_lock = qdisc_lock(q); + if (spin_trylock(root_lock)) { + smp_mb__before_clear_bit(); + clear_bit(__QDISC_STATE_SCHED, + &q->state); + qdisc_run(q); + spin_unlock(root_lock); } else { - netif_schedule(dev); + if (!test_bit(__QDISC_STATE_DEACTIVATED, + &q->state)) { + __netif_reschedule(q); + } else { + smp_mb__before_clear_bit(); + clear_bit(__QDISC_STATE_SCHED, + &q->state); + } } } } @@ -1979,10 +2111,11 @@ static inline struct sk_buff *handle_macvlan(struct sk_buff *skb, */ static int ing_filter(struct sk_buff *skb) { - struct Qdisc *q; struct net_device *dev = skb->dev; - int result = TC_ACT_OK; u32 ttl = G_TC_RTTL(skb->tc_verd); + struct netdev_queue *rxq; + int result = TC_ACT_OK; + struct Qdisc *q; if (MAX_RED_LOOP < ttl++) { printk(KERN_WARNING @@ -1994,10 +2127,15 @@ static int ing_filter(struct sk_buff *skb) skb->tc_verd = SET_TC_RTTL(skb->tc_verd, ttl); skb->tc_verd = SET_TC_AT(skb->tc_verd, AT_INGRESS); - spin_lock(&dev->ingress_lock); - if ((q = dev->qdisc_ingress) != NULL) - result = q->enqueue(skb, q); - spin_unlock(&dev->ingress_lock); + rxq = &dev->rx_queue; + + q = rxq->qdisc; + if (q != &noop_qdisc) { + spin_lock(qdisc_lock(q)); + if (likely(!test_bit(__QDISC_STATE_DEACTIVATED, &q->state))) + result = qdisc_enqueue_root(skb, q); + spin_unlock(qdisc_lock(q)); + } return result; } @@ -2006,7 +2144,7 @@ static inline struct sk_buff *handle_ing(struct sk_buff *skb, struct packet_type **pt_prev, int *ret, struct net_device *orig_dev) { - if (!skb->dev->qdisc_ingress) + if (skb->dev->rx_queue.qdisc == &noop_qdisc) goto out; if (*pt_prev) { @@ -2030,6 +2168,33 @@ out: } #endif +/* + * netif_nit_deliver - deliver received packets to network taps + * @skb: buffer + * + * This function is used to deliver incoming packets to network + * taps. It should be used when the normal netif_receive_skb path + * is bypassed, for example because of VLAN acceleration. + */ +void netif_nit_deliver(struct sk_buff *skb) +{ + struct packet_type *ptype; + + if (list_empty(&ptype_all)) + return; + + skb_reset_network_header(skb); + skb_reset_transport_header(skb); + skb->mac_len = skb->network_header - skb->mac_header; + + rcu_read_lock(); + list_for_each_entry_rcu(ptype, &ptype_all, list) { + if (!ptype->dev || ptype->dev == skb->dev) + deliver_skb(skb, ptype, skb->dev); + } + rcu_read_unlock(); +} + /** * netif_receive_skb - process receive buffer from network * @skb: buffer to process @@ -2049,6 +2214,7 @@ int netif_receive_skb(struct sk_buff *skb) { struct packet_type *ptype, *pt_prev; struct net_device *orig_dev; + struct net_device *null_or_orig; int ret = NET_RX_DROP; __be16 type; @@ -2062,10 +2228,14 @@ int netif_receive_skb(struct sk_buff *skb) if (!skb->iif) skb->iif = skb->dev->ifindex; - orig_dev = skb_bond(skb); - - if (!orig_dev) - return NET_RX_DROP; + null_or_orig = NULL; + orig_dev = skb->dev; + if (orig_dev->master) { + if (skb_bond_should_drop(skb)) + null_or_orig = orig_dev; /* deliver only exact match */ + else + skb->dev = orig_dev->master; + } __get_cpu_var(netdev_rx_stat).total++; @@ -2089,7 +2259,8 @@ int netif_receive_skb(struct sk_buff *skb) #endif list_for_each_entry_rcu(ptype, &ptype_all, list) { - if (!ptype->dev || ptype->dev == skb->dev) { + if (ptype->dev == null_or_orig || ptype->dev == skb->dev || + ptype->dev == orig_dev) { if (pt_prev) ret = deliver_skb(skb, pt_prev, orig_dev); pt_prev = ptype; @@ -2114,7 +2285,8 @@ ncls: list_for_each_entry_rcu(ptype, &ptype_base[ntohs(type) & PTYPE_HASH_MASK], list) { if (ptype->type == type && - (!ptype->dev || ptype->dev == skb->dev)) { + (ptype->dev == null_or_orig || ptype->dev == skb->dev || + ptype->dev == orig_dev)) { if (pt_prev) ret = deliver_skb(skb, pt_prev, orig_dev); pt_prev = ptype; @@ -2136,6 +2308,20 @@ out: return ret; } +/* Network device is going away, flush any packets still pending */ +static void flush_backlog(void *arg) +{ + struct net_device *dev = arg; + struct softnet_data *queue = &__get_cpu_var(softnet_data); + struct sk_buff *skb, *tmp; + + skb_queue_walk_safe(&queue->input_pkt_queue, skb, tmp) + if (skb->dev == dev) { + __skb_unlink(skb, &queue->input_pkt_queue); + kfree_skb(skb); + } +} + static int process_backlog(struct napi_struct *napi, int quota) { int work = 0; @@ -2145,7 +2331,6 @@ static int process_backlog(struct napi_struct *napi, int quota) napi->weight = weight_p; do { struct sk_buff *skb; - struct net_device *dev; local_irq_disable(); skb = __skb_dequeue(&queue->input_pkt_queue); @@ -2154,14 +2339,9 @@ static int process_backlog(struct napi_struct *napi, int quota) local_irq_enable(); break; } - local_irq_enable(); - dev = skb->dev; - netif_receive_skb(skb); - - dev_put(dev); } while (++work < quota && jiffies == start_time); return work; @@ -2261,7 +2441,7 @@ out: */ if (!cpus_empty(net_dma.channel_mask)) { int chan_idx; - for_each_cpu_mask(chan_idx, net_dma.channel_mask) { + for_each_cpu_mask_nr(chan_idx, net_dma.channel_mask) { struct dma_chan *chan = net_dma.channels[chan_idx]; if (chan) dma_async_memcpy_issue_pending(chan); @@ -2769,16 +2949,35 @@ int netdev_set_master(struct net_device *slave, struct net_device *master) return 0; } -static void __dev_set_promiscuity(struct net_device *dev, int inc) +static void dev_change_rx_flags(struct net_device *dev, int flags) +{ + if (dev->flags & IFF_UP && dev->change_rx_flags) + dev->change_rx_flags(dev, flags); +} + +static int __dev_set_promiscuity(struct net_device *dev, int inc) { unsigned short old_flags = dev->flags; ASSERT_RTNL(); - if ((dev->promiscuity += inc) == 0) - dev->flags &= ~IFF_PROMISC; - else - dev->flags |= IFF_PROMISC; + dev->flags |= IFF_PROMISC; + dev->promiscuity += inc; + if (dev->promiscuity == 0) { + /* + * Avoid overflow. + * If inc causes overflow, untouch promisc and return error. + */ + if (inc < 0) + dev->flags &= ~IFF_PROMISC; + else { + dev->promiscuity -= inc; + printk(KERN_WARNING "%s: promiscuity touches roof, " + "set promiscuity failed, promiscuity feature " + "of device might be broken.\n", dev->name); + return -EOVERFLOW; + } + } if (dev->flags != old_flags) { printk(KERN_INFO "device %s %s promiscuous mode\n", dev->name, (dev->flags & IFF_PROMISC) ? "entered" : @@ -2793,9 +2992,9 @@ static void __dev_set_promiscuity(struct net_device *dev, int inc) current->uid, current->gid, audit_get_sessionid(current)); - if (dev->change_rx_flags) - dev->change_rx_flags(dev, IFF_PROMISC); + dev_change_rx_flags(dev, IFF_PROMISC); } + return 0; } /** @@ -2807,14 +3006,19 @@ static void __dev_set_promiscuity(struct net_device *dev, int inc) * remains above zero the interface remains promiscuous. Once it hits zero * the device reverts back to normal filtering operation. A negative inc * value is used to drop promiscuity on the device. + * Return 0 if successful or a negative errno code on error. */ -void dev_set_promiscuity(struct net_device *dev, int inc) +int dev_set_promiscuity(struct net_device *dev, int inc) { unsigned short old_flags = dev->flags; + int err; - __dev_set_promiscuity(dev, inc); + err = __dev_set_promiscuity(dev, inc); + if (err < 0) + return err; if (dev->flags != old_flags) dev_set_rx_mode(dev); + return err; } /** @@ -2827,22 +3031,37 @@ void dev_set_promiscuity(struct net_device *dev, int inc) * to all interfaces. Once it hits zero the device reverts back to normal * filtering operation. A negative @inc value is used to drop the counter * when releasing a resource needing all multicasts. + * Return 0 if successful or a negative errno code on error. */ -void dev_set_allmulti(struct net_device *dev, int inc) +int dev_set_allmulti(struct net_device *dev, int inc) { unsigned short old_flags = dev->flags; ASSERT_RTNL(); dev->flags |= IFF_ALLMULTI; - if ((dev->allmulti += inc) == 0) - dev->flags &= ~IFF_ALLMULTI; + dev->allmulti += inc; + if (dev->allmulti == 0) { + /* + * Avoid overflow. + * If inc causes overflow, untouch allmulti and return error. + */ + if (inc < 0) + dev->flags &= ~IFF_ALLMULTI; + else { + dev->allmulti -= inc; + printk(KERN_WARNING "%s: allmulti touches roof, " + "set allmulti failed, allmulti feature of " + "device might be broken.\n", dev->name); + return -EOVERFLOW; + } + } if (dev->flags ^ old_flags) { - if (dev->change_rx_flags) - dev->change_rx_flags(dev, IFF_ALLMULTI); + dev_change_rx_flags(dev, IFF_ALLMULTI); dev_set_rx_mode(dev); } + return 0; } /* @@ -2881,9 +3100,9 @@ void __dev_set_rx_mode(struct net_device *dev) void dev_set_rx_mode(struct net_device *dev) { - netif_tx_lock_bh(dev); + netif_addr_lock_bh(dev); __dev_set_rx_mode(dev); - netif_tx_unlock_bh(dev); + netif_addr_unlock_bh(dev); } int __dev_addr_delete(struct dev_addr_list **list, int *count, @@ -2961,11 +3180,11 @@ int dev_unicast_delete(struct net_device *dev, void *addr, int alen) ASSERT_RTNL(); - netif_tx_lock_bh(dev); + netif_addr_lock_bh(dev); err = __dev_addr_delete(&dev->uc_list, &dev->uc_count, addr, alen, 0); if (!err) __dev_set_rx_mode(dev); - netif_tx_unlock_bh(dev); + netif_addr_unlock_bh(dev); return err; } EXPORT_SYMBOL(dev_unicast_delete); @@ -2973,7 +3192,7 @@ EXPORT_SYMBOL(dev_unicast_delete); /** * dev_unicast_add - add a secondary unicast address * @dev: device - * @addr: address to delete + * @addr: address to add * @alen: length of @addr * * Add a secondary unicast address to the device or increase @@ -2987,11 +3206,11 @@ int dev_unicast_add(struct net_device *dev, void *addr, int alen) ASSERT_RTNL(); - netif_tx_lock_bh(dev); + netif_addr_lock_bh(dev); err = __dev_addr_add(&dev->uc_list, &dev->uc_count, addr, alen, 0); if (!err) __dev_set_rx_mode(dev); - netif_tx_unlock_bh(dev); + netif_addr_unlock_bh(dev); return err; } EXPORT_SYMBOL(dev_unicast_add); @@ -3058,12 +3277,12 @@ int dev_unicast_sync(struct net_device *to, struct net_device *from) { int err = 0; - netif_tx_lock_bh(to); + netif_addr_lock_bh(to); err = __dev_addr_sync(&to->uc_list, &to->uc_count, &from->uc_list, &from->uc_count); if (!err) __dev_set_rx_mode(to); - netif_tx_unlock_bh(to); + netif_addr_unlock_bh(to); return err; } EXPORT_SYMBOL(dev_unicast_sync); @@ -3079,15 +3298,15 @@ EXPORT_SYMBOL(dev_unicast_sync); */ void dev_unicast_unsync(struct net_device *to, struct net_device *from) { - netif_tx_lock_bh(from); - netif_tx_lock_bh(to); + netif_addr_lock_bh(from); + netif_addr_lock(to); __dev_addr_unsync(&to->uc_list, &to->uc_count, &from->uc_list, &from->uc_count); __dev_set_rx_mode(to); - netif_tx_unlock_bh(to); - netif_tx_unlock_bh(from); + netif_addr_unlock(to); + netif_addr_unlock_bh(from); } EXPORT_SYMBOL(dev_unicast_unsync); @@ -3107,7 +3326,7 @@ static void __dev_addr_discard(struct dev_addr_list **list) static void dev_addr_discard(struct net_device *dev) { - netif_tx_lock_bh(dev); + netif_addr_lock_bh(dev); __dev_addr_discard(&dev->uc_list); dev->uc_count = 0; @@ -3115,9 +3334,15 @@ static void dev_addr_discard(struct net_device *dev) __dev_addr_discard(&dev->mc_list); dev->mc_count = 0; - netif_tx_unlock_bh(dev); + netif_addr_unlock_bh(dev); } +/** + * dev_get_flags - get flags reported to userspace + * @dev: device + * + * Get the combination of flag bits exported through APIs to userspace. + */ unsigned dev_get_flags(const struct net_device *dev) { unsigned flags; @@ -3142,6 +3367,14 @@ unsigned dev_get_flags(const struct net_device *dev) return flags; } +/** + * dev_change_flags - change device settings + * @dev: device + * @flags: device state flags + * + * Change settings on device based state flags. The flags are + * in the userspace exported format. + */ int dev_change_flags(struct net_device *dev, unsigned flags) { int ret, changes; @@ -3163,8 +3396,8 @@ int dev_change_flags(struct net_device *dev, unsigned flags) * Load in the correct multicast list now the flags have changed. */ - if (dev->change_rx_flags && (old_flags ^ flags) & IFF_MULTICAST) - dev->change_rx_flags(dev, IFF_MULTICAST); + if ((old_flags ^ flags) & IFF_MULTICAST) + dev_change_rx_flags(dev, IFF_MULTICAST); dev_set_rx_mode(dev); @@ -3211,6 +3444,13 @@ int dev_change_flags(struct net_device *dev, unsigned flags) return ret; } +/** + * dev_set_mtu - Change maximum transfer unit + * @dev: device + * @new_mtu: new transfer unit + * + * Change the maximum transfer size of the network device. + */ int dev_set_mtu(struct net_device *dev, int new_mtu) { int err; @@ -3235,6 +3475,13 @@ int dev_set_mtu(struct net_device *dev, int new_mtu) return err; } +/** + * dev_set_mac_address - Change Media Access Control Address + * @dev: device + * @sa: new address + * + * Change the hardware (MAC) address of the device + */ int dev_set_mac_address(struct net_device *dev, struct sockaddr *sa) { int err; @@ -3624,14 +3871,11 @@ static int dev_new_index(struct net *net) } /* Delayed registration/unregisteration */ -static DEFINE_SPINLOCK(net_todo_list_lock); static LIST_HEAD(net_todo_list); static void net_set_todo(struct net_device *dev) { - spin_lock(&net_todo_list_lock); list_add_tail(&dev->todo_list, &net_todo_list); - spin_unlock(&net_todo_list_lock); } static void rollback_registered(struct net_device *dev) @@ -3678,7 +3922,7 @@ static void rollback_registered(struct net_device *dev) dev->uninit(dev); /* Notifier chain MUST detach us from master device. */ - BUG_TRAP(!dev->master); + WARN_ON(dev->master); /* Remove entries from kobject tree */ netdev_unregister_kobject(dev); @@ -3688,6 +3932,21 @@ static void rollback_registered(struct net_device *dev) dev_put(dev); } +static void __netdev_init_queue_locks_one(struct net_device *dev, + struct netdev_queue *dev_queue, + void *_unused) +{ + spin_lock_init(&dev_queue->_xmit_lock); + netdev_set_xmit_lockdep_class(&dev_queue->_xmit_lock, dev->type); + dev_queue->xmit_lock_owner = -1; +} + +static void netdev_init_queue_locks(struct net_device *dev) +{ + netdev_for_each_tx_queue(dev, __netdev_init_queue_locks_one, NULL); + __netdev_init_queue_locks_one(dev, &dev->rx_queue, NULL); +} + /** * register_netdevice - register a network device * @dev: device to register @@ -3722,11 +3981,9 @@ int register_netdevice(struct net_device *dev) BUG_ON(!dev_net(dev)); net = dev_net(dev); - spin_lock_init(&dev->queue_lock); - spin_lock_init(&dev->_xmit_lock); - netdev_set_lockdep_class(&dev->_xmit_lock, dev->type); - dev->xmit_lock_owner = -1; - spin_lock_init(&dev->ingress_lock); + spin_lock_init(&dev->addr_list_lock); + netdev_set_addr_lockdep_class(dev); + netdev_init_queue_locks(dev); dev->iflink = -1; @@ -3806,6 +4063,10 @@ int register_netdevice(struct net_device *dev) } } + /* Enable software GSO if SG is supported. */ + if (dev->features & NETIF_F_SG) + dev->features |= NETIF_F_GSO; + netdev_initialize_kobject(dev); ret = netdev_register_kobject(dev); if (ret) @@ -3941,33 +4202,24 @@ static void netdev_wait_allrefs(struct net_device *dev) * free_netdev(y1); * free_netdev(y2); * - * We are invoked by rtnl_unlock() after it drops the semaphore. + * We are invoked by rtnl_unlock(). * This allows us to deal with problems: * 1) We can delete sysfs objects which invoke hotplug * without deadlocking with linkwatch via keventd. * 2) Since we run with the RTNL semaphore not held, we can sleep * safely in order to wait for the netdev refcnt to drop to zero. + * + * We must not return until all unregister events added during + * the interval the lock was held have been completed. */ -static DEFINE_MUTEX(net_todo_run_mutex); void netdev_run_todo(void) { struct list_head list; - /* Need to guard against multiple cpu's getting out of order. */ - mutex_lock(&net_todo_run_mutex); - - /* Not safe to do outside the semaphore. We must not return - * until all unregister events invoked by the local processor - * have been completed (either by this todo run, or one on - * another cpu). - */ - if (list_empty(&net_todo_list)) - goto out; - /* Snapshot list, allow later requests */ - spin_lock(&net_todo_list_lock); list_replace_init(&net_todo_list, &list); - spin_unlock(&net_todo_list_lock); + + __rtnl_unlock(); while (!list_empty(&list)) { struct net_device *dev @@ -3983,13 +4235,15 @@ void netdev_run_todo(void) dev->reg_state = NETREG_UNREGISTERED; + on_each_cpu(flush_backlog, dev, 1); + netdev_wait_allrefs(dev); /* paranoia */ BUG_ON(atomic_read(&dev->refcnt)); - BUG_TRAP(!dev->ip_ptr); - BUG_TRAP(!dev->ip6_ptr); - BUG_TRAP(!dev->dn_ptr); + WARN_ON(dev->ip_ptr); + WARN_ON(dev->ip6_ptr); + WARN_ON(dev->dn_ptr); if (dev->destructor) dev->destructor(dev); @@ -3997,9 +4251,6 @@ void netdev_run_todo(void) /* Free network device */ kobject_put(&dev->dev.kobj); } - -out: - mutex_unlock(&net_todo_run_mutex); } static struct net_device_stats *internal_stats(struct net_device *dev) @@ -4007,6 +4258,20 @@ static struct net_device_stats *internal_stats(struct net_device *dev) return &dev->stats; } +static void netdev_init_one_queue(struct net_device *dev, + struct netdev_queue *queue, + void *_unused) +{ + queue->dev = dev; +} + +static void netdev_init_queues(struct net_device *dev) +{ + netdev_init_one_queue(dev, &dev->rx_queue, NULL); + netdev_for_each_tx_queue(dev, netdev_init_one_queue, NULL); + spin_lock_init(&dev->tx_global_lock); +} + /** * alloc_netdev_mq - allocate network device * @sizeof_priv: size of private data to allocate space for @@ -4021,14 +4286,14 @@ static struct net_device_stats *internal_stats(struct net_device *dev) struct net_device *alloc_netdev_mq(int sizeof_priv, const char *name, void (*setup)(struct net_device *), unsigned int queue_count) { - void *p; + struct netdev_queue *tx; struct net_device *dev; - int alloc_size; + size_t alloc_size; + void *p; BUG_ON(strlen(name) >= sizeof(dev->name)); - alloc_size = sizeof(struct net_device) + - sizeof(struct net_device_subqueue) * (queue_count - 1); + alloc_size = sizeof(struct net_device); if (sizeof_priv) { /* ensure 32-byte alignment of private area */ alloc_size = (alloc_size + NETDEV_ALIGN_CONST) & ~NETDEV_ALIGN_CONST; @@ -4043,22 +4308,33 @@ struct net_device *alloc_netdev_mq(int sizeof_priv, const char *name, return NULL; } + tx = kcalloc(queue_count, sizeof(struct netdev_queue), GFP_KERNEL); + if (!tx) { + printk(KERN_ERR "alloc_netdev: Unable to allocate " + "tx qdiscs.\n"); + kfree(p); + return NULL; + } + dev = (struct net_device *) (((long)p + NETDEV_ALIGN_CONST) & ~NETDEV_ALIGN_CONST); dev->padded = (char *)dev - (char *)p; dev_net_set(dev, &init_net); + dev->_tx = tx; + dev->num_tx_queues = queue_count; + dev->real_num_tx_queues = queue_count; + if (sizeof_priv) { dev->priv = ((char *)dev + - ((sizeof(struct net_device) + - (sizeof(struct net_device_subqueue) * - (queue_count - 1)) + NETDEV_ALIGN_CONST) + ((sizeof(struct net_device) + NETDEV_ALIGN_CONST) & ~NETDEV_ALIGN_CONST)); } - dev->egress_subqueue_count = queue_count; dev->gso_max_size = GSO_MAX_SIZE; + netdev_init_queues(dev); + dev->get_stats = internal_stats; netpoll_netdev_init(dev); setup(dev); @@ -4079,6 +4355,8 @@ void free_netdev(struct net_device *dev) { release_net(dev_net(dev)); + kfree(dev->_tx); + /* Compatibility with error handling in drivers */ if (dev->reg_state == NETREG_UNINITIALIZED) { kfree((char *)dev - dev->padded); @@ -4092,7 +4370,12 @@ void free_netdev(struct net_device *dev) put_device(&dev->dev); } -/* Synchronize with packet receive processing. */ +/** + * synchronize_net - Synchronize with packet receive processing + * + * Wait for packets currently being received to be done. + * Does not block later packets from starting. + */ void synchronize_net(void) { might_sleep(); @@ -4260,7 +4543,7 @@ static int dev_cpu_callback(struct notifier_block *nfb, void *ocpu) { struct sk_buff **list_skb; - struct net_device **list_net; + struct Qdisc **list_net; struct sk_buff *skb; unsigned int cpu, oldcpu = (unsigned long)ocpu; struct softnet_data *sd, *oldsd; @@ -4322,7 +4605,7 @@ static void net_dma_rebalance(struct net_dma *net_dma) i = 0; cpu = first_cpu(cpu_online_map); - for_each_cpu_mask(chan_idx, net_dma->channel_mask) { + for_each_cpu_mask_nr(chan_idx, net_dma->channel_mask) { chan = net_dma->channels[chan_idx]; n = ((num_online_cpus() / cpus_weight(net_dma->channel_mask)) @@ -4394,7 +4677,7 @@ netdev_dma_event(struct dma_client *client, struct dma_chan *chan, } /** - * netdev_dma_regiser - register the networking subsystem as a DMA client + * netdev_dma_register - register the networking subsystem as a DMA client */ static int __init netdev_dma_register(void) { @@ -4440,6 +4723,12 @@ int netdev_compute_features(unsigned long all, unsigned long one) one |= NETIF_F_GSO_SOFTWARE; one |= NETIF_F_GSO; + /* + * If even one device supports a GSO protocol with software fallback, + * enable it for all. + */ + all |= one & NETIF_F_GSO_SOFTWARE; + /* If even one device supports robust GSO, enable it for all. */ if (one & NETIF_F_GSO_ROBUST) all |= NETIF_F_GSO_ROBUST; @@ -4489,6 +4778,34 @@ err_name: return -ENOMEM; } +/** + * netdev_drivername - network driver for the device + * @dev: network device + * @buffer: buffer for resulting name + * @len: size of buffer + * + * Determine network driver for device. + */ +char *netdev_drivername(const struct net_device *dev, char *buffer, int len) +{ + const struct device_driver *driver; + const struct device *parent; + + if (len <= 0 || !buffer) + return buffer; + buffer[0] = 0; + + parent = dev->dev.parent; + + if (!parent) + return buffer; + + driver = parent->driver; + if (driver && driver->name) + strlcpy(buffer, driver->name, len); + return buffer; +} + static void __net_exit netdev_exit(struct net *net) { kfree(net->dev_name_head); @@ -4585,8 +4902,8 @@ static int __init net_dev_init(void) dev_boot_phase = 0; - open_softirq(NET_TX_SOFTIRQ, net_tx_action, NULL); - open_softirq(NET_RX_SOFTIRQ, net_rx_action, NULL); + open_softirq(NET_TX_SOFTIRQ, net_tx_action); + open_softirq(NET_RX_SOFTIRQ, net_rx_action); hotcpu_notifier(dev_cpu_callback, 0); dst_init(); diff --git a/net/core/dev_mcast.c b/net/core/dev_mcast.c index f8a3455f4493..9e2fa39f22a3 100644 --- a/net/core/dev_mcast.c +++ b/net/core/dev_mcast.c @@ -6,7 +6,7 @@ * Richard Underwood <richard@wuzz.demon.co.uk> * * Stir fried together from the IP multicast and CAP patches above - * Alan Cox <Alan.Cox@linux.org> + * Alan Cox <alan@lxorguk.ukuu.org.uk> * * Fixes: * Alan Cox : Update the device on a real delete @@ -72,7 +72,7 @@ int dev_mc_delete(struct net_device *dev, void *addr, int alen, int glbl) { int err; - netif_tx_lock_bh(dev); + netif_addr_lock_bh(dev); err = __dev_addr_delete(&dev->mc_list, &dev->mc_count, addr, alen, glbl); if (!err) { @@ -83,7 +83,7 @@ int dev_mc_delete(struct net_device *dev, void *addr, int alen, int glbl) __dev_set_rx_mode(dev); } - netif_tx_unlock_bh(dev); + netif_addr_unlock_bh(dev); return err; } @@ -95,11 +95,11 @@ int dev_mc_add(struct net_device *dev, void *addr, int alen, int glbl) { int err; - netif_tx_lock_bh(dev); + netif_addr_lock_bh(dev); err = __dev_addr_add(&dev->mc_list, &dev->mc_count, addr, alen, glbl); if (!err) __dev_set_rx_mode(dev); - netif_tx_unlock_bh(dev); + netif_addr_unlock_bh(dev); return err; } @@ -119,12 +119,12 @@ int dev_mc_sync(struct net_device *to, struct net_device *from) { int err = 0; - netif_tx_lock_bh(to); + netif_addr_lock_bh(to); err = __dev_addr_sync(&to->mc_list, &to->mc_count, &from->mc_list, &from->mc_count); if (!err) __dev_set_rx_mode(to); - netif_tx_unlock_bh(to); + netif_addr_unlock_bh(to); return err; } @@ -143,15 +143,15 @@ EXPORT_SYMBOL(dev_mc_sync); */ void dev_mc_unsync(struct net_device *to, struct net_device *from) { - netif_tx_lock_bh(from); - netif_tx_lock_bh(to); + netif_addr_lock_bh(from); + netif_addr_lock(to); __dev_addr_unsync(&to->mc_list, &to->mc_count, &from->mc_list, &from->mc_count); __dev_set_rx_mode(to); - netif_tx_unlock_bh(to); - netif_tx_unlock_bh(from); + netif_addr_unlock(to); + netif_addr_unlock_bh(from); } EXPORT_SYMBOL(dev_mc_unsync); @@ -164,7 +164,7 @@ static int dev_mc_seq_show(struct seq_file *seq, void *v) if (v == SEQ_START_TOKEN) return 0; - netif_tx_lock_bh(dev); + netif_addr_lock_bh(dev); for (m = dev->mc_list; m; m = m->next) { int i; @@ -176,7 +176,7 @@ static int dev_mc_seq_show(struct seq_file *seq, void *v) seq_putc(seq, '\n'); } - netif_tx_unlock_bh(dev); + netif_addr_unlock_bh(dev); return 0; } diff --git a/net/core/dst.c b/net/core/dst.c index fe03266130b6..09c1530f4681 100644 --- a/net/core/dst.c +++ b/net/core/dst.c @@ -203,6 +203,7 @@ void __dst_free(struct dst_entry * dst) if (dst_garbage.timer_inc > DST_GC_INC) { dst_garbage.timer_inc = DST_GC_INC; dst_garbage.timer_expires = DST_GC_MIN; + cancel_delayed_work(&dst_gc_work); schedule_delayed_work(&dst_gc_work, dst_garbage.timer_expires); } spin_unlock_bh(&dst_garbage.lock); diff --git a/net/core/ethtool.c b/net/core/ethtool.c index 0133b5ebd545..14ada537f895 100644 --- a/net/core/ethtool.c +++ b/net/core/ethtool.c @@ -209,6 +209,36 @@ static int ethtool_get_drvinfo(struct net_device *dev, void __user *useraddr) return 0; } +static int ethtool_set_rxhash(struct net_device *dev, void __user *useraddr) +{ + struct ethtool_rxnfc cmd; + + if (!dev->ethtool_ops->set_rxhash) + return -EOPNOTSUPP; + + if (copy_from_user(&cmd, useraddr, sizeof(cmd))) + return -EFAULT; + + return dev->ethtool_ops->set_rxhash(dev, &cmd); +} + +static int ethtool_get_rxhash(struct net_device *dev, void __user *useraddr) +{ + struct ethtool_rxnfc info; + + if (!dev->ethtool_ops->get_rxhash) + return -EOPNOTSUPP; + + if (copy_from_user(&info, useraddr, sizeof(info))) + return -EFAULT; + + dev->ethtool_ops->get_rxhash(dev, &info); + + if (copy_to_user(useraddr, &info, sizeof(info))) + return -EFAULT; + return 0; +} + static int ethtool_get_regs(struct net_device *dev, char __user *useraddr) { struct ethtool_regs regs; @@ -826,6 +856,7 @@ int dev_ethtool(struct net *net, struct ifreq *ifr) case ETHTOOL_GGSO: case ETHTOOL_GFLAGS: case ETHTOOL_GPFLAGS: + case ETHTOOL_GRXFH: break; default: if (!capable(CAP_NET_ADMIN)) @@ -977,6 +1008,12 @@ int dev_ethtool(struct net *net, struct ifreq *ifr) rc = ethtool_set_value(dev, useraddr, dev->ethtool_ops->set_priv_flags); break; + case ETHTOOL_GRXFH: + rc = ethtool_get_rxhash(dev, useraddr); + break; + case ETHTOOL_SRXFH: + rc = ethtool_set_rxhash(dev, useraddr); + break; default: rc = -EOPNOTSUPP; } diff --git a/net/core/fib_rules.c b/net/core/fib_rules.c index e3e9ab0f74e3..79de3b14a8d1 100644 --- a/net/core/fib_rules.c +++ b/net/core/fib_rules.c @@ -69,7 +69,7 @@ static void rules_ops_put(struct fib_rules_ops *ops) static void flush_route_cache(struct fib_rules_ops *ops) { if (ops->flush_cache) - ops->flush_cache(); + ops->flush_cache(ops); } int fib_rules_register(struct fib_rules_ops *ops) @@ -226,7 +226,7 @@ static int fib_nl_newrule(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg) ops = lookup_rules_ops(net, frh->family); if (ops == NULL) { - err = EAFNOSUPPORT; + err = -EAFNOSUPPORT; goto errout; } @@ -365,7 +365,7 @@ static int fib_nl_delrule(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg) ops = lookup_rules_ops(net, frh->family); if (ops == NULL) { - err = EAFNOSUPPORT; + err = -EAFNOSUPPORT; goto errout; } diff --git a/net/core/filter.c b/net/core/filter.c index 4f8369729a4e..df3744355839 100644 --- a/net/core/filter.c +++ b/net/core/filter.c @@ -68,7 +68,6 @@ static inline void *load_pointer(struct sk_buff *skb, int k, * sk_filter - run a packet through a socket filter * @sk: sock associated with &sk_buff * @skb: buffer to filter - * @needlock: set to 1 if the sock is not locked by caller. * * Run the filter code and then cut skb->data to correct size returned by * sk_run_filter. If pkt_len is 0 we toss packet. If skb->len is smaller diff --git a/net/core/flow.c b/net/core/flow.c index 19991175fdeb..5cf81052d044 100644 --- a/net/core/flow.c +++ b/net/core/flow.c @@ -298,7 +298,7 @@ void flow_cache_flush(void) init_completion(&info.completion); local_bh_disable(); - smp_call_function(flow_cache_flush_per_cpu, &info, 1, 0); + smp_call_function(flow_cache_flush_per_cpu, &info, 0); flow_cache_flush_tasklet((unsigned long)&info); local_bh_enable(); diff --git a/net/core/iovec.c b/net/core/iovec.c index 755c37fdaee7..4c9c0121c9da 100644 --- a/net/core/iovec.c +++ b/net/core/iovec.c @@ -36,7 +36,7 @@ * in any case. */ -int verify_iovec(struct msghdr *m, struct iovec *iov, char *address, int mode) +int verify_iovec(struct msghdr *m, struct iovec *iov, struct sockaddr *address, int mode) { int size, err, ct; diff --git a/net/core/link_watch.c b/net/core/link_watch.c index a5e372b9ec4d..bf8f7af699d7 100644 --- a/net/core/link_watch.c +++ b/net/core/link_watch.c @@ -77,10 +77,10 @@ static void rfc2863_policy(struct net_device *dev) } -static int linkwatch_urgent_event(struct net_device *dev) +static bool linkwatch_urgent_event(struct net_device *dev) { return netif_running(dev) && netif_carrier_ok(dev) && - dev->qdisc != dev->qdisc_sleeping; + qdisc_tx_changing(dev); } @@ -180,10 +180,9 @@ static void __linkwatch_run_queue(int urgent_only) rfc2863_policy(dev); if (dev->flags & IFF_UP) { - if (netif_carrier_ok(dev)) { - WARN_ON(dev->qdisc_sleeping == &noop_qdisc); + if (netif_carrier_ok(dev)) dev_activate(dev); - } else + else dev_deactivate(dev); netdev_state_change(dev); @@ -214,7 +213,7 @@ static void linkwatch_event(struct work_struct *dummy) void linkwatch_fire_event(struct net_device *dev) { - int urgent = linkwatch_urgent_event(dev); + bool urgent = linkwatch_urgent_event(dev); if (!test_and_set_bit(__LINK_STATE_LINKWATCH_PENDING, &dev->state)) { dev_hold(dev); diff --git a/net/core/neighbour.c b/net/core/neighbour.c index 65f01f71b3f3..1dc728b38589 100644 --- a/net/core/neighbour.c +++ b/net/core/neighbour.c @@ -927,9 +927,9 @@ int __neigh_event_send(struct neighbour *neigh, struct sk_buff *skb) if (skb_queue_len(&neigh->arp_queue) >= neigh->parms->queue_len) { struct sk_buff *buff; - buff = neigh->arp_queue.next; - __skb_unlink(buff, &neigh->arp_queue); + buff = __skb_dequeue(&neigh->arp_queue); kfree_skb(buff); + NEIGH_CACHE_STAT_INC(neigh->tbl, unres_discards); } __skb_queue_tail(&neigh->arp_queue, skb); } @@ -1258,24 +1258,20 @@ static void neigh_proxy_process(unsigned long arg) struct neigh_table *tbl = (struct neigh_table *)arg; long sched_next = 0; unsigned long now = jiffies; - struct sk_buff *skb; + struct sk_buff *skb, *n; spin_lock(&tbl->proxy_queue.lock); - skb = tbl->proxy_queue.next; - - while (skb != (struct sk_buff *)&tbl->proxy_queue) { - struct sk_buff *back = skb; - long tdif = NEIGH_CB(back)->sched_next - now; + skb_queue_walk_safe(&tbl->proxy_queue, skb, n) { + long tdif = NEIGH_CB(skb)->sched_next - now; - skb = skb->next; if (tdif <= 0) { - struct net_device *dev = back->dev; - __skb_unlink(back, &tbl->proxy_queue); + struct net_device *dev = skb->dev; + __skb_unlink(skb, &tbl->proxy_queue); if (tbl->proxy_redo && netif_running(dev)) - tbl->proxy_redo(back); + tbl->proxy_redo(skb); else - kfree_skb(back); + kfree_skb(skb); dev_put(dev); } else if (!sched_next || tdif < sched_next) @@ -2280,6 +2276,7 @@ static struct neighbour *neigh_get_idx(struct seq_file *seq, loff_t *pos) struct neighbour *n = neigh_get_first(seq); if (n) { + --(*pos); while (*pos) { n = neigh_get_next(seq, n, pos); if (!n) @@ -2340,6 +2337,7 @@ static struct pneigh_entry *pneigh_get_idx(struct seq_file *seq, loff_t *pos) struct pneigh_entry *pn = pneigh_get_first(seq); if (pn) { + --(*pos); while (*pos) { pn = pneigh_get_next(seq, pn, pos); if (!pn) @@ -2353,10 +2351,11 @@ static void *neigh_get_idx_any(struct seq_file *seq, loff_t *pos) { struct neigh_seq_state *state = seq->private; void *rc; + loff_t idxpos = *pos; - rc = neigh_get_idx(seq, pos); + rc = neigh_get_idx(seq, &idxpos); if (!rc && !(state->flags & NEIGH_SEQ_NEIGH_ONLY)) - rc = pneigh_get_idx(seq, pos); + rc = pneigh_get_idx(seq, &idxpos); return rc; } @@ -2365,7 +2364,6 @@ void *neigh_seq_start(struct seq_file *seq, loff_t *pos, struct neigh_table *tbl __acquires(tbl->lock) { struct neigh_seq_state *state = seq->private; - loff_t pos_minus_one; state->tbl = tbl; state->bucket = 0; @@ -2373,8 +2371,7 @@ void *neigh_seq_start(struct seq_file *seq, loff_t *pos, struct neigh_table *tbl read_lock_bh(&tbl->lock); - pos_minus_one = *pos - 1; - return *pos ? neigh_get_idx_any(seq, &pos_minus_one) : SEQ_START_TOKEN; + return *pos ? neigh_get_idx_any(seq, pos) : SEQ_START_TOKEN; } EXPORT_SYMBOL(neigh_seq_start); @@ -2384,7 +2381,7 @@ void *neigh_seq_next(struct seq_file *seq, void *v, loff_t *pos) void *rc; if (v == SEQ_START_TOKEN) { - rc = neigh_get_idx(seq, pos); + rc = neigh_get_first(seq); goto out; } @@ -2462,12 +2459,12 @@ static int neigh_stat_seq_show(struct seq_file *seq, void *v) struct neigh_statistics *st = v; if (v == SEQ_START_TOKEN) { - seq_printf(seq, "entries allocs destroys hash_grows lookups hits res_failed rcv_probes_mcast rcv_probes_ucast periodic_gc_runs forced_gc_runs\n"); + seq_printf(seq, "entries allocs destroys hash_grows lookups hits res_failed rcv_probes_mcast rcv_probes_ucast periodic_gc_runs forced_gc_runs unresolved_discards\n"); return 0; } seq_printf(seq, "%08x %08lx %08lx %08lx %08lx %08lx %08lx " - "%08lx %08lx %08lx %08lx\n", + "%08lx %08lx %08lx %08lx %08lx\n", atomic_read(&tbl->entries), st->allocs, @@ -2483,7 +2480,8 @@ static int neigh_stat_seq_show(struct seq_file *seq, void *v) st->rcv_probes_ucast, st->periodic_gc_runs, - st->forced_gc_runs + st->forced_gc_runs, + st->unres_discards ); return 0; diff --git a/net/core/net-sysfs.c b/net/core/net-sysfs.c index 90e2177af081..92d6b9467314 100644 --- a/net/core/net-sysfs.c +++ b/net/core/net-sysfs.c @@ -209,9 +209,44 @@ static ssize_t store_tx_queue_len(struct device *dev, return netdev_store(dev, attr, buf, len, change_tx_queue_len); } +static ssize_t store_ifalias(struct device *dev, struct device_attribute *attr, + const char *buf, size_t len) +{ + struct net_device *netdev = to_net_dev(dev); + size_t count = len; + ssize_t ret; + + if (!capable(CAP_NET_ADMIN)) + return -EPERM; + + /* ignore trailing newline */ + if (len > 0 && buf[len - 1] == '\n') + --count; + + rtnl_lock(); + ret = dev_set_alias(netdev, buf, count); + rtnl_unlock(); + + return ret < 0 ? ret : len; +} + +static ssize_t show_ifalias(struct device *dev, + struct device_attribute *attr, char *buf) +{ + const struct net_device *netdev = to_net_dev(dev); + ssize_t ret = 0; + + rtnl_lock(); + if (netdev->ifalias) + ret = sprintf(buf, "%s\n", netdev->ifalias); + rtnl_unlock(); + return ret; +} + static struct device_attribute net_class_attributes[] = { __ATTR(addr_len, S_IRUGO, show_addr_len, NULL), __ATTR(dev_id, S_IRUGO, show_dev_id, NULL), + __ATTR(ifalias, S_IRUGO | S_IWUSR, show_ifalias, store_ifalias), __ATTR(iflink, S_IRUGO, show_iflink, NULL), __ATTR(ifindex, S_IRUGO, show_ifindex, NULL), __ATTR(features, S_IRUGO, show_features, NULL), @@ -242,11 +277,11 @@ static ssize_t netstat_show(const struct device *d, offset % sizeof(unsigned long) != 0); read_lock(&dev_base_lock); - if (dev_isalive(dev) && dev->get_stats && - (stats = (*dev->get_stats)(dev))) + if (dev_isalive(dev)) { + stats = dev->get_stats(dev); ret = sprintf(buf, fmt_ulong, *(unsigned long *)(((u8 *) stats) + offset)); - + } read_unlock(&dev_base_lock); return ret; } @@ -318,7 +353,7 @@ static struct attribute_group netstat_group = { .attrs = netstat_attrs, }; -#ifdef CONFIG_WIRELESS_EXT +#ifdef CONFIG_WIRELESS_EXT_SYSFS /* helper function that does all the locking etc for wireless stats */ static ssize_t wireless_show(struct device *d, char *buf, ssize_t (*format)(const struct iw_statistics *, @@ -418,6 +453,7 @@ static void netdev_release(struct device *d) BUG_ON(dev->reg_state != NETREG_RELEASED); + kfree(dev->ifalias); kfree((char *)dev - dev->padded); } @@ -457,10 +493,9 @@ int netdev_register_kobject(struct net_device *net) strlcpy(dev->bus_id, net->name, BUS_ID_SIZE); #ifdef CONFIG_SYSFS - if (net->get_stats) - *groups++ = &netstat_group; + *groups++ = &netstat_group; -#ifdef CONFIG_WIRELESS_EXT +#ifdef CONFIG_WIRELESS_EXT_SYSFS if (net->wireless_handlers && net->wireless_handlers->get_wireless_stats) *groups++ = &wireless_group; #endif @@ -469,6 +504,19 @@ int netdev_register_kobject(struct net_device *net) return device_add(dev); } +int netdev_class_create_file(struct class_attribute *class_attr) +{ + return class_create_file(&net_class, class_attr); +} + +void netdev_class_remove_file(struct class_attribute *class_attr) +{ + class_remove_file(&net_class, class_attr); +} + +EXPORT_SYMBOL(netdev_class_create_file); +EXPORT_SYMBOL(netdev_class_remove_file); + void netdev_initialize_kobject(struct net_device *net) { struct device *device = &(net->dev); diff --git a/net/core/net_namespace.c b/net/core/net_namespace.c index 7c52fe277b62..b0dc818a91d7 100644 --- a/net/core/net_namespace.c +++ b/net/core/net_namespace.c @@ -18,6 +18,7 @@ static struct list_head *first_device = &pernet_list; static DEFINE_MUTEX(net_mutex); LIST_HEAD(net_namespace_list); +EXPORT_SYMBOL_GPL(net_namespace_list); struct net init_net; EXPORT_SYMBOL(init_net); diff --git a/net/core/netpoll.c b/net/core/netpoll.c index 8fb134da0346..6c7af390be0a 100644 --- a/net/core/netpoll.c +++ b/net/core/netpoll.c @@ -58,25 +58,28 @@ static void queue_process(struct work_struct *work) while ((skb = skb_dequeue(&npinfo->txq))) { struct net_device *dev = skb->dev; + struct netdev_queue *txq; if (!netif_device_present(dev) || !netif_running(dev)) { __kfree_skb(skb); continue; } + txq = netdev_get_tx_queue(dev, skb_get_queue_mapping(skb)); + local_irq_save(flags); - netif_tx_lock(dev); - if ((netif_queue_stopped(dev) || - netif_subqueue_stopped(dev, skb)) || - dev->hard_start_xmit(skb, dev) != NETDEV_TX_OK) { + __netif_tx_lock(txq, smp_processor_id()); + if (netif_tx_queue_stopped(txq) || + netif_tx_queue_frozen(txq) || + dev->hard_start_xmit(skb, dev) != NETDEV_TX_OK) { skb_queue_head(&npinfo->txq, skb); - netif_tx_unlock(dev); + __netif_tx_unlock(txq); local_irq_restore(flags); schedule_delayed_work(&npinfo->tx_work, HZ/10); return; } - netif_tx_unlock(dev); + __netif_tx_unlock(txq); local_irq_restore(flags); } } @@ -278,17 +281,19 @@ static void netpoll_send_skb(struct netpoll *np, struct sk_buff *skb) /* don't get messages out of order, and no recursion */ if (skb_queue_len(&npinfo->txq) == 0 && !netpoll_owner_active(dev)) { + struct netdev_queue *txq; unsigned long flags; + txq = netdev_get_tx_queue(dev, skb_get_queue_mapping(skb)); + local_irq_save(flags); /* try until next clock tick */ for (tries = jiffies_to_usecs(1)/USEC_PER_POLL; tries > 0; --tries) { - if (netif_tx_trylock(dev)) { - if (!netif_queue_stopped(dev) && - !netif_subqueue_stopped(dev, skb)) + if (__netif_tx_trylock(txq)) { + if (!netif_tx_queue_stopped(txq)) status = dev->hard_start_xmit(skb, dev); - netif_tx_unlock(dev); + __netif_tx_unlock(txq); if (status == NETDEV_TX_OK) break; diff --git a/net/core/pktgen.c b/net/core/pktgen.c index fdf537707e51..99f656d35b4f 100644 --- a/net/core/pktgen.c +++ b/net/core/pktgen.c @@ -168,7 +168,7 @@ #include <asm/div64.h> /* do_div */ #include <asm/timex.h> -#define VERSION "pktgen v2.69: Packet Generator for packet performance testing.\n" +#define VERSION "pktgen v2.70: Packet Generator for packet performance testing.\n" #define IP_NAME_SZ 32 #define MAX_MPLS_LABELS 16 /* This is the max label stack depth */ @@ -189,6 +189,7 @@ #define F_FLOW_SEQ (1<<11) /* Sequential flows */ #define F_IPSEC_ON (1<<12) /* ipsec on for flows */ #define F_QUEUE_MAP_RND (1<<13) /* queue map Random */ +#define F_QUEUE_MAP_CPU (1<<14) /* queue map mirrors smp_processor_id() */ /* Thread control flag bits */ #define T_TERMINATE (1<<0) @@ -621,6 +622,9 @@ static int pktgen_if_show(struct seq_file *seq, void *v) if (pkt_dev->flags & F_QUEUE_MAP_RND) seq_printf(seq, "QUEUE_MAP_RND "); + if (pkt_dev->flags & F_QUEUE_MAP_CPU) + seq_printf(seq, "QUEUE_MAP_CPU "); + if (pkt_dev->cflows) { if (pkt_dev->flags & F_FLOW_SEQ) seq_printf(seq, "FLOW_SEQ "); /*in sequence flows*/ @@ -1134,6 +1138,12 @@ static ssize_t pktgen_if_write(struct file *file, else if (strcmp(f, "!QUEUE_MAP_RND") == 0) pkt_dev->flags &= ~F_QUEUE_MAP_RND; + + else if (strcmp(f, "QUEUE_MAP_CPU") == 0) + pkt_dev->flags |= F_QUEUE_MAP_CPU; + + else if (strcmp(f, "!QUEUE_MAP_CPU") == 0) + pkt_dev->flags &= ~F_QUEUE_MAP_CPU; #ifdef CONFIG_XFRM else if (strcmp(f, "IPSEC") == 0) pkt_dev->flags |= F_IPSEC_ON; @@ -1875,7 +1885,7 @@ static int pktgen_device_event(struct notifier_block *unused, { struct net_device *dev = ptr; - if (dev_net(dev) != &init_net) + if (!net_eq(dev_net(dev), &init_net)) return NOTIFY_DONE; /* It is OK that we do not hold the group lock right now, @@ -1895,6 +1905,23 @@ static int pktgen_device_event(struct notifier_block *unused, return NOTIFY_DONE; } +static struct net_device *pktgen_dev_get_by_name(struct pktgen_dev *pkt_dev, const char *ifname) +{ + char b[IFNAMSIZ+5]; + int i = 0; + + for(i=0; ifname[i] != '@'; i++) { + if(i == IFNAMSIZ) + break; + + b[i] = ifname[i]; + } + b[i] = 0; + + return dev_get_by_name(&init_net, b); +} + + /* Associate pktgen_dev with a device. */ static int pktgen_setup_dev(struct pktgen_dev *pkt_dev, const char *ifname) @@ -1908,7 +1935,7 @@ static int pktgen_setup_dev(struct pktgen_dev *pkt_dev, const char *ifname) pkt_dev->odev = NULL; } - odev = dev_get_by_name(&init_net, ifname); + odev = pktgen_dev_get_by_name(pkt_dev, ifname); if (!odev) { printk(KERN_ERR "pktgen: no such netdevice: \"%s\"\n", ifname); return -ENODEV; @@ -1934,6 +1961,8 @@ static int pktgen_setup_dev(struct pktgen_dev *pkt_dev, const char *ifname) */ static void pktgen_setup_inject(struct pktgen_dev *pkt_dev) { + int ntxq; + if (!pkt_dev->odev) { printk(KERN_ERR "pktgen: ERROR: pkt_dev->odev == NULL in " "setup_inject.\n"); @@ -1942,6 +1971,33 @@ static void pktgen_setup_inject(struct pktgen_dev *pkt_dev) return; } + /* make sure that we don't pick a non-existing transmit queue */ + ntxq = pkt_dev->odev->real_num_tx_queues; + if (ntxq <= num_online_cpus() && (pkt_dev->flags & F_QUEUE_MAP_CPU)) { + printk(KERN_WARNING "pktgen: WARNING: QUEUE_MAP_CPU " + "disabled because CPU count (%d) exceeds number ", + num_online_cpus()); + printk(KERN_WARNING "pktgen: WARNING: of tx queues " + "(%d) on %s \n", ntxq, pkt_dev->odev->name); + pkt_dev->flags &= ~F_QUEUE_MAP_CPU; + } + if (ntxq <= pkt_dev->queue_map_min) { + printk(KERN_WARNING "pktgen: WARNING: Requested " + "queue_map_min (%d) exceeds number of tx\n", + pkt_dev->queue_map_min); + printk(KERN_WARNING "pktgen: WARNING: queues (%d) on " + "%s, resetting\n", ntxq, pkt_dev->odev->name); + pkt_dev->queue_map_min = ntxq - 1; + } + if (ntxq <= pkt_dev->queue_map_max) { + printk(KERN_WARNING "pktgen: WARNING: Requested " + "queue_map_max (%d) exceeds number of tx\n", + pkt_dev->queue_map_max); + printk(KERN_WARNING "pktgen: WARNING: queues (%d) on " + "%s, resetting\n", ntxq, pkt_dev->odev->name); + pkt_dev->queue_map_max = ntxq - 1; + } + /* Default to the interface's mac if not explicitly set. */ if (is_zero_ether_addr(pkt_dev->src_mac)) @@ -2085,15 +2141,19 @@ static inline int f_pick(struct pktgen_dev *pkt_dev) if (pkt_dev->flows[flow].count >= pkt_dev->lflow) { /* reset time */ pkt_dev->flows[flow].count = 0; + pkt_dev->flows[flow].flags = 0; pkt_dev->curfl += 1; if (pkt_dev->curfl >= pkt_dev->cflows) pkt_dev->curfl = 0; /*reset */ } } else { flow = random32() % pkt_dev->cflows; + pkt_dev->curfl = flow; - if (pkt_dev->flows[flow].count > pkt_dev->lflow) + if (pkt_dev->flows[flow].count > pkt_dev->lflow) { pkt_dev->flows[flow].count = 0; + pkt_dev->flows[flow].flags = 0; + } } return pkt_dev->curfl; @@ -2123,6 +2183,28 @@ static void get_ipsec_sa(struct pktgen_dev *pkt_dev, int flow) } } #endif +static void set_cur_queue_map(struct pktgen_dev *pkt_dev) +{ + + if (pkt_dev->flags & F_QUEUE_MAP_CPU) + pkt_dev->cur_queue_map = smp_processor_id(); + + else if (pkt_dev->queue_map_min < pkt_dev->queue_map_max) { + __u16 t; + if (pkt_dev->flags & F_QUEUE_MAP_RND) { + t = random32() % + (pkt_dev->queue_map_max - + pkt_dev->queue_map_min + 1) + + pkt_dev->queue_map_min; + } else { + t = pkt_dev->cur_queue_map + 1; + if (t > pkt_dev->queue_map_max) + t = pkt_dev->queue_map_min; + } + pkt_dev->cur_queue_map = t; + } +} + /* Increment/randomize headers according to flags and current values * for IP src/dest, UDP src/dst port, MAC-Addr src/dst */ @@ -2144,7 +2226,7 @@ static void mod_cur_headers(struct pktgen_dev *pkt_dev) mc = random32() % pkt_dev->src_mac_count; else { mc = pkt_dev->cur_src_mac_offset++; - if (pkt_dev->cur_src_mac_offset > + if (pkt_dev->cur_src_mac_offset >= pkt_dev->src_mac_count) pkt_dev->cur_src_mac_offset = 0; } @@ -2171,7 +2253,7 @@ static void mod_cur_headers(struct pktgen_dev *pkt_dev) else { mc = pkt_dev->cur_dst_mac_offset++; - if (pkt_dev->cur_dst_mac_offset > + if (pkt_dev->cur_dst_mac_offset >= pkt_dev->dst_mac_count) { pkt_dev->cur_dst_mac_offset = 0; } @@ -2325,19 +2407,7 @@ static void mod_cur_headers(struct pktgen_dev *pkt_dev) pkt_dev->cur_pkt_size = t; } - if (pkt_dev->queue_map_min < pkt_dev->queue_map_max) { - __u16 t; - if (pkt_dev->flags & F_QUEUE_MAP_RND) { - t = random32() % - (pkt_dev->queue_map_max - pkt_dev->queue_map_min + 1) - + pkt_dev->queue_map_min; - } else { - t = pkt_dev->cur_queue_map + 1; - if (t > pkt_dev->queue_map_max) - t = pkt_dev->queue_map_min; - } - pkt_dev->cur_queue_map = t; - } + set_cur_queue_map(pkt_dev); pkt_dev->flows[flow].count++; } @@ -2404,7 +2474,7 @@ static inline int process_ipsec(struct pktgen_dev *pkt_dev, if (ret < 0) { printk(KERN_ERR "Error expanding " "ipsec packet %d\n",ret); - return 0; + goto err; } } @@ -2414,8 +2484,7 @@ static inline int process_ipsec(struct pktgen_dev *pkt_dev, if (ret) { printk(KERN_ERR "Error creating ipsec " "packet %d\n",ret); - kfree_skb(skb); - return 0; + goto err; } /* restore ll */ eth = (__u8 *) skb_push(skb, ETH_HLEN); @@ -2424,6 +2493,9 @@ static inline int process_ipsec(struct pktgen_dev *pkt_dev, } } return 1; +err: + kfree_skb(skb); + return 0; } #endif @@ -2458,7 +2530,7 @@ static struct sk_buff *fill_packet_ipv4(struct net_device *odev, __be16 *vlan_encapsulated_proto = NULL; /* packet type ID field (or len) for VLAN tag */ __be16 *svlan_tci = NULL; /* Encapsulates priority and SVLAN ID */ __be16 *svlan_encapsulated_proto = NULL; /* packet type ID field (or len) for SVLAN tag */ - + u16 queue_map; if (pkt_dev->nr_labels) protocol = htons(ETH_P_MPLS_UC); @@ -2469,6 +2541,7 @@ static struct sk_buff *fill_packet_ipv4(struct net_device *odev, /* Update any of the values, used when we're incrementing various * fields. */ + queue_map = pkt_dev->cur_queue_map; mod_cur_headers(pkt_dev); datalen = (odev->hard_header_len + 16) & ~0xf; @@ -2507,7 +2580,7 @@ static struct sk_buff *fill_packet_ipv4(struct net_device *odev, skb->network_header = skb->tail; skb->transport_header = skb->network_header + sizeof(struct iphdr); skb_put(skb, sizeof(struct iphdr) + sizeof(struct udphdr)); - skb_set_queue_mapping(skb, pkt_dev->cur_queue_map); + skb_set_queue_mapping(skb, queue_map); iph = ip_hdr(skb); udph = udp_hdr(skb); @@ -2797,6 +2870,7 @@ static struct sk_buff *fill_packet_ipv6(struct net_device *odev, __be16 *vlan_encapsulated_proto = NULL; /* packet type ID field (or len) for VLAN tag */ __be16 *svlan_tci = NULL; /* Encapsulates priority and SVLAN ID */ __be16 *svlan_encapsulated_proto = NULL; /* packet type ID field (or len) for SVLAN tag */ + u16 queue_map; if (pkt_dev->nr_labels) protocol = htons(ETH_P_MPLS_UC); @@ -2807,6 +2881,7 @@ static struct sk_buff *fill_packet_ipv6(struct net_device *odev, /* Update any of the values, used when we're incrementing various * fields. */ + queue_map = pkt_dev->cur_queue_map; mod_cur_headers(pkt_dev); skb = alloc_skb(pkt_dev->cur_pkt_size + 64 + 16 + @@ -2844,7 +2919,7 @@ static struct sk_buff *fill_packet_ipv6(struct net_device *odev, skb->network_header = skb->tail; skb->transport_header = skb->network_header + sizeof(struct ipv6hdr); skb_put(skb, sizeof(struct ipv6hdr) + sizeof(struct udphdr)); - skb_set_queue_mapping(skb, pkt_dev->cur_queue_map); + skb_set_queue_mapping(skb, queue_map); iph = ipv6_hdr(skb); udph = udp_hdr(skb); @@ -3263,7 +3338,9 @@ static void pktgen_rem_thread(struct pktgen_thread *t) static __inline__ void pktgen_xmit(struct pktgen_dev *pkt_dev) { struct net_device *odev = NULL; + struct netdev_queue *txq; __u64 idle_start = 0; + u16 queue_map; int ret; odev = pkt_dev->odev; @@ -3285,9 +3362,16 @@ static __inline__ void pktgen_xmit(struct pktgen_dev *pkt_dev) } } - if ((netif_queue_stopped(odev) || - (pkt_dev->skb && - netif_subqueue_stopped(odev, pkt_dev->skb))) || + if (!pkt_dev->skb) { + set_cur_queue_map(pkt_dev); + queue_map = pkt_dev->cur_queue_map; + } else { + queue_map = skb_get_queue_mapping(pkt_dev->skb); + } + + txq = netdev_get_tx_queue(odev, queue_map); + if (netif_tx_queue_stopped(txq) || + netif_tx_queue_frozen(txq) || need_resched()) { idle_start = getCurUs(); @@ -3303,8 +3387,8 @@ static __inline__ void pktgen_xmit(struct pktgen_dev *pkt_dev) pkt_dev->idle_acc += getCurUs() - idle_start; - if (netif_queue_stopped(odev) || - netif_subqueue_stopped(odev, pkt_dev->skb)) { + if (netif_tx_queue_stopped(txq) || + netif_tx_queue_frozen(txq)) { pkt_dev->next_tx_us = getCurUs(); /* TODO */ pkt_dev->next_tx_ns = 0; goto out; /* Try the next interface */ @@ -3331,9 +3415,13 @@ static __inline__ void pktgen_xmit(struct pktgen_dev *pkt_dev) } } - netif_tx_lock_bh(odev); - if (!netif_queue_stopped(odev) && - !netif_subqueue_stopped(odev, pkt_dev->skb)) { + /* fill_packet() might have changed the queue */ + queue_map = skb_get_queue_mapping(pkt_dev->skb); + txq = netdev_get_tx_queue(odev, queue_map); + + __netif_tx_lock_bh(txq); + if (!netif_tx_queue_stopped(txq) && + !netif_tx_queue_frozen(txq)) { atomic_inc(&(pkt_dev->skb->users)); retry_now: @@ -3377,7 +3465,7 @@ static __inline__ void pktgen_xmit(struct pktgen_dev *pkt_dev) pkt_dev->next_tx_ns = 0; } - netif_tx_unlock_bh(odev); + __netif_tx_unlock_bh(txq); /* If pkt_dev->count is zero, then run forever */ if ((pkt_dev->count != 0) && (pkt_dev->sofar >= pkt_dev->count)) { diff --git a/net/core/request_sock.c b/net/core/request_sock.c index 2d3035d3abd7..7552495aff7a 100644 --- a/net/core/request_sock.c +++ b/net/core/request_sock.c @@ -123,7 +123,7 @@ void reqsk_queue_destroy(struct request_sock_queue *queue) } } - BUG_TRAP(lopt->qlen == 0); + WARN_ON(lopt->qlen != 0); if (lopt_size > PAGE_SIZE) vfree(lopt); else diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c index a9a77216310e..3630131fa1fa 100644 --- a/net/core/rtnetlink.c +++ b/net/core/rtnetlink.c @@ -73,7 +73,7 @@ void __rtnl_unlock(void) void rtnl_unlock(void) { - mutex_unlock(&rtnl_mutex); + /* This fellow will unlock it for us. */ netdev_run_todo(); } @@ -586,6 +586,7 @@ static inline size_t if_nlmsg_size(const struct net_device *dev) { return NLMSG_ALIGN(sizeof(struct ifinfomsg)) + nla_total_size(IFNAMSIZ) /* IFLA_IFNAME */ + + nla_total_size(IFALIASZ) /* IFLA_IFALIAS */ + nla_total_size(IFNAMSIZ) /* IFLA_QDISC */ + nla_total_size(sizeof(struct rtnl_link_ifmap)) + nla_total_size(sizeof(struct rtnl_link_stats)) @@ -605,8 +606,11 @@ static int rtnl_fill_ifinfo(struct sk_buff *skb, struct net_device *dev, int type, u32 pid, u32 seq, u32 change, unsigned int flags) { + struct netdev_queue *txq; struct ifinfomsg *ifm; struct nlmsghdr *nlh; + struct net_device_stats *stats; + struct nlattr *attr; nlh = nlmsg_put(skb, pid, seq, type, sizeof(*ifm), flags); if (nlh == NULL) @@ -633,8 +637,12 @@ static int rtnl_fill_ifinfo(struct sk_buff *skb, struct net_device *dev, if (dev->master) NLA_PUT_U32(skb, IFLA_MASTER, dev->master->ifindex); - if (dev->qdisc_sleeping) - NLA_PUT_STRING(skb, IFLA_QDISC, dev->qdisc_sleeping->ops->id); + txq = netdev_get_tx_queue(dev, 0); + if (txq->qdisc_sleeping) + NLA_PUT_STRING(skb, IFLA_QDISC, txq->qdisc_sleeping->ops->id); + + if (dev->ifalias) + NLA_PUT_STRING(skb, IFLA_IFALIAS, dev->ifalias); if (1) { struct rtnl_link_ifmap map = { @@ -653,19 +661,13 @@ static int rtnl_fill_ifinfo(struct sk_buff *skb, struct net_device *dev, NLA_PUT(skb, IFLA_BROADCAST, dev->addr_len, dev->broadcast); } - if (dev->get_stats) { - struct net_device_stats *stats = dev->get_stats(dev); - if (stats) { - struct nlattr *attr; - - attr = nla_reserve(skb, IFLA_STATS, - sizeof(struct rtnl_link_stats)); - if (attr == NULL) - goto nla_put_failure; + attr = nla_reserve(skb, IFLA_STATS, + sizeof(struct rtnl_link_stats)); + if (attr == NULL) + goto nla_put_failure; - copy_rtnl_link_stats(nla_data(attr), stats); - } - } + stats = dev->get_stats(dev); + copy_rtnl_link_stats(nla_data(attr), stats); if (dev->rtnl_link_ops) { if (rtnl_link_fill(skb, dev) < 0) @@ -715,6 +717,7 @@ const struct nla_policy ifla_policy[IFLA_MAX+1] = { [IFLA_LINKMODE] = { .type = NLA_U8 }, [IFLA_LINKINFO] = { .type = NLA_NESTED }, [IFLA_NET_NS_PID] = { .type = NLA_U32 }, + [IFLA_IFALIAS] = { .type = NLA_STRING, .len = IFALIASZ-1 }, }; static const struct nla_policy ifla_info_policy[IFLA_INFO_MAX+1] = { @@ -855,6 +858,14 @@ static int do_setlink(struct net_device *dev, struct ifinfomsg *ifm, modified = 1; } + if (tb[IFLA_IFALIAS]) { + err = dev_set_alias(dev, nla_data(tb[IFLA_IFALIAS]), + nla_len(tb[IFLA_IFALIAS])); + if (err < 0) + goto errout; + modified = 1; + } + if (tb[IFLA_BROADCAST]) { nla_memcpy(dev->broadcast, tb[IFLA_BROADCAST], dev->addr_len); send_addr_notify = 1; diff --git a/net/core/skb_dma_map.c b/net/core/skb_dma_map.c new file mode 100644 index 000000000000..86234923a3b7 --- /dev/null +++ b/net/core/skb_dma_map.c @@ -0,0 +1,66 @@ +/* skb_dma_map.c: DMA mapping helpers for socket buffers. + * + * Copyright (C) David S. Miller <davem@davemloft.net> + */ + +#include <linux/kernel.h> +#include <linux/module.h> +#include <linux/dma-mapping.h> +#include <linux/skbuff.h> + +int skb_dma_map(struct device *dev, struct sk_buff *skb, + enum dma_data_direction dir) +{ + struct skb_shared_info *sp = skb_shinfo(skb); + dma_addr_t map; + int i; + + map = dma_map_single(dev, skb->data, + skb_headlen(skb), dir); + if (dma_mapping_error(dev, map)) + goto out_err; + + sp->dma_maps[0] = map; + for (i = 0; i < sp->nr_frags; i++) { + skb_frag_t *fp = &sp->frags[i]; + + map = dma_map_page(dev, fp->page, fp->page_offset, + fp->size, dir); + if (dma_mapping_error(dev, map)) + goto unwind; + sp->dma_maps[i + 1] = map; + } + sp->num_dma_maps = i + 1; + + return 0; + +unwind: + while (--i >= 0) { + skb_frag_t *fp = &sp->frags[i]; + + dma_unmap_page(dev, sp->dma_maps[i + 1], + fp->size, dir); + } + dma_unmap_single(dev, sp->dma_maps[0], + skb_headlen(skb), dir); +out_err: + return -ENOMEM; +} +EXPORT_SYMBOL(skb_dma_map); + +void skb_dma_unmap(struct device *dev, struct sk_buff *skb, + enum dma_data_direction dir) +{ + struct skb_shared_info *sp = skb_shinfo(skb); + int i; + + dma_unmap_single(dev, sp->dma_maps[0], + skb_headlen(skb), dir); + for (i = 0; i < sp->nr_frags; i++) { + skb_frag_t *fp = &sp->frags[i]; + + dma_unmap_page(dev, sp->dma_maps[i + 1], + fp->size, dir); + } +} +EXPORT_SYMBOL(skb_dma_unmap); diff --git a/net/core/skbuff.c b/net/core/skbuff.c index 1e556d312117..4e22e3a35359 100644 --- a/net/core/skbuff.c +++ b/net/core/skbuff.c @@ -1,11 +1,9 @@ /* * Routines having to do with the 'struct sk_buff' memory handlers. * - * Authors: Alan Cox <iiitac@pyr.swan.ac.uk> + * Authors: Alan Cox <alan@lxorguk.ukuu.org.uk> * Florian La Roche <rzsfl@rz.uni-sb.de> * - * Version: $Id: skbuff.c,v 1.90 2001/11/07 05:56:19 davem Exp $ - * * Fixes: * Alan Cox : Fixed the worst of the load * balancer bugs. @@ -265,6 +263,26 @@ struct sk_buff *__netdev_alloc_skb(struct net_device *dev, return skb; } +struct page *__netdev_alloc_page(struct net_device *dev, gfp_t gfp_mask) +{ + int node = dev->dev.parent ? dev_to_node(dev->dev.parent) : -1; + struct page *page; + + page = alloc_pages_node(node, gfp_mask, 0); + return page; +} +EXPORT_SYMBOL(__netdev_alloc_page); + +void skb_add_rx_frag(struct sk_buff *skb, int i, struct page *page, int off, + int size) +{ + skb_fill_page_desc(skb, i, page, off, size); + skb->len += size; + skb->data_len += size; + skb->truesize += size; +} +EXPORT_SYMBOL(skb_add_rx_frag); + /** * dev_alloc_skb - allocate an skbuff for receiving * @length: length to allocate @@ -365,8 +383,7 @@ static void kfree_skbmem(struct sk_buff *skb) } } -/* Free everything but the sk_buff shell. */ -static void skb_release_all(struct sk_buff *skb) +static void skb_release_head_state(struct sk_buff *skb) { dst_release(skb->dst); #ifdef CONFIG_XFRM @@ -390,6 +407,12 @@ static void skb_release_all(struct sk_buff *skb) skb->tc_verd = 0; #endif #endif +} + +/* Free everything but the sk_buff shell. */ +static void skb_release_all(struct sk_buff *skb) +{ + skb_release_head_state(skb); skb_release_data(skb); } @@ -426,6 +449,38 @@ void kfree_skb(struct sk_buff *skb) __kfree_skb(skb); } +int skb_recycle_check(struct sk_buff *skb, int skb_size) +{ + struct skb_shared_info *shinfo; + + if (skb_is_nonlinear(skb) || skb->fclone != SKB_FCLONE_UNAVAILABLE) + return 0; + + skb_size = SKB_DATA_ALIGN(skb_size + NET_SKB_PAD); + if (skb_end_pointer(skb) - skb->head < skb_size) + return 0; + + if (skb_shared(skb) || skb_cloned(skb)) + return 0; + + skb_release_head_state(skb); + shinfo = skb_shinfo(skb); + atomic_set(&shinfo->dataref, 1); + shinfo->nr_frags = 0; + shinfo->gso_size = 0; + shinfo->gso_segs = 0; + shinfo->gso_type = 0; + shinfo->ip6_frag_id = 0; + shinfo->frag_list = NULL; + + memset(skb, 0, offsetof(struct sk_buff, tail)); + skb_reset_tail_pointer(skb); + skb->data = skb->head + NET_SKB_PAD; + + return 1; +} +EXPORT_SYMBOL(skb_recycle_check); + static void __copy_skb_header(struct sk_buff *new, const struct sk_buff *old) { new->tstamp = old->tstamp; @@ -461,6 +516,8 @@ static void __copy_skb_header(struct sk_buff *new, const struct sk_buff *old) new->tc_verd = old->tc_verd; #endif #endif + new->vlan_tci = old->vlan_tci; + skb_copy_secmark(new, old); } @@ -485,6 +542,9 @@ static struct sk_buff *__skb_clone(struct sk_buff *n, struct sk_buff *skb) C(head); C(data); C(truesize); +#if defined(CONFIG_MAC80211) || defined(CONFIG_MAC80211_MODULE) + C(do_not_encrypt); +#endif atomic_set(&n->users, 1); atomic_inc(&(skb_shinfo(skb)->dataref)); @@ -698,6 +758,8 @@ int pskb_expand_head(struct sk_buff *skb, int nhead, int ntail, #endif long off; + BUG_ON(nhead < 0); + if (skb_shared(skb)) BUG(); @@ -1200,7 +1262,7 @@ int skb_copy_bits(const struct sk_buff *skb, int offset, void *to, int len) for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) { int end; - BUG_TRAP(start <= offset + len); + WARN_ON(start > offset + len); end = start + skb_shinfo(skb)->frags[i].size; if ((copy = end - offset) > 0) { @@ -1229,7 +1291,7 @@ int skb_copy_bits(const struct sk_buff *skb, int offset, void *to, int len) for (; list; list = list->next) { int end; - BUG_TRAP(start <= offset + len); + WARN_ON(start > offset + len); end = start + list->len; if ((copy = end - offset) > 0) { @@ -1282,107 +1344,83 @@ static inline int spd_fill_page(struct splice_pipe_desc *spd, struct page *page, return 0; } -/* - * Map linear and fragment data from the skb to spd. Returns number of - * pages mapped. - */ -static int __skb_splice_bits(struct sk_buff *skb, unsigned int *offset, - unsigned int *total_len, - struct splice_pipe_desc *spd) +static inline void __segment_seek(struct page **page, unsigned int *poff, + unsigned int *plen, unsigned int off) { - unsigned int nr_pages = spd->nr_pages; - unsigned int poff, plen, len, toff, tlen; - int headlen, seg; + *poff += off; + *page += *poff / PAGE_SIZE; + *poff = *poff % PAGE_SIZE; + *plen -= off; +} - toff = *offset; - tlen = *total_len; - if (!tlen) - goto err; +static inline int __splice_segment(struct page *page, unsigned int poff, + unsigned int plen, unsigned int *off, + unsigned int *len, struct sk_buff *skb, + struct splice_pipe_desc *spd) +{ + if (!*len) + return 1; - /* - * if the offset is greater than the linear part, go directly to - * the fragments. - */ - headlen = skb_headlen(skb); - if (toff >= headlen) { - toff -= headlen; - goto map_frag; + /* skip this segment if already processed */ + if (*off >= plen) { + *off -= plen; + return 0; } - /* - * first map the linear region into the pages/partial map, skipping - * any potential initial offset. - */ - len = 0; - while (len < headlen) { - void *p = skb->data + len; - - poff = (unsigned long) p & (PAGE_SIZE - 1); - plen = min_t(unsigned int, headlen - len, PAGE_SIZE - poff); - len += plen; - - if (toff) { - if (plen <= toff) { - toff -= plen; - continue; - } - plen -= toff; - poff += toff; - toff = 0; - } + /* ignore any bits we already processed */ + if (*off) { + __segment_seek(&page, &poff, &plen, *off); + *off = 0; + } - plen = min(plen, tlen); - if (!plen) - break; + do { + unsigned int flen = min(*len, plen); - /* - * just jump directly to update and return, no point - * in going over fragments when the output is full. - */ - if (spd_fill_page(spd, virt_to_page(p), plen, poff, skb)) - goto done; + /* the linear region may spread across several pages */ + flen = min_t(unsigned int, flen, PAGE_SIZE - poff); - tlen -= plen; - } + if (spd_fill_page(spd, page, flen, poff, skb)) + return 1; + + __segment_seek(&page, &poff, &plen, flen); + *len -= flen; + + } while (*len && plen); + + return 0; +} + +/* + * Map linear and fragment data from the skb to spd. It reports failure if the + * pipe is full or if we already spliced the requested length. + */ +static int __skb_splice_bits(struct sk_buff *skb, unsigned int *offset, + unsigned int *len, + struct splice_pipe_desc *spd) +{ + int seg; + + /* + * map the linear part + */ + if (__splice_segment(virt_to_page(skb->data), + (unsigned long) skb->data & (PAGE_SIZE - 1), + skb_headlen(skb), + offset, len, skb, spd)) + return 1; /* * then map the fragments */ -map_frag: for (seg = 0; seg < skb_shinfo(skb)->nr_frags; seg++) { const skb_frag_t *f = &skb_shinfo(skb)->frags[seg]; - plen = f->size; - poff = f->page_offset; - - if (toff) { - if (plen <= toff) { - toff -= plen; - continue; - } - plen -= toff; - poff += toff; - toff = 0; - } - - plen = min(plen, tlen); - if (!plen) - break; - - if (spd_fill_page(spd, f->page, plen, poff, skb)) - break; - - tlen -= plen; + if (__splice_segment(f->page, f->page_offset, f->size, + offset, len, skb, spd)) + return 1; } -done: - if (spd->nr_pages - nr_pages) { - *offset = 0; - *total_len = tlen; - return 0; - } -err: - return 1; + return 0; } /* @@ -1499,7 +1537,7 @@ int skb_store_bits(struct sk_buff *skb, int offset, const void *from, int len) skb_frag_t *frag = &skb_shinfo(skb)->frags[i]; int end; - BUG_TRAP(start <= offset + len); + WARN_ON(start > offset + len); end = start + frag->size; if ((copy = end - offset) > 0) { @@ -1527,7 +1565,7 @@ int skb_store_bits(struct sk_buff *skb, int offset, const void *from, int len) for (; list; list = list->next) { int end; - BUG_TRAP(start <= offset + len); + WARN_ON(start > offset + len); end = start + list->len; if ((copy = end - offset) > 0) { @@ -1576,7 +1614,7 @@ __wsum skb_checksum(const struct sk_buff *skb, int offset, for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) { int end; - BUG_TRAP(start <= offset + len); + WARN_ON(start > offset + len); end = start + skb_shinfo(skb)->frags[i].size; if ((copy = end - offset) > 0) { @@ -1605,7 +1643,7 @@ __wsum skb_checksum(const struct sk_buff *skb, int offset, for (; list; list = list->next) { int end; - BUG_TRAP(start <= offset + len); + WARN_ON(start > offset + len); end = start + list->len; if ((copy = end - offset) > 0) { @@ -1653,7 +1691,7 @@ __wsum skb_copy_and_csum_bits(const struct sk_buff *skb, int offset, for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) { int end; - BUG_TRAP(start <= offset + len); + WARN_ON(start > offset + len); end = start + skb_shinfo(skb)->frags[i].size; if ((copy = end - offset) > 0) { @@ -1686,7 +1724,7 @@ __wsum skb_copy_and_csum_bits(const struct sk_buff *skb, int offset, __wsum csum2; int end; - BUG_TRAP(start <= offset + len); + WARN_ON(start > offset + len); end = start + list->len; if ((copy = end - offset) > 0) { @@ -2277,13 +2315,7 @@ struct sk_buff *skb_segment(struct sk_buff *skb, int features) segs = nskb; tail = nskb; - nskb->dev = skb->dev; - skb_copy_queue_mapping(nskb, skb); - nskb->priority = skb->priority; - nskb->protocol = skb->protocol; - nskb->dst = dst_clone(skb->dst); - memcpy(nskb->cb, skb->cb, sizeof(skb->cb)); - nskb->pkt_type = skb->pkt_type; + __copy_skb_header(nskb, skb); nskb->mac_len = skb->mac_len; skb_reserve(nskb, headroom); @@ -2294,6 +2326,7 @@ struct sk_buff *skb_segment(struct sk_buff *skb, int features) skb_copy_from_linear_data(skb, skb_put(nskb, doffset), doffset); if (!sg) { + nskb->ip_summed = CHECKSUM_NONE; nskb->csum = skb_copy_and_csum_bits(skb, offset, skb_put(nskb, len), len, 0); @@ -2303,8 +2336,6 @@ struct sk_buff *skb_segment(struct sk_buff *skb, int features) frag = skb_shinfo(nskb)->frags; k = 0; - nskb->ip_summed = CHECKSUM_PARTIAL; - nskb->csum = skb->csum; skb_copy_from_linear_data_offset(skb, offset, skb_put(nskb, hsize), hsize); @@ -2396,7 +2427,7 @@ __skb_to_sgvec(struct sk_buff *skb, struct scatterlist *sg, int offset, int len) for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) { int end; - BUG_TRAP(start <= offset + len); + WARN_ON(start > offset + len); end = start + skb_shinfo(skb)->frags[i].size; if ((copy = end - offset) > 0) { @@ -2420,7 +2451,7 @@ __skb_to_sgvec(struct sk_buff *skb, struct scatterlist *sg, int offset, int len) for (; list; list = list->next) { int end; - BUG_TRAP(start <= offset + len); + WARN_ON(start > offset + len); end = start + list->len; if ((copy = end - offset) > 0) { @@ -2585,6 +2616,13 @@ bool skb_partial_csum_set(struct sk_buff *skb, u16 start, u16 off) return true; } +void __skb_warn_lro_forwarding(const struct sk_buff *skb) +{ + if (net_ratelimit()) + pr_warning("%s: received packets cannot be forwarded" + " while LRO is enabled\n", skb->dev->name); +} + EXPORT_SYMBOL(___pskb_trim); EXPORT_SYMBOL(__kfree_skb); EXPORT_SYMBOL(kfree_skb); @@ -2618,6 +2656,7 @@ EXPORT_SYMBOL(skb_seq_read); EXPORT_SYMBOL(skb_abort_seq_read); EXPORT_SYMBOL(skb_find_text); EXPORT_SYMBOL(skb_append_datato_frags); +EXPORT_SYMBOL(__skb_warn_lro_forwarding); EXPORT_SYMBOL_GPL(skb_to_sgvec); EXPORT_SYMBOL_GPL(skb_cow_data); diff --git a/net/core/sock.c b/net/core/sock.c index 88094cb09c06..5e2a3132a8c9 100644 --- a/net/core/sock.c +++ b/net/core/sock.c @@ -7,8 +7,6 @@ * handler for protocols to use and generic option handler. * * - * Version: $Id: sock.c,v 1.117 2002/02/01 22:01:03 davem Exp $ - * * Authors: Ross Biro * Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG> * Florian La Roche, <flla@stud.uni-sb.de> @@ -156,7 +154,8 @@ static const char *af_family_key_strings[AF_MAX+1] = { "sk_lock-AF_PPPOX" , "sk_lock-AF_WANPIPE" , "sk_lock-AF_LLC" , "sk_lock-27" , "sk_lock-28" , "sk_lock-AF_CAN" , "sk_lock-AF_TIPC" , "sk_lock-AF_BLUETOOTH", "sk_lock-IUCV" , - "sk_lock-AF_RXRPC" , "sk_lock-AF_MAX" + "sk_lock-AF_RXRPC" , "sk_lock-AF_ISDN" , "sk_lock-AF_PHONET" , + "sk_lock-AF_MAX" }; static const char *af_family_slock_key_strings[AF_MAX+1] = { "slock-AF_UNSPEC", "slock-AF_UNIX" , "slock-AF_INET" , @@ -170,7 +169,8 @@ static const char *af_family_slock_key_strings[AF_MAX+1] = { "slock-AF_PPPOX" , "slock-AF_WANPIPE" , "slock-AF_LLC" , "slock-27" , "slock-28" , "slock-AF_CAN" , "slock-AF_TIPC" , "slock-AF_BLUETOOTH", "slock-AF_IUCV" , - "slock-AF_RXRPC" , "slock-AF_MAX" + "slock-AF_RXRPC" , "slock-AF_ISDN" , "slock-AF_PHONET" , + "slock-AF_MAX" }; static const char *af_family_clock_key_strings[AF_MAX+1] = { "clock-AF_UNSPEC", "clock-AF_UNIX" , "clock-AF_INET" , @@ -182,9 +182,10 @@ static const char *af_family_clock_key_strings[AF_MAX+1] = { "clock-AF_ASH" , "clock-AF_ECONET" , "clock-AF_ATMSVC" , "clock-21" , "clock-AF_SNA" , "clock-AF_IRDA" , "clock-AF_PPPOX" , "clock-AF_WANPIPE" , "clock-AF_LLC" , - "clock-27" , "clock-28" , "clock-29" , + "clock-27" , "clock-28" , "clock-AF_CAN" , "clock-AF_TIPC" , "clock-AF_BLUETOOTH", "clock-AF_IUCV" , - "clock-AF_RXRPC" , "clock-AF_MAX" + "clock-AF_RXRPC" , "clock-AF_ISDN" , "clock-AF_PHONET" , + "clock-AF_MAX" }; #endif @@ -326,7 +327,7 @@ int sk_receive_skb(struct sock *sk, struct sk_buff *skb, const int nested) */ mutex_acquire(&sk->sk_lock.dep_map, 0, 1, _RET_IP_); - rc = sk->sk_backlog_rcv(sk, skb); + rc = sk_backlog_rcv(sk, skb); mutex_release(&sk->sk_lock.dep_map, 1, _RET_IP_); } else @@ -1068,7 +1069,7 @@ struct sock *sk_clone(const struct sock *sk, const gfp_t priority) * to be taken into account in all callers. -acme */ sk_refcnt_debug_inc(newsk); - newsk->sk_socket = NULL; + sk_set_socket(newsk, NULL); newsk->sk_sleep = NULL; if (newsk->sk_prot->sockets_allocated) @@ -1373,7 +1374,7 @@ static void __release_sock(struct sock *sk) struct sk_buff *next = skb->next; skb->next = NULL; - sk->sk_backlog_rcv(sk, skb); + sk_backlog_rcv(sk, skb); /* * We are in process context here with softirqs @@ -1444,7 +1445,7 @@ int __sk_mem_schedule(struct sock *sk, int size, int kind) /* Under pressure. */ if (allocated > prot->sysctl_mem[1]) if (prot->enter_memory_pressure) - prot->enter_memory_pressure(); + prot->enter_memory_pressure(sk); /* Over hard limit. */ if (allocated > prot->sysctl_mem[2]) @@ -1704,7 +1705,7 @@ void sock_init_data(struct socket *sock, struct sock *sk) sk->sk_rcvbuf = sysctl_rmem_default; sk->sk_sndbuf = sysctl_wmem_default; sk->sk_state = TCP_CLOSE; - sk->sk_socket = sock; + sk_set_socket(sk, sock); sock_set_flag(sk, SOCK_ZAPPED); diff --git a/net/core/stream.c b/net/core/stream.c index 4a0ad152c9c4..8727cead64ad 100644 --- a/net/core/stream.c +++ b/net/core/stream.c @@ -9,7 +9,7 @@ * * Authors: Arnaldo Carvalho de Melo <acme@conectiva.com.br> * (from old tcp.c code) - * Alan Cox <alan@redhat.com> (Borrowed comments 8-)) + * Alan Cox <alan@lxorguk.ukuu.org.uk> (Borrowed comments 8-)) */ #include <linux/module.h> @@ -192,13 +192,13 @@ void sk_stream_kill_queues(struct sock *sk) __skb_queue_purge(&sk->sk_error_queue); /* Next, the write queue. */ - BUG_TRAP(skb_queue_empty(&sk->sk_write_queue)); + WARN_ON(!skb_queue_empty(&sk->sk_write_queue)); /* Account for returned memory. */ sk_mem_reclaim(sk); - BUG_TRAP(!sk->sk_wmem_queued); - BUG_TRAP(!sk->sk_forward_alloc); + WARN_ON(sk->sk_wmem_queued); + WARN_ON(sk->sk_forward_alloc); /* It is _impossible_ for the backlog to contain anything * when we get here. All user references to this socket diff --git a/net/core/sysctl_net_core.c b/net/core/sysctl_net_core.c index 5fc801057244..f686467ff12b 100644 --- a/net/core/sysctl_net_core.c +++ b/net/core/sysctl_net_core.c @@ -67,7 +67,7 @@ static struct ctl_table net_core_table[] = { { .ctl_name = NET_CORE_MSG_COST, .procname = "message_cost", - .data = &net_msg_cost, + .data = &net_ratelimit_state.interval, .maxlen = sizeof(int), .mode = 0644, .proc_handler = &proc_dointvec_jiffies, @@ -76,7 +76,7 @@ static struct ctl_table net_core_table[] = { { .ctl_name = NET_CORE_MSG_BURST, .procname = "message_burst", - .data = &net_msg_burst, + .data = &net_ratelimit_state.burst, .maxlen = sizeof(int), .mode = 0644, .proc_handler = &proc_dointvec, @@ -125,14 +125,6 @@ static struct ctl_table net_core_table[] = { #endif /* CONFIG_XFRM */ #endif /* CONFIG_NET */ { - .ctl_name = NET_CORE_SOMAXCONN, - .procname = "somaxconn", - .data = &init_net.core.sysctl_somaxconn, - .maxlen = sizeof(int), - .mode = 0644, - .proc_handler = &proc_dointvec - }, - { .ctl_name = NET_CORE_BUDGET, .procname = "netdev_budget", .data = &netdev_budget, @@ -151,6 +143,18 @@ static struct ctl_table net_core_table[] = { { .ctl_name = 0 } }; +static struct ctl_table netns_core_table[] = { + { + .ctl_name = NET_CORE_SOMAXCONN, + .procname = "somaxconn", + .data = &init_net.core.sysctl_somaxconn, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = &proc_dointvec + }, + { .ctl_name = 0 } +}; + static __net_initdata struct ctl_path net_core_path[] = { { .procname = "net", .ctl_name = CTL_NET, }, { .procname = "core", .ctl_name = NET_CORE, }, @@ -159,23 +163,17 @@ static __net_initdata struct ctl_path net_core_path[] = { static __net_init int sysctl_core_net_init(struct net *net) { - struct ctl_table *tbl, *tmp; + struct ctl_table *tbl; net->core.sysctl_somaxconn = SOMAXCONN; - tbl = net_core_table; + tbl = netns_core_table; if (net != &init_net) { - tbl = kmemdup(tbl, sizeof(net_core_table), GFP_KERNEL); + tbl = kmemdup(tbl, sizeof(netns_core_table), GFP_KERNEL); if (tbl == NULL) goto err_dup; - for (tmp = tbl; tmp->procname; tmp++) { - if (tmp->data >= (void *)&init_net && - tmp->data < (void *)(&init_net + 1)) - tmp->data += (char *)net - (char *)&init_net; - else - tmp->mode &= ~0222; - } + tbl[0].data = &net->core.sysctl_somaxconn; } net->core.sysctl_hdr = register_net_sysctl_table(net, @@ -186,7 +184,7 @@ static __net_init int sysctl_core_net_init(struct net *net) return 0; err_reg: - if (tbl != net_core_table) + if (tbl != netns_core_table) kfree(tbl); err_dup: return -ENOMEM; @@ -198,7 +196,7 @@ static __net_exit void sysctl_core_net_exit(struct net *net) tbl = net->core.sysctl_hdr->ctl_table_arg; unregister_net_sysctl_table(net->core.sysctl_hdr); - BUG_ON(tbl == net_core_table); + BUG_ON(tbl == netns_core_table); kfree(tbl); } @@ -209,6 +207,7 @@ static __net_initdata struct pernet_operations sysctl_core_ops = { static __init int sysctl_core_init(void) { + register_net_sysctl_rotable(net_core_path, net_core_table); return register_pernet_subsys(&sysctl_core_ops); } diff --git a/net/core/user_dma.c b/net/core/user_dma.c index c77aff9c6eb3..164b090d5ac3 100644 --- a/net/core/user_dma.c +++ b/net/core/user_dma.c @@ -27,13 +27,13 @@ #include <linux/dmaengine.h> #include <linux/socket.h> -#include <linux/rtnetlink.h> /* for BUG_TRAP */ #include <net/tcp.h> #include <net/netdma.h> #define NET_DMA_DEFAULT_COPYBREAK 4096 int sysctl_tcp_dma_copybreak = NET_DMA_DEFAULT_COPYBREAK; +EXPORT_SYMBOL(sysctl_tcp_dma_copybreak); /** * dma_skb_copy_datagram_iovec - Copy a datagram to an iovec. @@ -71,7 +71,7 @@ int dma_skb_copy_datagram_iovec(struct dma_chan *chan, for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) { int end; - BUG_TRAP(start <= offset + len); + WARN_ON(start > offset + len); end = start + skb_shinfo(skb)->frags[i].size; copy = end - offset; @@ -100,7 +100,7 @@ int dma_skb_copy_datagram_iovec(struct dma_chan *chan, for (; list; list = list->next) { int end; - BUG_TRAP(start <= offset + len); + WARN_ON(start > offset + len); end = start + list->len; copy = end - offset; diff --git a/net/core/utils.c b/net/core/utils.c index 8031eb59054e..72e0ebe964a0 100644 --- a/net/core/utils.c +++ b/net/core/utils.c @@ -31,17 +31,16 @@ #include <asm/system.h> #include <asm/uaccess.h> -int net_msg_cost __read_mostly = 5*HZ; -int net_msg_burst __read_mostly = 10; int net_msg_warn __read_mostly = 1; EXPORT_SYMBOL(net_msg_warn); +DEFINE_RATELIMIT_STATE(net_ratelimit_state, 5 * HZ, 10); /* * All net warning printk()s should be guarded by this function. */ int net_ratelimit(void) { - return __printk_ratelimit(net_msg_cost, net_msg_burst); + return __ratelimit(&net_ratelimit_state); } EXPORT_SYMBOL(net_ratelimit); diff --git a/net/dccp/ccids/ccid2.c b/net/dccp/ccids/ccid2.c index 8e9580874216..9a430734530c 100644 --- a/net/dccp/ccids/ccid2.c +++ b/net/dccp/ccids/ccid2.c @@ -783,7 +783,7 @@ static struct ccid_operations ccid2 = { }; #ifdef CONFIG_IP_DCCP_CCID2_DEBUG -module_param(ccid2_debug, bool, 0444); +module_param(ccid2_debug, bool, 0644); MODULE_PARM_DESC(ccid2_debug, "Enable debug messages"); #endif diff --git a/net/dccp/ccids/ccid3.c b/net/dccp/ccids/ccid3.c index a1929f33d703..3b8bd7ca6761 100644 --- a/net/dccp/ccids/ccid3.c +++ b/net/dccp/ccids/ccid3.c @@ -794,7 +794,7 @@ static void ccid3_hc_rx_packet_recv(struct sock *sk, struct sk_buff *skb) { struct ccid3_hc_rx_sock *hcrx = ccid3_hc_rx_sk(sk); enum ccid3_fback_type do_feedback = CCID3_FBACK_NONE; - const u32 ndp = dccp_sk(sk)->dccps_options_received.dccpor_ndp; + const u64 ndp = dccp_sk(sk)->dccps_options_received.dccpor_ndp; const bool is_data_packet = dccp_data_packet(skb); if (unlikely(hcrx->ccid3hcrx_state == TFRC_RSTATE_NO_DATA)) { @@ -825,18 +825,16 @@ static void ccid3_hc_rx_packet_recv(struct sock *sk, struct sk_buff *skb) } /* - * Handle pending losses and otherwise check for new loss + * Perform loss detection and handle pending losses */ - if (tfrc_rx_hist_loss_pending(&hcrx->ccid3hcrx_hist) && - tfrc_rx_handle_loss(&hcrx->ccid3hcrx_hist, - &hcrx->ccid3hcrx_li_hist, - skb, ndp, ccid3_first_li, sk) ) { + if (tfrc_rx_handle_loss(&hcrx->ccid3hcrx_hist, &hcrx->ccid3hcrx_li_hist, + skb, ndp, ccid3_first_li, sk)) { do_feedback = CCID3_FBACK_PARAM_CHANGE; goto done_receiving; } - if (tfrc_rx_hist_new_loss_indicated(&hcrx->ccid3hcrx_hist, skb, ndp)) - goto update_records; + if (tfrc_rx_hist_loss_pending(&hcrx->ccid3hcrx_hist)) + return; /* done receiving */ /* * Handle data packets: RTT sampling and monitoring p @@ -965,7 +963,7 @@ static struct ccid_operations ccid3 = { }; #ifdef CONFIG_IP_DCCP_CCID3_DEBUG -module_param(ccid3_debug, bool, 0444); +module_param(ccid3_debug, bool, 0644); MODULE_PARM_DESC(ccid3_debug, "Enable debug messages"); #endif diff --git a/net/dccp/ccids/lib/loss_interval.c b/net/dccp/ccids/lib/loss_interval.c index 849e181e698f..5b3ce0688c5c 100644 --- a/net/dccp/ccids/lib/loss_interval.c +++ b/net/dccp/ccids/lib/loss_interval.c @@ -67,7 +67,10 @@ static void tfrc_lh_calc_i_mean(struct tfrc_loss_hist *lh) u32 i_i, i_tot0 = 0, i_tot1 = 0, w_tot = 0; int i, k = tfrc_lh_length(lh) - 1; /* k is as in rfc3448bis, 5.4 */ - for (i=0; i <= k; i++) { + if (k <= 0) + return; + + for (i = 0; i <= k; i++) { i_i = tfrc_lh_get_interval(lh, i); if (i < k) { @@ -78,7 +81,6 @@ static void tfrc_lh_calc_i_mean(struct tfrc_loss_hist *lh) i_tot1 += i_i * tfrc_lh_weights[i-1]; } - BUG_ON(w_tot == 0); lh->i_mean = max(i_tot0, i_tot1) / w_tot; } @@ -90,14 +92,14 @@ u8 tfrc_lh_update_i_mean(struct tfrc_loss_hist *lh, struct sk_buff *skb) { struct tfrc_loss_interval *cur = tfrc_lh_peek(lh); u32 old_i_mean = lh->i_mean; - s64 length; + s64 len; if (cur == NULL) /* not initialised */ return 0; - length = dccp_delta_seqno(cur->li_seqno, DCCP_SKB_CB(skb)->dccpd_seq); + len = dccp_delta_seqno(cur->li_seqno, DCCP_SKB_CB(skb)->dccpd_seq) + 1; - if (length - cur->li_length <= 0) /* duplicate or reordered */ + if (len - (s64)cur->li_length <= 0) /* duplicate or reordered */ return 0; if (SUB16(dccp_hdr(skb)->dccph_ccval, cur->li_ccval) > 4) @@ -114,7 +116,7 @@ u8 tfrc_lh_update_i_mean(struct tfrc_loss_hist *lh, struct sk_buff *skb) if (tfrc_lh_length(lh) == 1) /* due to RFC 3448, 6.3.1 */ return 0; - cur->li_length = length; + cur->li_length = len; tfrc_lh_calc_i_mean(lh); return (lh->i_mean < old_i_mean); @@ -159,7 +161,7 @@ int tfrc_lh_interval_add(struct tfrc_loss_hist *lh, struct tfrc_rx_hist *rh, else { cur->li_length = dccp_delta_seqno(cur->li_seqno, new->li_seqno); new->li_length = dccp_delta_seqno(new->li_seqno, - tfrc_rx_hist_last_rcv(rh)->tfrchrx_seqno); + tfrc_rx_hist_last_rcv(rh)->tfrchrx_seqno) + 1; if (lh->counter > (2*LIH_SIZE)) lh->counter -= LIH_SIZE; diff --git a/net/dccp/ccids/lib/packet_history.c b/net/dccp/ccids/lib/packet_history.c index 20af1a693427..6cc108afdc3b 100644 --- a/net/dccp/ccids/lib/packet_history.c +++ b/net/dccp/ccids/lib/packet_history.c @@ -153,7 +153,7 @@ void tfrc_rx_packet_history_exit(void) static inline void tfrc_rx_hist_entry_from_skb(struct tfrc_rx_hist_entry *entry, const struct sk_buff *skb, - const u32 ndp) + const u64 ndp) { const struct dccp_hdr *dh = dccp_hdr(skb); @@ -166,7 +166,7 @@ static inline void tfrc_rx_hist_entry_from_skb(struct tfrc_rx_hist_entry *entry, void tfrc_rx_hist_add_packet(struct tfrc_rx_hist *h, const struct sk_buff *skb, - const u32 ndp) + const u64 ndp) { struct tfrc_rx_hist_entry *entry = tfrc_rx_hist_last_rcv(h); @@ -206,31 +206,39 @@ static void tfrc_rx_hist_swap(struct tfrc_rx_hist *h, const u8 a, const u8 b) * * In the descriptions, `Si' refers to the sequence number of entry number i, * whose NDP count is `Ni' (lower case is used for variables). - * Note: All __after_loss functions expect that a test against duplicates has - * been performed already: the seqno of the skb must not be less than the - * seqno of loss_prev; and it must not equal that of any valid hist_entry. + * Note: All __xxx_loss functions expect that a test against duplicates has been + * performed already: the seqno of the skb must not be less than the seqno + * of loss_prev; and it must not equal that of any valid history entry. */ +static void __do_track_loss(struct tfrc_rx_hist *h, struct sk_buff *skb, u64 n1) +{ + u64 s0 = tfrc_rx_hist_loss_prev(h)->tfrchrx_seqno, + s1 = DCCP_SKB_CB(skb)->dccpd_seq; + + if (!dccp_loss_free(s0, s1, n1)) { /* gap between S0 and S1 */ + h->loss_count = 1; + tfrc_rx_hist_entry_from_skb(tfrc_rx_hist_entry(h, 1), skb, n1); + } +} + static void __one_after_loss(struct tfrc_rx_hist *h, struct sk_buff *skb, u32 n2) { u64 s0 = tfrc_rx_hist_loss_prev(h)->tfrchrx_seqno, s1 = tfrc_rx_hist_entry(h, 1)->tfrchrx_seqno, s2 = DCCP_SKB_CB(skb)->dccpd_seq; - int n1 = tfrc_rx_hist_entry(h, 1)->tfrchrx_ndp, - d12 = dccp_delta_seqno(s1, s2), d2; - if (d12 > 0) { /* S1 < S2 */ + if (likely(dccp_delta_seqno(s1, s2) > 0)) { /* S1 < S2 */ h->loss_count = 2; tfrc_rx_hist_entry_from_skb(tfrc_rx_hist_entry(h, 2), skb, n2); return; } /* S0 < S2 < S1 */ - d2 = dccp_delta_seqno(s0, s2); - if (d2 == 1 || n2 >= d2) { /* S2 is direct successor of S0 */ - int d21 = -d12; + if (dccp_loss_free(s0, s2, n2)) { + u64 n1 = tfrc_rx_hist_entry(h, 1)->tfrchrx_ndp; - if (d21 == 1 || n1 >= d21) { + if (dccp_loss_free(s2, s1, n1)) { /* hole is filled: S0, S2, and S1 are consecutive */ h->loss_count = 0; h->loss_start = tfrc_rx_hist_index(h, 1); @@ -238,9 +246,9 @@ static void __one_after_loss(struct tfrc_rx_hist *h, struct sk_buff *skb, u32 n2 /* gap between S2 and S1: just update loss_prev */ tfrc_rx_hist_entry_from_skb(tfrc_rx_hist_loss_prev(h), skb, n2); - } else { /* hole between S0 and S2 */ + } else { /* gap between S0 and S2 */ /* - * Reorder history to insert S2 between S0 and s1 + * Reorder history to insert S2 between S0 and S1 */ tfrc_rx_hist_swap(h, 0, 3); h->loss_start = tfrc_rx_hist_index(h, 3); @@ -256,22 +264,18 @@ static int __two_after_loss(struct tfrc_rx_hist *h, struct sk_buff *skb, u32 n3) s1 = tfrc_rx_hist_entry(h, 1)->tfrchrx_seqno, s2 = tfrc_rx_hist_entry(h, 2)->tfrchrx_seqno, s3 = DCCP_SKB_CB(skb)->dccpd_seq; - int n1 = tfrc_rx_hist_entry(h, 1)->tfrchrx_ndp, - d23 = dccp_delta_seqno(s2, s3), d13, d3, d31; - if (d23 > 0) { /* S2 < S3 */ + if (likely(dccp_delta_seqno(s2, s3) > 0)) { /* S2 < S3 */ h->loss_count = 3; tfrc_rx_hist_entry_from_skb(tfrc_rx_hist_entry(h, 3), skb, n3); return 1; } /* S3 < S2 */ - d13 = dccp_delta_seqno(s1, s3); - if (d13 > 0) { + if (dccp_delta_seqno(s1, s3) > 0) { /* S1 < S3 < S2 */ /* - * The sequence number order is S1, S3, S2 - * Reorder history to insert entry between S1 and S2 + * Reorder history to insert S3 between S1 and S2 */ tfrc_rx_hist_swap(h, 2, 3); tfrc_rx_hist_entry_from_skb(tfrc_rx_hist_entry(h, 2), skb, n3); @@ -280,17 +284,15 @@ static int __two_after_loss(struct tfrc_rx_hist *h, struct sk_buff *skb, u32 n3) } /* S0 < S3 < S1 */ - d31 = -d13; - d3 = dccp_delta_seqno(s0, s3); - if (d3 == 1 || n3 >= d3) { /* S3 is a successor of S0 */ + if (dccp_loss_free(s0, s3, n3)) { + u64 n1 = tfrc_rx_hist_entry(h, 1)->tfrchrx_ndp; - if (d31 == 1 || n1 >= d31) { + if (dccp_loss_free(s3, s1, n1)) { /* hole between S0 and S1 filled by S3 */ - int d2 = dccp_delta_seqno(s1, s2), - n2 = tfrc_rx_hist_entry(h, 2)->tfrchrx_ndp; + u64 n2 = tfrc_rx_hist_entry(h, 2)->tfrchrx_ndp; - if (d2 == 1 || n2 >= d2) { + if (dccp_loss_free(s1, s2, n2)) { /* entire hole filled by S0, S3, S1, S2 */ h->loss_start = tfrc_rx_hist_index(h, 2); h->loss_count = 0; @@ -307,8 +309,8 @@ static int __two_after_loss(struct tfrc_rx_hist *h, struct sk_buff *skb, u32 n3) } /* - * The remaining case: S3 is not a successor of S0. - * Sequence order is S0, S3, S1, S2; reorder to insert between S0 and S1 + * The remaining case: S0 < S3 < S1 < S2; gap between S0 and S3 + * Reorder history to insert S3 between S0 and S1. */ tfrc_rx_hist_swap(h, 0, 3); h->loss_start = tfrc_rx_hist_index(h, 3); @@ -318,33 +320,25 @@ static int __two_after_loss(struct tfrc_rx_hist *h, struct sk_buff *skb, u32 n3) return 1; } -/* return the signed modulo-2^48 sequence number distance from entry e1 to e2 */ -static s64 tfrc_rx_hist_delta_seqno(struct tfrc_rx_hist *h, u8 e1, u8 e2) -{ - DCCP_BUG_ON(e1 > h->loss_count || e2 > h->loss_count); - - return dccp_delta_seqno(tfrc_rx_hist_entry(h, e1)->tfrchrx_seqno, - tfrc_rx_hist_entry(h, e2)->tfrchrx_seqno); -} - /* recycle RX history records to continue loss detection if necessary */ static void __three_after_loss(struct tfrc_rx_hist *h) { /* - * The distance between S0 and S1 is always greater than 1 and the NDP - * count of S1 is smaller than this distance. Otherwise there would - * have been no loss. Hence it is only necessary to see whether there - * are further missing data packets between S1/S2 and S2/S3. + * At this stage we know already that there is a gap between S0 and S1 + * (since S0 was the highest sequence number received before detecting + * the loss). To recycle the loss record, it is thus only necessary to + * check for other possible gaps between S1/S2 and between S2/S3. */ - int d2 = tfrc_rx_hist_delta_seqno(h, 1, 2), - d3 = tfrc_rx_hist_delta_seqno(h, 2, 3), - n2 = tfrc_rx_hist_entry(h, 2)->tfrchrx_ndp, + u64 s1 = tfrc_rx_hist_entry(h, 1)->tfrchrx_seqno, + s2 = tfrc_rx_hist_entry(h, 2)->tfrchrx_seqno, + s3 = tfrc_rx_hist_entry(h, 3)->tfrchrx_seqno; + u64 n2 = tfrc_rx_hist_entry(h, 2)->tfrchrx_ndp, n3 = tfrc_rx_hist_entry(h, 3)->tfrchrx_ndp; - if (d2 == 1 || n2 >= d2) { /* S2 is successor to S1 */ + if (dccp_loss_free(s1, s2, n2)) { - if (d3 == 1 || n3 >= d3) { - /* S3 is successor of S2: entire hole is filled */ + if (dccp_loss_free(s2, s3, n3)) { + /* no gap between S2 and S3: entire hole is filled */ h->loss_start = tfrc_rx_hist_index(h, 3); h->loss_count = 0; } else { @@ -353,7 +347,7 @@ static void __three_after_loss(struct tfrc_rx_hist *h) h->loss_count = 1; } - } else { /* gap between S1 and S2 */ + } else { /* gap between S1 and S2 */ h->loss_start = tfrc_rx_hist_index(h, 1); h->loss_count = 2; } @@ -370,15 +364,20 @@ static void __three_after_loss(struct tfrc_rx_hist *h) * Chooses action according to pending loss, updates LI database when a new * loss was detected, and does required post-processing. Returns 1 when caller * should send feedback, 0 otherwise. + * Since it also takes care of reordering during loss detection and updates the + * records accordingly, the caller should not perform any more RX history + * operations when loss_count is greater than 0 after calling this function. */ int tfrc_rx_handle_loss(struct tfrc_rx_hist *h, struct tfrc_loss_hist *lh, - struct sk_buff *skb, u32 ndp, + struct sk_buff *skb, const u64 ndp, u32 (*calc_first_li)(struct sock *), struct sock *sk) { int is_new_loss = 0; - if (h->loss_count == 1) { + if (h->loss_count == 0) { + __do_track_loss(h, skb, ndp); + } else if (h->loss_count == 1) { __one_after_loss(h, skb, ndp); } else if (h->loss_count != 2) { DCCP_BUG("invalid loss_count %d", h->loss_count); diff --git a/net/dccp/ccids/lib/packet_history.h b/net/dccp/ccids/lib/packet_history.h index c7eeda49cb20..461cc91cce88 100644 --- a/net/dccp/ccids/lib/packet_history.h +++ b/net/dccp/ccids/lib/packet_history.h @@ -64,7 +64,7 @@ struct tfrc_rx_hist_entry { u64 tfrchrx_seqno:48, tfrchrx_ccval:4, tfrchrx_type:4; - u32 tfrchrx_ndp; /* In fact it is from 8 to 24 bits */ + u64 tfrchrx_ndp:48; ktime_t tfrchrx_tstamp; }; @@ -118,41 +118,21 @@ static inline struct tfrc_rx_hist_entry * return h->ring[h->loss_start]; } -/* initialise loss detection and disable RTT sampling */ -static inline void tfrc_rx_hist_loss_indicated(struct tfrc_rx_hist *h) -{ - h->loss_count = 1; -} - /* indicate whether previously a packet was detected missing */ -static inline int tfrc_rx_hist_loss_pending(const struct tfrc_rx_hist *h) -{ - return h->loss_count; -} - -/* any data packets missing between last reception and skb ? */ -static inline int tfrc_rx_hist_new_loss_indicated(struct tfrc_rx_hist *h, - const struct sk_buff *skb, - u32 ndp) +static inline bool tfrc_rx_hist_loss_pending(const struct tfrc_rx_hist *h) { - int delta = dccp_delta_seqno(tfrc_rx_hist_last_rcv(h)->tfrchrx_seqno, - DCCP_SKB_CB(skb)->dccpd_seq); - - if (delta > 1 && ndp < delta) - tfrc_rx_hist_loss_indicated(h); - - return tfrc_rx_hist_loss_pending(h); + return h->loss_count > 0; } extern void tfrc_rx_hist_add_packet(struct tfrc_rx_hist *h, - const struct sk_buff *skb, const u32 ndp); + const struct sk_buff *skb, const u64 ndp); extern int tfrc_rx_hist_duplicate(struct tfrc_rx_hist *h, struct sk_buff *skb); struct tfrc_loss_hist; extern int tfrc_rx_handle_loss(struct tfrc_rx_hist *h, struct tfrc_loss_hist *lh, - struct sk_buff *skb, u32 ndp, + struct sk_buff *skb, const u64 ndp, u32 (*first_li)(struct sock *sk), struct sock *sk); extern u32 tfrc_rx_hist_sample_rtt(struct tfrc_rx_hist *h, diff --git a/net/dccp/ccids/lib/tfrc.c b/net/dccp/ccids/lib/tfrc.c index 97ecec0a8e76..185916218e07 100644 --- a/net/dccp/ccids/lib/tfrc.c +++ b/net/dccp/ccids/lib/tfrc.c @@ -10,7 +10,7 @@ #ifdef CONFIG_IP_DCCP_TFRC_DEBUG int tfrc_debug; -module_param(tfrc_debug, bool, 0444); +module_param(tfrc_debug, bool, 0644); MODULE_PARM_DESC(tfrc_debug, "Enable debug messages"); #endif diff --git a/net/dccp/dccp.h b/net/dccp/dccp.h index f44d492d3b74..b4bc6e095a0e 100644 --- a/net/dccp/dccp.h +++ b/net/dccp/dccp.h @@ -153,6 +153,21 @@ static inline u64 max48(const u64 seq1, const u64 seq2) return after48(seq1, seq2) ? seq1 : seq2; } +/** + * dccp_loss_free - Evaluates condition for data loss from RFC 4340, 7.7.1 + * @s1: start sequence number + * @s2: end sequence number + * @ndp: NDP count on packet with sequence number @s2 + * Returns true if the sequence range s1...s2 has no data loss. + */ +static inline bool dccp_loss_free(const u64 s1, const u64 s2, const u64 ndp) +{ + s64 delta = dccp_delta_seqno(s1, s2); + + WARN_ON(delta < 0); + return (u64)delta <= ndp + 1; +} + enum { DCCP_MIB_NUM = 0, DCCP_MIB_ACTIVEOPENS, /* ActiveOpens */ @@ -211,10 +226,11 @@ static inline void dccp_csum_outgoing(struct sk_buff *skb) extern void dccp_v4_send_check(struct sock *sk, int len, struct sk_buff *skb); -extern int dccp_retransmit_skb(struct sock *sk, struct sk_buff *skb); +extern int dccp_retransmit_skb(struct sock *sk); extern void dccp_send_ack(struct sock *sk); -extern void dccp_reqsk_send_ack(struct sk_buff *sk, struct request_sock *rsk); +extern void dccp_reqsk_send_ack(struct sock *sk, struct sk_buff *skb, + struct request_sock *rsk); extern void dccp_send_sync(struct sock *sk, const u64 seq, const enum dccp_pkt_type pkt_type); @@ -262,7 +278,7 @@ extern int dccp_rcv_established(struct sock *sk, struct sk_buff *skb, const struct dccp_hdr *dh, const unsigned len); extern int dccp_init_sock(struct sock *sk, const __u8 ctl_sock_initialized); -extern int dccp_destroy_sock(struct sock *sk); +extern void dccp_destroy_sock(struct sock *sk); extern void dccp_close(struct sock *sk, long timeout); extern struct sk_buff *dccp_make_response(struct sock *sk, diff --git a/net/dccp/input.c b/net/dccp/input.c index 08392ed86c25..779d0ed9ae94 100644 --- a/net/dccp/input.c +++ b/net/dccp/input.c @@ -370,7 +370,7 @@ int dccp_rcv_established(struct sock *sk, struct sk_buff *skb, goto discard; if (dccp_parse_options(sk, NULL, skb)) - goto discard; + return 1; if (DCCP_SKB_CB(skb)->dccpd_ack_seq != DCCP_PKT_WITHOUT_ACK_SEQ) dccp_event_ack_recv(sk, skb); @@ -411,12 +411,6 @@ static int dccp_rcv_request_sent_state_process(struct sock *sk, struct dccp_sock *dp = dccp_sk(sk); long tstamp = dccp_timestamp(); - /* Stop the REQUEST timer */ - inet_csk_clear_xmit_timer(sk, ICSK_TIME_RETRANS); - BUG_TRAP(sk->sk_send_head != NULL); - __kfree_skb(sk->sk_send_head); - sk->sk_send_head = NULL; - if (!between48(DCCP_SKB_CB(skb)->dccpd_ack_seq, dp->dccps_awl, dp->dccps_awh)) { dccp_pr_debug("invalid ackno: S.AWL=%llu, " @@ -441,6 +435,12 @@ static int dccp_rcv_request_sent_state_process(struct sock *sk, DCCP_ACKVEC_STATE_RECEIVED)) goto out_invalid_packet; /* FIXME: change error code */ + /* Stop the REQUEST timer */ + inet_csk_clear_xmit_timer(sk, ICSK_TIME_RETRANS); + WARN_ON(sk->sk_send_head == NULL); + kfree_skb(sk->sk_send_head); + sk->sk_send_head = NULL; + dp->dccps_isr = DCCP_SKB_CB(skb)->dccpd_seq; dccp_update_gsr(sk, dp->dccps_isr); /* @@ -610,7 +610,7 @@ int dccp_rcv_state_process(struct sock *sk, struct sk_buff *skb, * Step 8: Process options and mark acknowledgeable */ if (dccp_parse_options(sk, NULL, skb)) - goto discard; + return 1; if (dcb->dccpd_ack_seq != DCCP_PKT_WITHOUT_ACK_SEQ) dccp_event_ack_recv(sk, skb); diff --git a/net/dccp/ipv4.c b/net/dccp/ipv4.c index 37d27bcb361f..e3dfddab21cc 100644 --- a/net/dccp/ipv4.c +++ b/net/dccp/ipv4.c @@ -196,8 +196,8 @@ static inline void dccp_do_pmtu_discovery(struct sock *sk, static void dccp_v4_err(struct sk_buff *skb, u32 info) { const struct iphdr *iph = (struct iphdr *)skb->data; - const struct dccp_hdr *dh = (struct dccp_hdr *)(skb->data + - (iph->ihl << 2)); + const u8 offset = iph->ihl << 2; + const struct dccp_hdr *dh = (struct dccp_hdr *)(skb->data + offset); struct dccp_sock *dp; struct inet_sock *inet; const int type = icmp_hdr(skb)->type; @@ -205,17 +205,19 @@ static void dccp_v4_err(struct sk_buff *skb, u32 info) struct sock *sk; __u64 seq; int err; + struct net *net = dev_net(skb->dev); - if (skb->len < (iph->ihl << 2) + 8) { - ICMP_INC_STATS_BH(ICMP_MIB_INERRORS); + if (skb->len < offset + sizeof(*dh) || + skb->len < offset + __dccp_basic_hdr_len(dh)) { + ICMP_INC_STATS_BH(net, ICMP_MIB_INERRORS); return; } - sk = inet_lookup(dev_net(skb->dev), &dccp_hashinfo, + sk = inet_lookup(net, &dccp_hashinfo, iph->daddr, dh->dccph_dport, iph->saddr, dh->dccph_sport, inet_iif(skb)); if (sk == NULL) { - ICMP_INC_STATS_BH(ICMP_MIB_INERRORS); + ICMP_INC_STATS_BH(net, ICMP_MIB_INERRORS); return; } @@ -229,7 +231,7 @@ static void dccp_v4_err(struct sk_buff *skb, u32 info) * servers this needs to be solved differently. */ if (sock_owned_by_user(sk)) - NET_INC_STATS_BH(LINUX_MIB_LOCKDROPPEDICMPS); + NET_INC_STATS_BH(net, LINUX_MIB_LOCKDROPPEDICMPS); if (sk->sk_state == DCCP_CLOSED) goto out; @@ -237,8 +239,8 @@ static void dccp_v4_err(struct sk_buff *skb, u32 info) dp = dccp_sk(sk); seq = dccp_hdr_seq(dh); if ((1 << sk->sk_state) & ~(DCCPF_REQUESTING | DCCPF_LISTEN) && - !between48(seq, dp->dccps_swl, dp->dccps_swh)) { - NET_INC_STATS_BH(LINUX_MIB_OUTOFWINDOWICMPS); + !between48(seq, dp->dccps_awl, dp->dccps_awh)) { + NET_INC_STATS_BH(net, LINUX_MIB_OUTOFWINDOWICMPS); goto out; } @@ -282,10 +284,10 @@ static void dccp_v4_err(struct sk_buff *skb, u32 info) * ICMPs are not backlogged, hence we cannot get an established * socket here. */ - BUG_TRAP(!req->sk); + WARN_ON(req->sk); if (seq != dccp_rsk(req)->dreq_iss) { - NET_INC_STATS_BH(LINUX_MIB_OUTOFWINDOWICMPS); + NET_INC_STATS_BH(net, LINUX_MIB_OUTOFWINDOWICMPS); goto out; } /* @@ -408,9 +410,9 @@ struct sock *dccp_v4_request_recv_sock(struct sock *sk, struct sk_buff *skb, return newsk; exit_overflow: - NET_INC_STATS_BH(LINUX_MIB_LISTENOVERFLOWS); + NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_LISTENOVERFLOWS); exit: - NET_INC_STATS_BH(LINUX_MIB_LISTENDROPS); + NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_LISTENDROPS); dst_release(dst); return NULL; } @@ -464,7 +466,7 @@ static struct dst_entry* dccp_v4_route_skb(struct net *net, struct sock *sk, security_skb_classify_flow(skb, &fl); if (ip_route_output_flow(net, &rt, &fl, sk, 0)) { - IP_INC_STATS_BH(IPSTATS_MIB_OUTNOROUTES); + IP_INC_STATS_BH(net, IPSTATS_MIB_OUTNOROUTES); return NULL; } @@ -809,9 +811,8 @@ static int dccp_v4_rcv(struct sk_buff *skb) /* Step 2: * Look up flow ID in table and get corresponding socket */ - sk = __inet_lookup(dev_net(skb->dst->dev), &dccp_hashinfo, - iph->saddr, dh->dccph_sport, - iph->daddr, dh->dccph_dport, inet_iif(skb)); + sk = __inet_lookup_skb(&dccp_hashinfo, skb, + dh->dccph_sport, dh->dccph_dport); /* * Step 2: * If no socket ... diff --git a/net/dccp/ipv6.c b/net/dccp/ipv6.c index f7fe2a572d7b..11062780bb02 100644 --- a/net/dccp/ipv6.c +++ b/net/dccp/ipv6.c @@ -89,17 +89,27 @@ static void dccp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt, { struct ipv6hdr *hdr = (struct ipv6hdr *)skb->data; const struct dccp_hdr *dh = (struct dccp_hdr *)(skb->data + offset); + struct dccp_sock *dp; struct ipv6_pinfo *np; struct sock *sk; int err; __u64 seq; + struct net *net = dev_net(skb->dev); - sk = inet6_lookup(dev_net(skb->dev), &dccp_hashinfo, + if (skb->len < offset + sizeof(*dh) || + skb->len < offset + __dccp_basic_hdr_len(dh)) { + ICMP6_INC_STATS_BH(net, __in6_dev_get(skb->dev), + ICMP6_MIB_INERRORS); + return; + } + + sk = inet6_lookup(net, &dccp_hashinfo, &hdr->daddr, dh->dccph_dport, &hdr->saddr, dh->dccph_sport, inet6_iif(skb)); if (sk == NULL) { - ICMP6_INC_STATS_BH(__in6_dev_get(skb->dev), ICMP6_MIB_INERRORS); + ICMP6_INC_STATS_BH(net, __in6_dev_get(skb->dev), + ICMP6_MIB_INERRORS); return; } @@ -110,11 +120,19 @@ static void dccp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt, bh_lock_sock(sk); if (sock_owned_by_user(sk)) - NET_INC_STATS_BH(LINUX_MIB_LOCKDROPPEDICMPS); + NET_INC_STATS_BH(net, LINUX_MIB_LOCKDROPPEDICMPS); if (sk->sk_state == DCCP_CLOSED) goto out; + dp = dccp_sk(sk); + seq = dccp_hdr_seq(dh); + if ((1 << sk->sk_state) & ~(DCCPF_REQUESTING | DCCPF_LISTEN) && + !between48(seq, dp->dccps_awl, dp->dccps_awh)) { + NET_INC_STATS_BH(net, LINUX_MIB_OUTOFWINDOWICMPS); + goto out; + } + np = inet6_sk(sk); if (type == ICMPV6_PKT_TOOBIG) { @@ -167,7 +185,6 @@ static void dccp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt, icmpv6_err_convert(type, code, &err); - seq = dccp_hdr_seq(dh); /* Might be for an request_sock */ switch (sk->sk_state) { struct request_sock *req, **prev; @@ -185,10 +202,10 @@ static void dccp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt, * ICMPs are not backlogged, hence we cannot get an established * socket here. */ - BUG_TRAP(req->sk == NULL); + WARN_ON(req->sk != NULL); if (seq != dccp_rsk(req)->dreq_iss) { - NET_INC_STATS_BH(LINUX_MIB_OUTOFWINDOWICMPS); + NET_INC_STATS_BH(net, LINUX_MIB_OUTOFWINDOWICMPS); goto out; } @@ -629,9 +646,9 @@ static struct sock *dccp_v6_request_recv_sock(struct sock *sk, return newsk; out_overflow: - NET_INC_STATS_BH(LINUX_MIB_LISTENOVERFLOWS); + NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_LISTENOVERFLOWS); out: - NET_INC_STATS_BH(LINUX_MIB_LISTENDROPS); + NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_LISTENDROPS); if (opt != NULL && opt != np->opt) sock_kfree_s(sk, opt, opt->tot_len); dst_release(dst); @@ -790,10 +807,8 @@ static int dccp_v6_rcv(struct sk_buff *skb) /* Step 2: * Look up flow ID in table and get corresponding socket */ - sk = __inet6_lookup(dev_net(skb->dst->dev), &dccp_hashinfo, - &ipv6_hdr(skb)->saddr, dh->dccph_sport, - &ipv6_hdr(skb)->daddr, ntohs(dh->dccph_dport), - inet6_iif(skb)); + sk = __inet6_lookup_skb(&dccp_hashinfo, skb, + dh->dccph_sport, dh->dccph_dport); /* * Step 2: * If no socket ... @@ -1091,10 +1106,10 @@ static int dccp_v6_init_sock(struct sock *sk) return err; } -static int dccp_v6_destroy_sock(struct sock *sk) +static void dccp_v6_destroy_sock(struct sock *sk) { dccp_destroy_sock(sk); - return inet6_destroy_sock(sk); + inet6_destroy_sock(sk); } static struct timewait_sock_ops dccp6_timewait_sock_ops = { diff --git a/net/dccp/minisocks.c b/net/dccp/minisocks.c index 66dca5bba858..b2804e2d1b8c 100644 --- a/net/dccp/minisocks.c +++ b/net/dccp/minisocks.c @@ -296,7 +296,8 @@ int dccp_child_process(struct sock *parent, struct sock *child, EXPORT_SYMBOL_GPL(dccp_child_process); -void dccp_reqsk_send_ack(struct sk_buff *skb, struct request_sock *rsk) +void dccp_reqsk_send_ack(struct sock *sk, struct sk_buff *skb, + struct request_sock *rsk) { DCCP_BUG("DCCP-ACK packets are never sent in LISTEN/RESPOND state"); } diff --git a/net/dccp/options.c b/net/dccp/options.c index 43bc24e761d0..0809b63cb055 100644 --- a/net/dccp/options.c +++ b/net/dccp/options.c @@ -81,11 +81,11 @@ int dccp_parse_options(struct sock *sk, struct dccp_request_sock *dreq, /* Check if this isn't a single byte option */ if (opt > DCCPO_MAX_RESERVED) { if (opt_ptr == opt_end) - goto out_invalid_option; + goto out_nonsensical_length; len = *opt_ptr++; - if (len < 3) - goto out_invalid_option; + if (len < 2) + goto out_nonsensical_length; /* * Remove the type and len fields, leaving * just the value size @@ -95,7 +95,7 @@ int dccp_parse_options(struct sock *sk, struct dccp_request_sock *dreq, opt_ptr += len; if (opt_ptr > opt_end) - goto out_invalid_option; + goto out_nonsensical_length; } /* @@ -124,12 +124,12 @@ int dccp_parse_options(struct sock *sk, struct dccp_request_sock *dreq, mandatory = 1; break; case DCCPO_NDP_COUNT: - if (len > 3) + if (len > 6) goto out_invalid_option; opt_recv->dccpor_ndp = dccp_decode_value_var(value, len); - dccp_pr_debug("%s rx opt: NDP count=%d\n", dccp_role(sk), - opt_recv->dccpor_ndp); + dccp_pr_debug("%s opt: NDP count=%llu\n", dccp_role(sk), + (unsigned long long)opt_recv->dccpor_ndp); break; case DCCPO_CHANGE_L: /* fall through */ @@ -283,12 +283,17 @@ ignore_option: if (mandatory) goto out_invalid_option; +out_nonsensical_length: + /* RFC 4340, 5.8: ignore option and all remaining option space */ return 0; out_invalid_option: DCCP_INC_STATS_BH(DCCP_MIB_INVALIDOPT); DCCP_SKB_CB(skb)->dccpd_reset_code = DCCP_RESET_CODE_OPTION_ERROR; DCCP_WARN("DCCP(%p): invalid option %d, len=%d", sk, opt, len); + DCCP_SKB_CB(skb)->dccpd_reset_data[0] = opt; + DCCP_SKB_CB(skb)->dccpd_reset_data[1] = len > 0 ? value[0] : 0; + DCCP_SKB_CB(skb)->dccpd_reset_data[2] = len > 1 ? value[1] : 0; return -1; } @@ -307,9 +312,11 @@ static void dccp_encode_value_var(const u32 value, unsigned char *to, *to++ = (value & 0xFF); } -static inline int dccp_ndp_len(const int ndp) +static inline u8 dccp_ndp_len(const u64 ndp) { - return likely(ndp <= 0xFF) ? 1 : ndp <= 0xFFFF ? 2 : 3; + if (likely(ndp <= 0xFF)) + return 1; + return likely(ndp <= USHORT_MAX) ? 2 : (ndp <= UINT_MAX ? 4 : 6); } int dccp_insert_option(struct sock *sk, struct sk_buff *skb, @@ -336,7 +343,7 @@ EXPORT_SYMBOL_GPL(dccp_insert_option); static int dccp_insert_option_ndp(struct sock *sk, struct sk_buff *skb) { struct dccp_sock *dp = dccp_sk(sk); - int ndp = dp->dccps_ndp_count; + u64 ndp = dp->dccps_ndp_count; if (dccp_non_data_packet(skb)) ++dp->dccps_ndp_count; diff --git a/net/dccp/output.c b/net/dccp/output.c index fe20068c5d8e..d06945c7d3df 100644 --- a/net/dccp/output.c +++ b/net/dccp/output.c @@ -53,8 +53,11 @@ static int dccp_transmit_skb(struct sock *sk, struct sk_buff *skb) dccp_packet_hdr_len(dcb->dccpd_type); int err, set_ack = 1; u64 ackno = dp->dccps_gsr; - - dccp_inc_seqno(&dp->dccps_gss); + /* + * Increment GSS here already in case the option code needs it. + * Update GSS for real only if option processing below succeeds. + */ + dcb->dccpd_seq = ADD48(dp->dccps_gss, 1); switch (dcb->dccpd_type) { case DCCP_PKT_DATA: @@ -66,6 +69,9 @@ static int dccp_transmit_skb(struct sock *sk, struct sk_buff *skb) case DCCP_PKT_REQUEST: set_ack = 0; + /* Use ISS on the first (non-retransmitted) Request. */ + if (icsk->icsk_retransmits == 0) + dcb->dccpd_seq = dp->dccps_iss; /* fall through */ case DCCP_PKT_SYNC: @@ -84,8 +90,6 @@ static int dccp_transmit_skb(struct sock *sk, struct sk_buff *skb) break; } - dcb->dccpd_seq = dp->dccps_gss; - if (dccp_insert_options(sk, skb)) { kfree_skb(skb); return -EPROTO; @@ -103,7 +107,7 @@ static int dccp_transmit_skb(struct sock *sk, struct sk_buff *skb) /* XXX For now we're using only 48 bits sequence numbers */ dh->dccph_x = 1; - dp->dccps_awh = dp->dccps_gss; + dccp_update_gss(sk, dcb->dccpd_seq); dccp_hdr_set_seq(dh, dp->dccps_gss); if (set_ack) dccp_hdr_set_ack(dccp_hdr_ack_bits(skb), ackno); @@ -112,6 +116,11 @@ static int dccp_transmit_skb(struct sock *sk, struct sk_buff *skb) case DCCP_PKT_REQUEST: dccp_hdr_request(skb)->dccph_req_service = dp->dccps_service; + /* + * Limit Ack window to ISS <= P.ackno <= GSS, so that + * only Responses to Requests we sent are considered. + */ + dp->dccps_awl = dp->dccps_iss; break; case DCCP_PKT_RESET: dccp_hdr_reset(skb)->dccph_reset_code = @@ -284,14 +293,26 @@ void dccp_write_xmit(struct sock *sk, int block) } } -int dccp_retransmit_skb(struct sock *sk, struct sk_buff *skb) +/** + * dccp_retransmit_skb - Retransmit Request, Close, or CloseReq packets + * There are only four retransmittable packet types in DCCP: + * - Request in client-REQUEST state (sec. 8.1.1), + * - CloseReq in server-CLOSEREQ state (sec. 8.3), + * - Close in node-CLOSING state (sec. 8.3), + * - Acks in client-PARTOPEN state (sec. 8.1.5, handled by dccp_delack_timer()). + * This function expects sk->sk_send_head to contain the original skb. + */ +int dccp_retransmit_skb(struct sock *sk) { + WARN_ON(sk->sk_send_head == NULL); + if (inet_csk(sk)->icsk_af_ops->rebuild_header(sk) != 0) return -EHOSTUNREACH; /* Routing failure or similar. */ - return dccp_transmit_skb(sk, (skb_cloned(skb) ? - pskb_copy(skb, GFP_ATOMIC): - skb_clone(skb, GFP_ATOMIC))); + /* this count is used to distinguish original and retransmitted skb */ + inet_csk(sk)->icsk_retransmits++; + + return dccp_transmit_skb(sk, skb_clone(sk->sk_send_head, GFP_ATOMIC)); } struct sk_buff *dccp_make_response(struct sock *sk, struct dst_entry *dst, @@ -437,19 +458,7 @@ static inline void dccp_connect_init(struct sock *sk) dccp_sync_mss(sk, dst_mtu(dst)); - /* - * SWL and AWL are initially adjusted so that they are not less than - * the initial Sequence Numbers received and sent, respectively: - * SWL := max(GSR + 1 - floor(W/4), ISR), - * AWL := max(GSS - W' + 1, ISS). - * These adjustments MUST be applied only at the beginning of the - * connection. - */ - dccp_update_gss(sk, dp->dccps_iss); - dccp_set_seqno(&dp->dccps_awl, max48(dp->dccps_awl, dp->dccps_iss)); - - /* S.GAR - greatest valid acknowledgement number received on a non-Sync; - * initialized to S.ISS (sec. 8.5) */ + /* Initialise GAR as per 8.5; AWL/AWH are set in dccp_transmit_skb() */ dp->dccps_gar = dp->dccps_iss; icsk->icsk_retransmits = 0; diff --git a/net/dccp/proto.c b/net/dccp/proto.c index 9dfe2470962c..d0bd34819761 100644 --- a/net/dccp/proto.c +++ b/net/dccp/proto.c @@ -237,7 +237,7 @@ int dccp_init_sock(struct sock *sk, const __u8 ctl_sock_initialized) EXPORT_SYMBOL_GPL(dccp_init_sock); -int dccp_destroy_sock(struct sock *sk) +void dccp_destroy_sock(struct sock *sk) { struct dccp_sock *dp = dccp_sk(sk); struct dccp_minisock *dmsk = dccp_msk(sk); @@ -268,8 +268,6 @@ int dccp_destroy_sock(struct sock *sk) /* clean up feature negotiation state */ dccp_feat_clean(dmsk); - - return 0; } EXPORT_SYMBOL_GPL(dccp_destroy_sock); @@ -311,7 +309,9 @@ int dccp_disconnect(struct sock *sk, int flags) sk->sk_err = ECONNRESET; dccp_clear_xmit_timers(sk); + __skb_queue_purge(&sk->sk_receive_queue); + __skb_queue_purge(&sk->sk_write_queue); if (sk->sk_send_head != NULL) { __kfree_skb(sk->sk_send_head); sk->sk_send_head = NULL; @@ -329,7 +329,7 @@ int dccp_disconnect(struct sock *sk, int flags) inet_csk_delack_init(sk); __sk_dst_reset(sk); - BUG_TRAP(!inet->num || icsk->icsk_bind_hash); + WARN_ON(inet->num && !icsk->icsk_bind_hash); sk->sk_error_report(sk); return err; @@ -476,6 +476,11 @@ static int dccp_setsockopt_change(struct sock *sk, int type, if (copy_from_user(&opt, optval, sizeof(opt))) return -EFAULT; + /* + * rfc4340: 6.1. Change Options + */ + if (opt.dccpsf_len < 1) + return -EINVAL; val = kmalloc(opt.dccpsf_len, GFP_KERNEL); if (!val) @@ -983,7 +988,7 @@ adjudge_to_death: */ local_bh_disable(); bh_lock_sock(sk); - BUG_TRAP(!sock_owned_by_user(sk)); + WARN_ON(sock_owned_by_user(sk)); /* Have we already been destroyed by a softirq or backlog? */ if (state != DCCP_CLOSED && sk->sk_state == DCCP_CLOSED) @@ -1025,7 +1030,7 @@ MODULE_PARM_DESC(thash_entries, "Number of ehash buckets"); #ifdef CONFIG_IP_DCCP_DEBUG int dccp_debug; -module_param(dccp_debug, bool, 0444); +module_param(dccp_debug, bool, 0644); MODULE_PARM_DESC(dccp_debug, "Enable debug messages"); EXPORT_SYMBOL_GPL(dccp_debug); diff --git a/net/dccp/timer.c b/net/dccp/timer.c index 8703a792b560..54b3c7e9e016 100644 --- a/net/dccp/timer.c +++ b/net/dccp/timer.c @@ -99,21 +99,11 @@ static void dccp_retransmit_timer(struct sock *sk) } /* - * sk->sk_send_head has to have one skb with - * DCCP_SKB_CB(skb)->dccpd_type set to one of the retransmittable DCCP - * packet types. The only packets eligible for retransmission are: - * -- Requests in client-REQUEST state (sec. 8.1.1) - * -- Acks in client-PARTOPEN state (sec. 8.1.5) - * -- CloseReq in server-CLOSEREQ state (sec. 8.3) - * -- Close in node-CLOSING state (sec. 8.3) */ - BUG_TRAP(sk->sk_send_head != NULL); - - /* * More than than 4MSL (8 minutes) has passed, a RESET(aborted) was * sent, no need to retransmit, this sock is dead. */ if (dccp_write_timeout(sk)) - goto out; + return; /* * We want to know the number of packets retransmitted, not the @@ -122,30 +112,28 @@ static void dccp_retransmit_timer(struct sock *sk) if (icsk->icsk_retransmits == 0) DCCP_INC_STATS_BH(DCCP_MIB_TIMEOUTS); - if (dccp_retransmit_skb(sk, sk->sk_send_head) < 0) { + if (dccp_retransmit_skb(sk) != 0) { /* * Retransmission failed because of local congestion, * do not backoff. */ - if (icsk->icsk_retransmits == 0) + if (--icsk->icsk_retransmits == 0) icsk->icsk_retransmits = 1; inet_csk_reset_xmit_timer(sk, ICSK_TIME_RETRANS, min(icsk->icsk_rto, TCP_RESOURCE_PROBE_INTERVAL), DCCP_RTO_MAX); - goto out; + return; } backoff: icsk->icsk_backoff++; - icsk->icsk_retransmits++; icsk->icsk_rto = min(icsk->icsk_rto << 1, DCCP_RTO_MAX); inet_csk_reset_xmit_timer(sk, ICSK_TIME_RETRANS, icsk->icsk_rto, DCCP_RTO_MAX); if (icsk->icsk_retransmits > sysctl_dccp_retries1) __sk_dst_reset(sk); -out:; } static void dccp_write_timer(unsigned long data) @@ -224,7 +212,7 @@ static void dccp_delack_timer(unsigned long data) if (sock_owned_by_user(sk)) { /* Try again later. */ icsk->icsk_ack.blocked = 1; - NET_INC_STATS_BH(LINUX_MIB_DELAYEDACKLOCKED); + NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_DELAYEDACKLOCKED); sk_reset_timer(sk, &icsk->icsk_delack_timer, jiffies + TCP_DELACK_MIN); goto out; @@ -254,7 +242,7 @@ static void dccp_delack_timer(unsigned long data) icsk->icsk_ack.ato = TCP_ATO_MIN; } dccp_send_ack(sk); - NET_INC_STATS_BH(LINUX_MIB_DELAYEDACKS); + NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_DELAYEDACKS); } out: bh_unlock_sock(sk); diff --git a/net/decnet/af_decnet.c b/net/decnet/af_decnet.c index fc2efe899e91..3c23ab33dbc0 100644 --- a/net/decnet/af_decnet.c +++ b/net/decnet/af_decnet.c @@ -451,7 +451,7 @@ static void dn_destruct(struct sock *sk) static int dn_memory_pressure; -static void dn_enter_memory_pressure(void) +static void dn_enter_memory_pressure(struct sock *sk) { if (!dn_memory_pressure) { dn_memory_pressure = 1; @@ -1719,6 +1719,8 @@ static int dn_recvmsg(struct kiocb *iocb, struct socket *sock, * See if there is data ready to read, sleep if there isn't */ for(;;) { + DEFINE_WAIT(wait); + if (sk->sk_err) goto out; @@ -1748,14 +1750,11 @@ static int dn_recvmsg(struct kiocb *iocb, struct socket *sock, goto out; } - set_bit(SOCK_ASYNC_WAITDATA, &sock->flags); - SOCK_SLEEP_PRE(sk) - - if (!dn_data_ready(sk, queue, flags, target)) - schedule(); - - SOCK_SLEEP_POST(sk) - clear_bit(SOCK_ASYNC_WAITDATA, &sock->flags); + prepare_to_wait(sk->sk_sleep, &wait, TASK_INTERRUPTIBLE); + set_bit(SOCK_ASYNC_WAITDATA, &sk->sk_socket->flags); + sk_wait_event(sk, &timeo, dn_data_ready(sk, queue, flags, target)); + clear_bit(SOCK_ASYNC_WAITDATA, &sk->sk_socket->flags); + finish_wait(sk->sk_sleep, &wait); } for(skb = queue->next; skb != (struct sk_buff *)queue; skb = nskb) { @@ -2002,18 +2001,19 @@ static int dn_sendmsg(struct kiocb *iocb, struct socket *sock, * size. */ if (dn_queue_too_long(scp, queue, flags)) { + DEFINE_WAIT(wait); + if (flags & MSG_DONTWAIT) { err = -EWOULDBLOCK; goto out; } - SOCK_SLEEP_PRE(sk) - - if (dn_queue_too_long(scp, queue, flags)) - schedule(); - - SOCK_SLEEP_POST(sk) - + prepare_to_wait(sk->sk_sleep, &wait, TASK_INTERRUPTIBLE); + set_bit(SOCK_ASYNC_WAITDATA, &sk->sk_socket->flags); + sk_wait_event(sk, &timeo, + !dn_queue_too_long(scp, queue, flags)); + clear_bit(SOCK_ASYNC_WAITDATA, &sk->sk_socket->flags); + finish_wait(sk->sk_sleep, &wait); continue; } @@ -2089,7 +2089,7 @@ static int dn_device_event(struct notifier_block *this, unsigned long event, { struct net_device *dev = (struct net_device *)ptr; - if (dev_net(dev) != &init_net) + if (!net_eq(dev_net(dev), &init_net)) return NOTIFY_DONE; switch(event) { diff --git a/net/decnet/dn_route.c b/net/decnet/dn_route.c index f50e88bf2661..821bd1cdec04 100644 --- a/net/decnet/dn_route.c +++ b/net/decnet/dn_route.c @@ -580,7 +580,7 @@ int dn_route_rcv(struct sk_buff *skb, struct net_device *dev, struct packet_type struct dn_dev *dn = (struct dn_dev *)dev->dn_ptr; unsigned char padlen = 0; - if (dev_net(dev) != &init_net) + if (!net_eq(dev_net(dev), &init_net)) goto dump_it; if (dn == NULL) diff --git a/net/decnet/dn_rules.c b/net/decnet/dn_rules.c index 5b7539b7fe0c..14fbca55e908 100644 --- a/net/decnet/dn_rules.c +++ b/net/decnet/dn_rules.c @@ -229,7 +229,7 @@ static u32 dn_fib_rule_default_pref(struct fib_rules_ops *ops) return 0; } -static void dn_fib_rule_flush_cache(void) +static void dn_fib_rule_flush_cache(struct fib_rules_ops *ops) { dn_rt_cache_flush(-1); } diff --git a/net/dsa/Kconfig b/net/dsa/Kconfig new file mode 100644 index 000000000000..49211b35725b --- /dev/null +++ b/net/dsa/Kconfig @@ -0,0 +1,60 @@ +menuconfig NET_DSA + bool "Distributed Switch Architecture support" + default n + depends on EXPERIMENTAL && !S390 + select PHYLIB + ---help--- + This allows you to use hardware switch chips that use + the Distributed Switch Architecture. + + +if NET_DSA + +# tagging formats +config NET_DSA_TAG_DSA + bool + default n + +config NET_DSA_TAG_EDSA + bool + default n + +config NET_DSA_TAG_TRAILER + bool + default n + + +# switch drivers +config NET_DSA_MV88E6XXX + bool + default n + +config NET_DSA_MV88E6060 + bool "Marvell 88E6060 ethernet switch chip support" + select NET_DSA_TAG_TRAILER + ---help--- + This enables support for the Marvell 88E6060 ethernet switch + chip. + +config NET_DSA_MV88E6XXX_NEED_PPU + bool + default n + +config NET_DSA_MV88E6131 + bool "Marvell 88E6131 ethernet switch chip support" + select NET_DSA_MV88E6XXX + select NET_DSA_MV88E6XXX_NEED_PPU + select NET_DSA_TAG_DSA + ---help--- + This enables support for the Marvell 88E6131 ethernet switch + chip. + +config NET_DSA_MV88E6123_61_65 + bool "Marvell 88E6123/6161/6165 ethernet switch chip support" + select NET_DSA_MV88E6XXX + select NET_DSA_TAG_EDSA + ---help--- + This enables support for the Marvell 88E6123/6161/6165 + ethernet switch chips. + +endif diff --git a/net/dsa/Makefile b/net/dsa/Makefile new file mode 100644 index 000000000000..2374faff4dea --- /dev/null +++ b/net/dsa/Makefile @@ -0,0 +1,13 @@ +# tagging formats +obj-$(CONFIG_NET_DSA_TAG_DSA) += tag_dsa.o +obj-$(CONFIG_NET_DSA_TAG_EDSA) += tag_edsa.o +obj-$(CONFIG_NET_DSA_TAG_TRAILER) += tag_trailer.o + +# switch drivers +obj-$(CONFIG_NET_DSA_MV88E6XXX) += mv88e6xxx.o +obj-$(CONFIG_NET_DSA_MV88E6060) += mv88e6060.o +obj-$(CONFIG_NET_DSA_MV88E6123_61_65) += mv88e6123_61_65.o +obj-$(CONFIG_NET_DSA_MV88E6131) += mv88e6131.o + +# the core +obj-$(CONFIG_NET_DSA) += dsa.o slave.o diff --git a/net/dsa/dsa.c b/net/dsa/dsa.c new file mode 100644 index 000000000000..33e99462023a --- /dev/null +++ b/net/dsa/dsa.c @@ -0,0 +1,392 @@ +/* + * net/dsa/dsa.c - Hardware switch handling + * Copyright (c) 2008 Marvell Semiconductor + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + */ + +#include <linux/list.h> +#include <linux/netdevice.h> +#include <linux/platform_device.h> +#include <net/dsa.h> +#include "dsa_priv.h" + +char dsa_driver_version[] = "0.1"; + + +/* switch driver registration ***********************************************/ +static DEFINE_MUTEX(dsa_switch_drivers_mutex); +static LIST_HEAD(dsa_switch_drivers); + +void register_switch_driver(struct dsa_switch_driver *drv) +{ + mutex_lock(&dsa_switch_drivers_mutex); + list_add_tail(&drv->list, &dsa_switch_drivers); + mutex_unlock(&dsa_switch_drivers_mutex); +} + +void unregister_switch_driver(struct dsa_switch_driver *drv) +{ + mutex_lock(&dsa_switch_drivers_mutex); + list_del_init(&drv->list); + mutex_unlock(&dsa_switch_drivers_mutex); +} + +static struct dsa_switch_driver * +dsa_switch_probe(struct mii_bus *bus, int sw_addr, char **_name) +{ + struct dsa_switch_driver *ret; + struct list_head *list; + char *name; + + ret = NULL; + name = NULL; + + mutex_lock(&dsa_switch_drivers_mutex); + list_for_each(list, &dsa_switch_drivers) { + struct dsa_switch_driver *drv; + + drv = list_entry(list, struct dsa_switch_driver, list); + + name = drv->probe(bus, sw_addr); + if (name != NULL) { + ret = drv; + break; + } + } + mutex_unlock(&dsa_switch_drivers_mutex); + + *_name = name; + + return ret; +} + + +/* basic switch operations **************************************************/ +static struct dsa_switch * +dsa_switch_setup(struct device *parent, struct dsa_platform_data *pd, + struct mii_bus *bus, struct net_device *dev) +{ + struct dsa_switch *ds; + int ret; + struct dsa_switch_driver *drv; + char *name; + int i; + + /* + * Probe for switch model. + */ + drv = dsa_switch_probe(bus, pd->sw_addr, &name); + if (drv == NULL) { + printk(KERN_ERR "%s: could not detect attached switch\n", + dev->name); + return ERR_PTR(-EINVAL); + } + printk(KERN_INFO "%s: detected a %s switch\n", dev->name, name); + + + /* + * Allocate and initialise switch state. + */ + ds = kzalloc(sizeof(*ds) + drv->priv_size, GFP_KERNEL); + if (ds == NULL) + return ERR_PTR(-ENOMEM); + + ds->pd = pd; + ds->master_netdev = dev; + ds->master_mii_bus = bus; + + ds->drv = drv; + ds->tag_protocol = drv->tag_protocol; + + + /* + * Validate supplied switch configuration. + */ + ds->cpu_port = -1; + for (i = 0; i < DSA_MAX_PORTS; i++) { + char *name; + + name = pd->port_names[i]; + if (name == NULL) + continue; + + if (!strcmp(name, "cpu")) { + if (ds->cpu_port != -1) { + printk(KERN_ERR "multiple cpu ports?!\n"); + ret = -EINVAL; + goto out; + } + ds->cpu_port = i; + } else { + ds->valid_port_mask |= 1 << i; + } + } + + if (ds->cpu_port == -1) { + printk(KERN_ERR "no cpu port?!\n"); + ret = -EINVAL; + goto out; + } + + + /* + * If we use a tagging format that doesn't have an ethertype + * field, make sure that all packets from this point on get + * sent to the tag format's receive function. (Which will + * discard received packets until we set ds->ports[] below.) + */ + wmb(); + dev->dsa_ptr = (void *)ds; + + + /* + * Do basic register setup. + */ + ret = drv->setup(ds); + if (ret < 0) + goto out; + + ret = drv->set_addr(ds, dev->dev_addr); + if (ret < 0) + goto out; + + ds->slave_mii_bus = mdiobus_alloc(); + if (ds->slave_mii_bus == NULL) { + ret = -ENOMEM; + goto out; + } + dsa_slave_mii_bus_init(ds); + + ret = mdiobus_register(ds->slave_mii_bus); + if (ret < 0) + goto out_free; + + + /* + * Create network devices for physical switch ports. + */ + wmb(); + for (i = 0; i < DSA_MAX_PORTS; i++) { + struct net_device *slave_dev; + + if (!(ds->valid_port_mask & (1 << i))) + continue; + + slave_dev = dsa_slave_create(ds, parent, i, pd->port_names[i]); + if (slave_dev == NULL) { + printk(KERN_ERR "%s: can't create dsa slave " + "device for port %d(%s)\n", + dev->name, i, pd->port_names[i]); + continue; + } + + ds->ports[i] = slave_dev; + } + + return ds; + +out_free: + mdiobus_free(ds->slave_mii_bus); +out: + dev->dsa_ptr = NULL; + kfree(ds); + return ERR_PTR(ret); +} + +static void dsa_switch_destroy(struct dsa_switch *ds) +{ +} + + +/* hooks for ethertype-less tagging formats *********************************/ +/* + * The original DSA tag format and some other tag formats have no + * ethertype, which means that we need to add a little hack to the + * networking receive path to make sure that received frames get + * the right ->protocol assigned to them when one of those tag + * formats is in use. + */ +bool dsa_uses_dsa_tags(void *dsa_ptr) +{ + struct dsa_switch *ds = dsa_ptr; + + return !!(ds->tag_protocol == htons(ETH_P_DSA)); +} + +bool dsa_uses_trailer_tags(void *dsa_ptr) +{ + struct dsa_switch *ds = dsa_ptr; + + return !!(ds->tag_protocol == htons(ETH_P_TRAILER)); +} + + +/* link polling *************************************************************/ +static void dsa_link_poll_work(struct work_struct *ugly) +{ + struct dsa_switch *ds; + + ds = container_of(ugly, struct dsa_switch, link_poll_work); + + ds->drv->poll_link(ds); + mod_timer(&ds->link_poll_timer, round_jiffies(jiffies + HZ)); +} + +static void dsa_link_poll_timer(unsigned long _ds) +{ + struct dsa_switch *ds = (void *)_ds; + + schedule_work(&ds->link_poll_work); +} + + +/* platform driver init and cleanup *****************************************/ +static int dev_is_class(struct device *dev, void *class) +{ + if (dev->class != NULL && !strcmp(dev->class->name, class)) + return 1; + + return 0; +} + +static struct device *dev_find_class(struct device *parent, char *class) +{ + if (dev_is_class(parent, class)) { + get_device(parent); + return parent; + } + + return device_find_child(parent, class, dev_is_class); +} + +static struct mii_bus *dev_to_mii_bus(struct device *dev) +{ + struct device *d; + + d = dev_find_class(dev, "mdio_bus"); + if (d != NULL) { + struct mii_bus *bus; + + bus = to_mii_bus(d); + put_device(d); + + return bus; + } + + return NULL; +} + +static struct net_device *dev_to_net_device(struct device *dev) +{ + struct device *d; + + d = dev_find_class(dev, "net"); + if (d != NULL) { + struct net_device *nd; + + nd = to_net_dev(d); + dev_hold(nd); + put_device(d); + + return nd; + } + + return NULL; +} + +static int dsa_probe(struct platform_device *pdev) +{ + static int dsa_version_printed; + struct dsa_platform_data *pd = pdev->dev.platform_data; + struct net_device *dev; + struct mii_bus *bus; + struct dsa_switch *ds; + + if (!dsa_version_printed++) + printk(KERN_NOTICE "Distributed Switch Architecture " + "driver version %s\n", dsa_driver_version); + + if (pd == NULL || pd->mii_bus == NULL || pd->netdev == NULL) + return -EINVAL; + + bus = dev_to_mii_bus(pd->mii_bus); + if (bus == NULL) + return -EINVAL; + + dev = dev_to_net_device(pd->netdev); + if (dev == NULL) + return -EINVAL; + + if (dev->dsa_ptr != NULL) { + dev_put(dev); + return -EEXIST; + } + + ds = dsa_switch_setup(&pdev->dev, pd, bus, dev); + if (IS_ERR(ds)) { + dev_put(dev); + return PTR_ERR(ds); + } + + if (ds->drv->poll_link != NULL) { + INIT_WORK(&ds->link_poll_work, dsa_link_poll_work); + init_timer(&ds->link_poll_timer); + ds->link_poll_timer.data = (unsigned long)ds; + ds->link_poll_timer.function = dsa_link_poll_timer; + ds->link_poll_timer.expires = round_jiffies(jiffies + HZ); + add_timer(&ds->link_poll_timer); + } + + platform_set_drvdata(pdev, ds); + + return 0; +} + +static int dsa_remove(struct platform_device *pdev) +{ + struct dsa_switch *ds = platform_get_drvdata(pdev); + + if (ds->drv->poll_link != NULL) + del_timer_sync(&ds->link_poll_timer); + + flush_scheduled_work(); + + dsa_switch_destroy(ds); + + return 0; +} + +static void dsa_shutdown(struct platform_device *pdev) +{ +} + +static struct platform_driver dsa_driver = { + .probe = dsa_probe, + .remove = dsa_remove, + .shutdown = dsa_shutdown, + .driver = { + .name = "dsa", + .owner = THIS_MODULE, + }, +}; + +static int __init dsa_init_module(void) +{ + return platform_driver_register(&dsa_driver); +} +module_init(dsa_init_module); + +static void __exit dsa_cleanup_module(void) +{ + platform_driver_unregister(&dsa_driver); +} +module_exit(dsa_cleanup_module); + +MODULE_AUTHOR("Lennert Buytenhek <buytenh@wantstofly.org>") +MODULE_DESCRIPTION("Driver for Distributed Switch Architecture switch chips"); +MODULE_LICENSE("GPL"); +MODULE_ALIAS("platform:dsa"); diff --git a/net/dsa/dsa_priv.h b/net/dsa/dsa_priv.h new file mode 100644 index 000000000000..7063378a1ebf --- /dev/null +++ b/net/dsa/dsa_priv.h @@ -0,0 +1,116 @@ +/* + * net/dsa/dsa_priv.h - Hardware switch handling + * Copyright (c) 2008 Marvell Semiconductor + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + */ + +#ifndef __DSA_PRIV_H +#define __DSA_PRIV_H + +#include <linux/list.h> +#include <linux/phy.h> +#include <linux/timer.h> +#include <linux/workqueue.h> +#include <net/dsa.h> + +struct dsa_switch { + /* + * Configuration data for the platform device that owns + * this dsa switch instance. + */ + struct dsa_platform_data *pd; + + /* + * References to network device and mii bus to use. + */ + struct net_device *master_netdev; + struct mii_bus *master_mii_bus; + + /* + * The used switch driver and frame tagging type. + */ + struct dsa_switch_driver *drv; + __be16 tag_protocol; + + /* + * Slave mii_bus and devices for the individual ports. + */ + int cpu_port; + u32 valid_port_mask; + struct mii_bus *slave_mii_bus; + struct net_device *ports[DSA_MAX_PORTS]; + + /* + * Link state polling. + */ + struct work_struct link_poll_work; + struct timer_list link_poll_timer; +}; + +struct dsa_slave_priv { + struct net_device *dev; + struct dsa_switch *parent; + int port; + struct phy_device *phy; +}; + +struct dsa_switch_driver { + struct list_head list; + + __be16 tag_protocol; + int priv_size; + + /* + * Probing and setup. + */ + char *(*probe)(struct mii_bus *bus, int sw_addr); + int (*setup)(struct dsa_switch *ds); + int (*set_addr)(struct dsa_switch *ds, u8 *addr); + + /* + * Access to the switch's PHY registers. + */ + int (*phy_read)(struct dsa_switch *ds, int port, int regnum); + int (*phy_write)(struct dsa_switch *ds, int port, + int regnum, u16 val); + + /* + * Link state polling and IRQ handling. + */ + void (*poll_link)(struct dsa_switch *ds); + + /* + * ethtool hardware statistics. + */ + void (*get_strings)(struct dsa_switch *ds, int port, uint8_t *data); + void (*get_ethtool_stats)(struct dsa_switch *ds, + int port, uint64_t *data); + int (*get_sset_count)(struct dsa_switch *ds); +}; + +/* dsa.c */ +extern char dsa_driver_version[]; +void register_switch_driver(struct dsa_switch_driver *type); +void unregister_switch_driver(struct dsa_switch_driver *type); + +/* slave.c */ +void dsa_slave_mii_bus_init(struct dsa_switch *ds); +struct net_device *dsa_slave_create(struct dsa_switch *ds, + struct device *parent, + int port, char *name); + +/* tag_dsa.c */ +int dsa_xmit(struct sk_buff *skb, struct net_device *dev); + +/* tag_edsa.c */ +int edsa_xmit(struct sk_buff *skb, struct net_device *dev); + +/* tag_trailer.c */ +int trailer_xmit(struct sk_buff *skb, struct net_device *dev); + + +#endif diff --git a/net/dsa/mv88e6060.c b/net/dsa/mv88e6060.c new file mode 100644 index 000000000000..54068ef251e8 --- /dev/null +++ b/net/dsa/mv88e6060.c @@ -0,0 +1,287 @@ +/* + * net/dsa/mv88e6060.c - Driver for Marvell 88e6060 switch chips + * Copyright (c) 2008 Marvell Semiconductor + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + */ + +#include <linux/list.h> +#include <linux/netdevice.h> +#include <linux/phy.h> +#include "dsa_priv.h" + +#define REG_PORT(p) (8 + (p)) +#define REG_GLOBAL 0x0f + +static int reg_read(struct dsa_switch *ds, int addr, int reg) +{ + return mdiobus_read(ds->master_mii_bus, addr, reg); +} + +#define REG_READ(addr, reg) \ + ({ \ + int __ret; \ + \ + __ret = reg_read(ds, addr, reg); \ + if (__ret < 0) \ + return __ret; \ + __ret; \ + }) + + +static int reg_write(struct dsa_switch *ds, int addr, int reg, u16 val) +{ + return mdiobus_write(ds->master_mii_bus, addr, reg, val); +} + +#define REG_WRITE(addr, reg, val) \ + ({ \ + int __ret; \ + \ + __ret = reg_write(ds, addr, reg, val); \ + if (__ret < 0) \ + return __ret; \ + }) + +static char *mv88e6060_probe(struct mii_bus *bus, int sw_addr) +{ + int ret; + + ret = mdiobus_read(bus, REG_PORT(0), 0x03); + if (ret >= 0) { + ret &= 0xfff0; + if (ret == 0x0600) + return "Marvell 88E6060"; + } + + return NULL; +} + +static int mv88e6060_switch_reset(struct dsa_switch *ds) +{ + int i; + int ret; + + /* + * Set all ports to the disabled state. + */ + for (i = 0; i < 6; i++) { + ret = REG_READ(REG_PORT(i), 0x04); + REG_WRITE(REG_PORT(i), 0x04, ret & 0xfffc); + } + + /* + * Wait for transmit queues to drain. + */ + msleep(2); + + /* + * Reset the switch. + */ + REG_WRITE(REG_GLOBAL, 0x0A, 0xa130); + + /* + * Wait up to one second for reset to complete. + */ + for (i = 0; i < 1000; i++) { + ret = REG_READ(REG_GLOBAL, 0x00); + if ((ret & 0x8000) == 0x0000) + break; + + msleep(1); + } + if (i == 1000) + return -ETIMEDOUT; + + return 0; +} + +static int mv88e6060_setup_global(struct dsa_switch *ds) +{ + /* + * Disable discarding of frames with excessive collisions, + * set the maximum frame size to 1536 bytes, and mask all + * interrupt sources. + */ + REG_WRITE(REG_GLOBAL, 0x04, 0x0800); + + /* + * Enable automatic address learning, set the address + * database size to 1024 entries, and set the default aging + * time to 5 minutes. + */ + REG_WRITE(REG_GLOBAL, 0x0a, 0x2130); + + return 0; +} + +static int mv88e6060_setup_port(struct dsa_switch *ds, int p) +{ + int addr = REG_PORT(p); + + /* + * Do not force flow control, disable Ingress and Egress + * Header tagging, disable VLAN tunneling, and set the port + * state to Forwarding. Additionally, if this is the CPU + * port, enable Ingress and Egress Trailer tagging mode. + */ + REG_WRITE(addr, 0x04, (p == ds->cpu_port) ? 0x4103 : 0x0003); + + /* + * Port based VLAN map: give each port its own address + * database, allow the CPU port to talk to each of the 'real' + * ports, and allow each of the 'real' ports to only talk to + * the CPU port. + */ + REG_WRITE(addr, 0x06, + ((p & 0xf) << 12) | + ((p == ds->cpu_port) ? + ds->valid_port_mask : + (1 << ds->cpu_port))); + + /* + * Port Association Vector: when learning source addresses + * of packets, add the address to the address database using + * a port bitmap that has only the bit for this port set and + * the other bits clear. + */ + REG_WRITE(addr, 0x0b, 1 << p); + + return 0; +} + +static int mv88e6060_setup(struct dsa_switch *ds) +{ + int i; + int ret; + + ret = mv88e6060_switch_reset(ds); + if (ret < 0) + return ret; + + /* @@@ initialise atu */ + + ret = mv88e6060_setup_global(ds); + if (ret < 0) + return ret; + + for (i = 0; i < 6; i++) { + ret = mv88e6060_setup_port(ds, i); + if (ret < 0) + return ret; + } + + return 0; +} + +static int mv88e6060_set_addr(struct dsa_switch *ds, u8 *addr) +{ + REG_WRITE(REG_GLOBAL, 0x01, (addr[0] << 8) | addr[1]); + REG_WRITE(REG_GLOBAL, 0x02, (addr[2] << 8) | addr[3]); + REG_WRITE(REG_GLOBAL, 0x03, (addr[4] << 8) | addr[5]); + + return 0; +} + +static int mv88e6060_port_to_phy_addr(int port) +{ + if (port >= 0 && port <= 5) + return port; + return -1; +} + +static int mv88e6060_phy_read(struct dsa_switch *ds, int port, int regnum) +{ + int addr; + + addr = mv88e6060_port_to_phy_addr(port); + if (addr == -1) + return 0xffff; + + return reg_read(ds, addr, regnum); +} + +static int +mv88e6060_phy_write(struct dsa_switch *ds, int port, int regnum, u16 val) +{ + int addr; + + addr = mv88e6060_port_to_phy_addr(port); + if (addr == -1) + return 0xffff; + + return reg_write(ds, addr, regnum, val); +} + +static void mv88e6060_poll_link(struct dsa_switch *ds) +{ + int i; + + for (i = 0; i < DSA_MAX_PORTS; i++) { + struct net_device *dev; + int port_status; + int link; + int speed; + int duplex; + int fc; + + dev = ds->ports[i]; + if (dev == NULL) + continue; + + link = 0; + if (dev->flags & IFF_UP) { + port_status = reg_read(ds, REG_PORT(i), 0x00); + if (port_status < 0) + continue; + + link = !!(port_status & 0x1000); + } + + if (!link) { + if (netif_carrier_ok(dev)) { + printk(KERN_INFO "%s: link down\n", dev->name); + netif_carrier_off(dev); + } + continue; + } + + speed = (port_status & 0x0100) ? 100 : 10; + duplex = (port_status & 0x0200) ? 1 : 0; + fc = ((port_status & 0xc000) == 0xc000) ? 1 : 0; + + if (!netif_carrier_ok(dev)) { + printk(KERN_INFO "%s: link up, %d Mb/s, %s duplex, " + "flow control %sabled\n", dev->name, + speed, duplex ? "full" : "half", + fc ? "en" : "dis"); + netif_carrier_on(dev); + } + } +} + +static struct dsa_switch_driver mv88e6060_switch_driver = { + .tag_protocol = htons(ETH_P_TRAILER), + .probe = mv88e6060_probe, + .setup = mv88e6060_setup, + .set_addr = mv88e6060_set_addr, + .phy_read = mv88e6060_phy_read, + .phy_write = mv88e6060_phy_write, + .poll_link = mv88e6060_poll_link, +}; + +int __init mv88e6060_init(void) +{ + register_switch_driver(&mv88e6060_switch_driver); + return 0; +} +module_init(mv88e6060_init); + +void __exit mv88e6060_cleanup(void) +{ + unregister_switch_driver(&mv88e6060_switch_driver); +} +module_exit(mv88e6060_cleanup); diff --git a/net/dsa/mv88e6123_61_65.c b/net/dsa/mv88e6123_61_65.c new file mode 100644 index 000000000000..555b164082fc --- /dev/null +++ b/net/dsa/mv88e6123_61_65.c @@ -0,0 +1,421 @@ +/* + * net/dsa/mv88e6123_61_65.c - Marvell 88e6123/6161/6165 switch chip support + * Copyright (c) 2008 Marvell Semiconductor + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + */ + +#include <linux/list.h> +#include <linux/netdevice.h> +#include <linux/phy.h> +#include "dsa_priv.h" +#include "mv88e6xxx.h" + +static char *mv88e6123_61_65_probe(struct mii_bus *bus, int sw_addr) +{ + int ret; + + ret = __mv88e6xxx_reg_read(bus, sw_addr, REG_PORT(0), 0x03); + if (ret >= 0) { + ret &= 0xfff0; + if (ret == 0x1210) + return "Marvell 88E6123"; + if (ret == 0x1610) + return "Marvell 88E6161"; + if (ret == 0x1650) + return "Marvell 88E6165"; + } + + return NULL; +} + +static int mv88e6123_61_65_switch_reset(struct dsa_switch *ds) +{ + int i; + int ret; + + /* + * Set all ports to the disabled state. + */ + for (i = 0; i < 8; i++) { + ret = REG_READ(REG_PORT(i), 0x04); + REG_WRITE(REG_PORT(i), 0x04, ret & 0xfffc); + } + + /* + * Wait for transmit queues to drain. + */ + msleep(2); + + /* + * Reset the switch. + */ + REG_WRITE(REG_GLOBAL, 0x04, 0xc400); + + /* + * Wait up to one second for reset to complete. + */ + for (i = 0; i < 1000; i++) { + ret = REG_READ(REG_GLOBAL, 0x00); + if ((ret & 0xc800) == 0xc800) + break; + + msleep(1); + } + if (i == 1000) + return -ETIMEDOUT; + + return 0; +} + +static int mv88e6123_61_65_setup_global(struct dsa_switch *ds) +{ + int ret; + int i; + + /* + * Disable the PHY polling unit (since there won't be any + * external PHYs to poll), don't discard packets with + * excessive collisions, and mask all interrupt sources. + */ + REG_WRITE(REG_GLOBAL, 0x04, 0x0000); + + /* + * Set the default address aging time to 5 minutes, and + * enable address learn messages to be sent to all message + * ports. + */ + REG_WRITE(REG_GLOBAL, 0x0a, 0x0148); + + /* + * Configure the priority mapping registers. + */ + ret = mv88e6xxx_config_prio(ds); + if (ret < 0) + return ret; + + /* + * Configure the cpu port, and configure the cpu port as the + * port to which ingress and egress monitor frames are to be + * sent. + */ + REG_WRITE(REG_GLOBAL, 0x1a, (ds->cpu_port * 0x1110)); + + /* + * Disable remote management for now, and set the switch's + * DSA device number to zero. + */ + REG_WRITE(REG_GLOBAL, 0x1c, 0x0000); + + /* + * Send all frames with destination addresses matching + * 01:80:c2:00:00:2x to the CPU port. + */ + REG_WRITE(REG_GLOBAL2, 0x02, 0xffff); + + /* + * Send all frames with destination addresses matching + * 01:80:c2:00:00:0x to the CPU port. + */ + REG_WRITE(REG_GLOBAL2, 0x03, 0xffff); + + /* + * Disable the loopback filter, disable flow control + * messages, disable flood broadcast override, disable + * removing of provider tags, disable ATU age violation + * interrupts, disable tag flow control, force flow + * control priority to the highest, and send all special + * multicast frames to the CPU at the highest priority. + */ + REG_WRITE(REG_GLOBAL2, 0x05, 0x00ff); + + /* + * Map all DSA device IDs to the CPU port. + */ + for (i = 0; i < 32; i++) + REG_WRITE(REG_GLOBAL2, 0x06, 0x8000 | (i << 8) | ds->cpu_port); + + /* + * Clear all trunk masks. + */ + for (i = 0; i < 8; i++) + REG_WRITE(REG_GLOBAL2, 0x07, 0x8000 | (i << 12) | 0xff); + + /* + * Clear all trunk mappings. + */ + for (i = 0; i < 16; i++) + REG_WRITE(REG_GLOBAL2, 0x08, 0x8000 | (i << 11)); + + /* + * Disable ingress rate limiting by resetting all ingress + * rate limit registers to their initial state. + */ + for (i = 0; i < 6; i++) + REG_WRITE(REG_GLOBAL2, 0x09, 0x9000 | (i << 8)); + + /* + * Initialise cross-chip port VLAN table to reset defaults. + */ + REG_WRITE(REG_GLOBAL2, 0x0b, 0x9000); + + /* + * Clear the priority override table. + */ + for (i = 0; i < 16; i++) + REG_WRITE(REG_GLOBAL2, 0x0f, 0x8000 | (i << 8)); + + /* @@@ initialise AVB (22/23) watchdog (27) sdet (29) registers */ + + return 0; +} + +static int mv88e6123_61_65_setup_port(struct dsa_switch *ds, int p) +{ + int addr = REG_PORT(p); + + /* + * MAC Forcing register: don't force link, speed, duplex + * or flow control state to any particular values. + */ + REG_WRITE(addr, 0x01, 0x0003); + + /* + * Do not limit the period of time that this port can be + * paused for by the remote end or the period of time that + * this port can pause the remote end. + */ + REG_WRITE(addr, 0x02, 0x0000); + + /* + * Port Control: disable Drop-on-Unlock, disable Drop-on-Lock, + * configure the requested (DSA/EDSA) tagging mode if this is + * the CPU port, disable Header mode, enable IGMP/MLD snooping, + * disable VLAN tunneling, determine priority by looking at + * 802.1p and IP priority fields (IP prio has precedence), and + * set STP state to Forwarding. Finally, if this is the CPU + * port, additionally enable forwarding of unknown unicast and + * multicast addresses. + */ + REG_WRITE(addr, 0x04, + (p == ds->cpu_port) ? + (ds->tag_protocol == htons(ETH_P_DSA)) ? + 0x053f : 0x373f : + 0x0433); + + /* + * Port Control 1: disable trunking. Also, if this is the + * CPU port, enable learn messages to be sent to this port. + */ + REG_WRITE(addr, 0x05, (p == ds->cpu_port) ? 0x8000 : 0x0000); + + /* + * Port based VLAN map: give each port its own address + * database, allow the CPU port to talk to each of the 'real' + * ports, and allow each of the 'real' ports to only talk to + * the CPU port. + */ + REG_WRITE(addr, 0x06, + ((p & 0xf) << 12) | + ((p == ds->cpu_port) ? + ds->valid_port_mask : + (1 << ds->cpu_port))); + + /* + * Default VLAN ID and priority: don't set a default VLAN + * ID, and set the default packet priority to zero. + */ + REG_WRITE(addr, 0x07, 0x0000); + + /* + * Port Control 2: don't force a good FCS, set the maximum + * frame size to 10240 bytes, don't let the switch add or + * strip 802.1q tags, don't discard tagged or untagged frames + * on this port, do a destination address lookup on all + * received packets as usual, disable ARP mirroring and don't + * send a copy of all transmitted/received frames on this port + * to the CPU. + */ + REG_WRITE(addr, 0x08, 0x2080); + + /* + * Egress rate control: disable egress rate control. + */ + REG_WRITE(addr, 0x09, 0x0001); + + /* + * Egress rate control 2: disable egress rate control. + */ + REG_WRITE(addr, 0x0a, 0x0000); + + /* + * Port Association Vector: when learning source addresses + * of packets, add the address to the address database using + * a port bitmap that has only the bit for this port set and + * the other bits clear. + */ + REG_WRITE(addr, 0x0b, 1 << p); + + /* + * Port ATU control: disable limiting the number of address + * database entries that this port is allowed to use. + */ + REG_WRITE(addr, 0x0c, 0x0000); + + /* + * Priorit Override: disable DA, SA and VTU priority override. + */ + REG_WRITE(addr, 0x0d, 0x0000); + + /* + * Port Ethertype: use the Ethertype DSA Ethertype value. + */ + REG_WRITE(addr, 0x0f, ETH_P_EDSA); + + /* + * Tag Remap: use an identity 802.1p prio -> switch prio + * mapping. + */ + REG_WRITE(addr, 0x18, 0x3210); + + /* + * Tag Remap 2: use an identity 802.1p prio -> switch prio + * mapping. + */ + REG_WRITE(addr, 0x19, 0x7654); + + return 0; +} + +static int mv88e6123_61_65_setup(struct dsa_switch *ds) +{ + struct mv88e6xxx_priv_state *ps = (void *)(ds + 1); + int i; + int ret; + + mutex_init(&ps->smi_mutex); + mutex_init(&ps->stats_mutex); + + ret = mv88e6123_61_65_switch_reset(ds); + if (ret < 0) + return ret; + + /* @@@ initialise vtu and atu */ + + ret = mv88e6123_61_65_setup_global(ds); + if (ret < 0) + return ret; + + for (i = 0; i < 6; i++) { + ret = mv88e6123_61_65_setup_port(ds, i); + if (ret < 0) + return ret; + } + + return 0; +} + +static int mv88e6123_61_65_port_to_phy_addr(int port) +{ + if (port >= 0 && port <= 4) + return port; + return -1; +} + +static int +mv88e6123_61_65_phy_read(struct dsa_switch *ds, int port, int regnum) +{ + int addr = mv88e6123_61_65_port_to_phy_addr(port); + return mv88e6xxx_phy_read(ds, addr, regnum); +} + +static int +mv88e6123_61_65_phy_write(struct dsa_switch *ds, + int port, int regnum, u16 val) +{ + int addr = mv88e6123_61_65_port_to_phy_addr(port); + return mv88e6xxx_phy_write(ds, addr, regnum, val); +} + +static struct mv88e6xxx_hw_stat mv88e6123_61_65_hw_stats[] = { + { "in_good_octets", 8, 0x00, }, + { "in_bad_octets", 4, 0x02, }, + { "in_unicast", 4, 0x04, }, + { "in_broadcasts", 4, 0x06, }, + { "in_multicasts", 4, 0x07, }, + { "in_pause", 4, 0x16, }, + { "in_undersize", 4, 0x18, }, + { "in_fragments", 4, 0x19, }, + { "in_oversize", 4, 0x1a, }, + { "in_jabber", 4, 0x1b, }, + { "in_rx_error", 4, 0x1c, }, + { "in_fcs_error", 4, 0x1d, }, + { "out_octets", 8, 0x0e, }, + { "out_unicast", 4, 0x10, }, + { "out_broadcasts", 4, 0x13, }, + { "out_multicasts", 4, 0x12, }, + { "out_pause", 4, 0x15, }, + { "excessive", 4, 0x11, }, + { "collisions", 4, 0x1e, }, + { "deferred", 4, 0x05, }, + { "single", 4, 0x14, }, + { "multiple", 4, 0x17, }, + { "out_fcs_error", 4, 0x03, }, + { "late", 4, 0x1f, }, + { "hist_64bytes", 4, 0x08, }, + { "hist_65_127bytes", 4, 0x09, }, + { "hist_128_255bytes", 4, 0x0a, }, + { "hist_256_511bytes", 4, 0x0b, }, + { "hist_512_1023bytes", 4, 0x0c, }, + { "hist_1024_max_bytes", 4, 0x0d, }, +}; + +static void +mv88e6123_61_65_get_strings(struct dsa_switch *ds, int port, uint8_t *data) +{ + mv88e6xxx_get_strings(ds, ARRAY_SIZE(mv88e6123_61_65_hw_stats), + mv88e6123_61_65_hw_stats, port, data); +} + +static void +mv88e6123_61_65_get_ethtool_stats(struct dsa_switch *ds, + int port, uint64_t *data) +{ + mv88e6xxx_get_ethtool_stats(ds, ARRAY_SIZE(mv88e6123_61_65_hw_stats), + mv88e6123_61_65_hw_stats, port, data); +} + +static int mv88e6123_61_65_get_sset_count(struct dsa_switch *ds) +{ + return ARRAY_SIZE(mv88e6123_61_65_hw_stats); +} + +static struct dsa_switch_driver mv88e6123_61_65_switch_driver = { + .tag_protocol = __constant_htons(ETH_P_EDSA), + .priv_size = sizeof(struct mv88e6xxx_priv_state), + .probe = mv88e6123_61_65_probe, + .setup = mv88e6123_61_65_setup, + .set_addr = mv88e6xxx_set_addr_indirect, + .phy_read = mv88e6123_61_65_phy_read, + .phy_write = mv88e6123_61_65_phy_write, + .poll_link = mv88e6xxx_poll_link, + .get_strings = mv88e6123_61_65_get_strings, + .get_ethtool_stats = mv88e6123_61_65_get_ethtool_stats, + .get_sset_count = mv88e6123_61_65_get_sset_count, +}; + +int __init mv88e6123_61_65_init(void) +{ + register_switch_driver(&mv88e6123_61_65_switch_driver); + return 0; +} +module_init(mv88e6123_61_65_init); + +void __exit mv88e6123_61_65_cleanup(void) +{ + unregister_switch_driver(&mv88e6123_61_65_switch_driver); +} +module_exit(mv88e6123_61_65_cleanup); diff --git a/net/dsa/mv88e6131.c b/net/dsa/mv88e6131.c new file mode 100644 index 000000000000..36e01eb863a0 --- /dev/null +++ b/net/dsa/mv88e6131.c @@ -0,0 +1,380 @@ +/* + * net/dsa/mv88e6131.c - Marvell 88e6131 switch chip support + * Copyright (c) 2008 Marvell Semiconductor + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + */ + +#include <linux/list.h> +#include <linux/netdevice.h> +#include <linux/phy.h> +#include "dsa_priv.h" +#include "mv88e6xxx.h" + +static char *mv88e6131_probe(struct mii_bus *bus, int sw_addr) +{ + int ret; + + ret = __mv88e6xxx_reg_read(bus, sw_addr, REG_PORT(0), 0x03); + if (ret >= 0) { + ret &= 0xfff0; + if (ret == 0x1060) + return "Marvell 88E6131"; + } + + return NULL; +} + +static int mv88e6131_switch_reset(struct dsa_switch *ds) +{ + int i; + int ret; + + /* + * Set all ports to the disabled state. + */ + for (i = 0; i < 8; i++) { + ret = REG_READ(REG_PORT(i), 0x04); + REG_WRITE(REG_PORT(i), 0x04, ret & 0xfffc); + } + + /* + * Wait for transmit queues to drain. + */ + msleep(2); + + /* + * Reset the switch. + */ + REG_WRITE(REG_GLOBAL, 0x04, 0xc400); + + /* + * Wait up to one second for reset to complete. + */ + for (i = 0; i < 1000; i++) { + ret = REG_READ(REG_GLOBAL, 0x00); + if ((ret & 0xc800) == 0xc800) + break; + + msleep(1); + } + if (i == 1000) + return -ETIMEDOUT; + + return 0; +} + +static int mv88e6131_setup_global(struct dsa_switch *ds) +{ + int ret; + int i; + + /* + * Enable the PHY polling unit, don't discard packets with + * excessive collisions, use a weighted fair queueing scheme + * to arbitrate between packet queues, set the maximum frame + * size to 1632, and mask all interrupt sources. + */ + REG_WRITE(REG_GLOBAL, 0x04, 0x4400); + + /* + * Set the default address aging time to 5 minutes, and + * enable address learn messages to be sent to all message + * ports. + */ + REG_WRITE(REG_GLOBAL, 0x0a, 0x0148); + + /* + * Configure the priority mapping registers. + */ + ret = mv88e6xxx_config_prio(ds); + if (ret < 0) + return ret; + + /* + * Set the VLAN ethertype to 0x8100. + */ + REG_WRITE(REG_GLOBAL, 0x19, 0x8100); + + /* + * Disable ARP mirroring, and configure the cpu port as the + * port to which ingress and egress monitor frames are to be + * sent. + */ + REG_WRITE(REG_GLOBAL, 0x1a, (ds->cpu_port * 0x1100) | 0x00f0); + + /* + * Disable cascade port functionality, and set the switch's + * DSA device number to zero. + */ + REG_WRITE(REG_GLOBAL, 0x1c, 0xe000); + + /* + * Send all frames with destination addresses matching + * 01:80:c2:00:00:0x to the CPU port. + */ + REG_WRITE(REG_GLOBAL2, 0x03, 0xffff); + + /* + * Ignore removed tag data on doubly tagged packets, disable + * flow control messages, force flow control priority to the + * highest, and send all special multicast frames to the CPU + * port at the higest priority. + */ + REG_WRITE(REG_GLOBAL2, 0x05, 0x00ff); + + /* + * Map all DSA device IDs to the CPU port. + */ + for (i = 0; i < 32; i++) + REG_WRITE(REG_GLOBAL2, 0x06, 0x8000 | (i << 8) | ds->cpu_port); + + /* + * Clear all trunk masks. + */ + for (i = 0; i < 8; i++) + REG_WRITE(REG_GLOBAL2, 0x07, 0x8000 | (i << 12) | 0xff); + + /* + * Clear all trunk mappings. + */ + for (i = 0; i < 16; i++) + REG_WRITE(REG_GLOBAL2, 0x08, 0x8000 | (i << 11)); + + /* + * Force the priority of IGMP/MLD snoop frames and ARP frames + * to the highest setting. + */ + REG_WRITE(REG_GLOBAL2, 0x0f, 0x00ff); + + return 0; +} + +static int mv88e6131_setup_port(struct dsa_switch *ds, int p) +{ + int addr = REG_PORT(p); + + /* + * MAC Forcing register: don't force link, speed, duplex + * or flow control state to any particular values. + */ + REG_WRITE(addr, 0x01, 0x0003); + + /* + * Port Control: disable Core Tag, disable Drop-on-Lock, + * transmit frames unmodified, disable Header mode, + * enable IGMP/MLD snoop, disable DoubleTag, disable VLAN + * tunneling, determine priority by looking at 802.1p and + * IP priority fields (IP prio has precedence), and set STP + * state to Forwarding. Finally, if this is the CPU port, + * additionally enable DSA tagging and forwarding of unknown + * unicast addresses. + */ + REG_WRITE(addr, 0x04, (p == ds->cpu_port) ? 0x0537 : 0x0433); + + /* + * Port Control 1: disable trunking. Also, if this is the + * CPU port, enable learn messages to be sent to this port. + */ + REG_WRITE(addr, 0x05, (p == ds->cpu_port) ? 0x8000 : 0x0000); + + /* + * Port based VLAN map: give each port its own address + * database, allow the CPU port to talk to each of the 'real' + * ports, and allow each of the 'real' ports to only talk to + * the CPU port. + */ + REG_WRITE(addr, 0x06, + ((p & 0xf) << 12) | + ((p == ds->cpu_port) ? + ds->valid_port_mask : + (1 << ds->cpu_port))); + + /* + * Default VLAN ID and priority: don't set a default VLAN + * ID, and set the default packet priority to zero. + */ + REG_WRITE(addr, 0x07, 0x0000); + + /* + * Port Control 2: don't force a good FCS, don't use + * VLAN-based, source address-based or destination + * address-based priority overrides, don't let the switch + * add or strip 802.1q tags, don't discard tagged or + * untagged frames on this port, do a destination address + * lookup on received packets as usual, don't send a copy + * of all transmitted/received frames on this port to the + * CPU, and configure the CPU port number. Also, if this + * is the CPU port, enable forwarding of unknown multicast + * addresses. + */ + REG_WRITE(addr, 0x08, + ((p == ds->cpu_port) ? 0x00c0 : 0x0080) | + ds->cpu_port); + + /* + * Rate Control: disable ingress rate limiting. + */ + REG_WRITE(addr, 0x09, 0x0000); + + /* + * Rate Control 2: disable egress rate limiting. + */ + REG_WRITE(addr, 0x0a, 0x0000); + + /* + * Port Association Vector: when learning source addresses + * of packets, add the address to the address database using + * a port bitmap that has only the bit for this port set and + * the other bits clear. + */ + REG_WRITE(addr, 0x0b, 1 << p); + + /* + * Tag Remap: use an identity 802.1p prio -> switch prio + * mapping. + */ + REG_WRITE(addr, 0x18, 0x3210); + + /* + * Tag Remap 2: use an identity 802.1p prio -> switch prio + * mapping. + */ + REG_WRITE(addr, 0x19, 0x7654); + + return 0; +} + +static int mv88e6131_setup(struct dsa_switch *ds) +{ + struct mv88e6xxx_priv_state *ps = (void *)(ds + 1); + int i; + int ret; + + mutex_init(&ps->smi_mutex); + mv88e6xxx_ppu_state_init(ds); + mutex_init(&ps->stats_mutex); + + ret = mv88e6131_switch_reset(ds); + if (ret < 0) + return ret; + + /* @@@ initialise vtu and atu */ + + ret = mv88e6131_setup_global(ds); + if (ret < 0) + return ret; + + for (i = 0; i < 6; i++) { + ret = mv88e6131_setup_port(ds, i); + if (ret < 0) + return ret; + } + + return 0; +} + +static int mv88e6131_port_to_phy_addr(int port) +{ + if (port >= 0 && port != 3 && port <= 7) + return port; + return -1; +} + +static int +mv88e6131_phy_read(struct dsa_switch *ds, int port, int regnum) +{ + int addr = mv88e6131_port_to_phy_addr(port); + return mv88e6xxx_phy_read_ppu(ds, addr, regnum); +} + +static int +mv88e6131_phy_write(struct dsa_switch *ds, + int port, int regnum, u16 val) +{ + int addr = mv88e6131_port_to_phy_addr(port); + return mv88e6xxx_phy_write_ppu(ds, addr, regnum, val); +} + +static struct mv88e6xxx_hw_stat mv88e6131_hw_stats[] = { + { "in_good_octets", 8, 0x00, }, + { "in_bad_octets", 4, 0x02, }, + { "in_unicast", 4, 0x04, }, + { "in_broadcasts", 4, 0x06, }, + { "in_multicasts", 4, 0x07, }, + { "in_pause", 4, 0x16, }, + { "in_undersize", 4, 0x18, }, + { "in_fragments", 4, 0x19, }, + { "in_oversize", 4, 0x1a, }, + { "in_jabber", 4, 0x1b, }, + { "in_rx_error", 4, 0x1c, }, + { "in_fcs_error", 4, 0x1d, }, + { "out_octets", 8, 0x0e, }, + { "out_unicast", 4, 0x10, }, + { "out_broadcasts", 4, 0x13, }, + { "out_multicasts", 4, 0x12, }, + { "out_pause", 4, 0x15, }, + { "excessive", 4, 0x11, }, + { "collisions", 4, 0x1e, }, + { "deferred", 4, 0x05, }, + { "single", 4, 0x14, }, + { "multiple", 4, 0x17, }, + { "out_fcs_error", 4, 0x03, }, + { "late", 4, 0x1f, }, + { "hist_64bytes", 4, 0x08, }, + { "hist_65_127bytes", 4, 0x09, }, + { "hist_128_255bytes", 4, 0x0a, }, + { "hist_256_511bytes", 4, 0x0b, }, + { "hist_512_1023bytes", 4, 0x0c, }, + { "hist_1024_max_bytes", 4, 0x0d, }, +}; + +static void +mv88e6131_get_strings(struct dsa_switch *ds, int port, uint8_t *data) +{ + mv88e6xxx_get_strings(ds, ARRAY_SIZE(mv88e6131_hw_stats), + mv88e6131_hw_stats, port, data); +} + +static void +mv88e6131_get_ethtool_stats(struct dsa_switch *ds, + int port, uint64_t *data) +{ + mv88e6xxx_get_ethtool_stats(ds, ARRAY_SIZE(mv88e6131_hw_stats), + mv88e6131_hw_stats, port, data); +} + +static int mv88e6131_get_sset_count(struct dsa_switch *ds) +{ + return ARRAY_SIZE(mv88e6131_hw_stats); +} + +static struct dsa_switch_driver mv88e6131_switch_driver = { + .tag_protocol = __constant_htons(ETH_P_DSA), + .priv_size = sizeof(struct mv88e6xxx_priv_state), + .probe = mv88e6131_probe, + .setup = mv88e6131_setup, + .set_addr = mv88e6xxx_set_addr_direct, + .phy_read = mv88e6131_phy_read, + .phy_write = mv88e6131_phy_write, + .poll_link = mv88e6xxx_poll_link, + .get_strings = mv88e6131_get_strings, + .get_ethtool_stats = mv88e6131_get_ethtool_stats, + .get_sset_count = mv88e6131_get_sset_count, +}; + +int __init mv88e6131_init(void) +{ + register_switch_driver(&mv88e6131_switch_driver); + return 0; +} +module_init(mv88e6131_init); + +void __exit mv88e6131_cleanup(void) +{ + unregister_switch_driver(&mv88e6131_switch_driver); +} +module_exit(mv88e6131_cleanup); diff --git a/net/dsa/mv88e6xxx.c b/net/dsa/mv88e6xxx.c new file mode 100644 index 000000000000..aa6c609c59f2 --- /dev/null +++ b/net/dsa/mv88e6xxx.c @@ -0,0 +1,522 @@ +/* + * net/dsa/mv88e6xxx.c - Marvell 88e6xxx switch chip support + * Copyright (c) 2008 Marvell Semiconductor + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + */ + +#include <linux/list.h> +#include <linux/netdevice.h> +#include <linux/phy.h> +#include "dsa_priv.h" +#include "mv88e6xxx.h" + +/* + * If the switch's ADDR[4:0] strap pins are strapped to zero, it will + * use all 32 SMI bus addresses on its SMI bus, and all switch registers + * will be directly accessible on some {device address,register address} + * pair. If the ADDR[4:0] pins are not strapped to zero, the switch + * will only respond to SMI transactions to that specific address, and + * an indirect addressing mechanism needs to be used to access its + * registers. + */ +static int mv88e6xxx_reg_wait_ready(struct mii_bus *bus, int sw_addr) +{ + int ret; + int i; + + for (i = 0; i < 16; i++) { + ret = mdiobus_read(bus, sw_addr, 0); + if (ret < 0) + return ret; + + if ((ret & 0x8000) == 0) + return 0; + } + + return -ETIMEDOUT; +} + +int __mv88e6xxx_reg_read(struct mii_bus *bus, int sw_addr, int addr, int reg) +{ + int ret; + + if (sw_addr == 0) + return mdiobus_read(bus, addr, reg); + + /* + * Wait for the bus to become free. + */ + ret = mv88e6xxx_reg_wait_ready(bus, sw_addr); + if (ret < 0) + return ret; + + /* + * Transmit the read command. + */ + ret = mdiobus_write(bus, sw_addr, 0, 0x9800 | (addr << 5) | reg); + if (ret < 0) + return ret; + + /* + * Wait for the read command to complete. + */ + ret = mv88e6xxx_reg_wait_ready(bus, sw_addr); + if (ret < 0) + return ret; + + /* + * Read the data. + */ + ret = mdiobus_read(bus, sw_addr, 1); + if (ret < 0) + return ret; + + return ret & 0xffff; +} + +int mv88e6xxx_reg_read(struct dsa_switch *ds, int addr, int reg) +{ + struct mv88e6xxx_priv_state *ps = (void *)(ds + 1); + int ret; + + mutex_lock(&ps->smi_mutex); + ret = __mv88e6xxx_reg_read(ds->master_mii_bus, + ds->pd->sw_addr, addr, reg); + mutex_unlock(&ps->smi_mutex); + + return ret; +} + +int __mv88e6xxx_reg_write(struct mii_bus *bus, int sw_addr, int addr, + int reg, u16 val) +{ + int ret; + + if (sw_addr == 0) + return mdiobus_write(bus, addr, reg, val); + + /* + * Wait for the bus to become free. + */ + ret = mv88e6xxx_reg_wait_ready(bus, sw_addr); + if (ret < 0) + return ret; + + /* + * Transmit the data to write. + */ + ret = mdiobus_write(bus, sw_addr, 1, val); + if (ret < 0) + return ret; + + /* + * Transmit the write command. + */ + ret = mdiobus_write(bus, sw_addr, 0, 0x9400 | (addr << 5) | reg); + if (ret < 0) + return ret; + + /* + * Wait for the write command to complete. + */ + ret = mv88e6xxx_reg_wait_ready(bus, sw_addr); + if (ret < 0) + return ret; + + return 0; +} + +int mv88e6xxx_reg_write(struct dsa_switch *ds, int addr, int reg, u16 val) +{ + struct mv88e6xxx_priv_state *ps = (void *)(ds + 1); + int ret; + + mutex_lock(&ps->smi_mutex); + ret = __mv88e6xxx_reg_write(ds->master_mii_bus, + ds->pd->sw_addr, addr, reg, val); + mutex_unlock(&ps->smi_mutex); + + return ret; +} + +int mv88e6xxx_config_prio(struct dsa_switch *ds) +{ + /* + * Configure the IP ToS mapping registers. + */ + REG_WRITE(REG_GLOBAL, 0x10, 0x0000); + REG_WRITE(REG_GLOBAL, 0x11, 0x0000); + REG_WRITE(REG_GLOBAL, 0x12, 0x5555); + REG_WRITE(REG_GLOBAL, 0x13, 0x5555); + REG_WRITE(REG_GLOBAL, 0x14, 0xaaaa); + REG_WRITE(REG_GLOBAL, 0x15, 0xaaaa); + REG_WRITE(REG_GLOBAL, 0x16, 0xffff); + REG_WRITE(REG_GLOBAL, 0x17, 0xffff); + + /* + * Configure the IEEE 802.1p priority mapping register. + */ + REG_WRITE(REG_GLOBAL, 0x18, 0xfa41); + + return 0; +} + +int mv88e6xxx_set_addr_direct(struct dsa_switch *ds, u8 *addr) +{ + REG_WRITE(REG_GLOBAL, 0x01, (addr[0] << 8) | addr[1]); + REG_WRITE(REG_GLOBAL, 0x02, (addr[2] << 8) | addr[3]); + REG_WRITE(REG_GLOBAL, 0x03, (addr[4] << 8) | addr[5]); + + return 0; +} + +int mv88e6xxx_set_addr_indirect(struct dsa_switch *ds, u8 *addr) +{ + int i; + int ret; + + for (i = 0; i < 6; i++) { + int j; + + /* + * Write the MAC address byte. + */ + REG_WRITE(REG_GLOBAL2, 0x0d, 0x8000 | (i << 8) | addr[i]); + + /* + * Wait for the write to complete. + */ + for (j = 0; j < 16; j++) { + ret = REG_READ(REG_GLOBAL2, 0x0d); + if ((ret & 0x8000) == 0) + break; + } + if (j == 16) + return -ETIMEDOUT; + } + + return 0; +} + +int mv88e6xxx_phy_read(struct dsa_switch *ds, int addr, int regnum) +{ + if (addr >= 0) + return mv88e6xxx_reg_read(ds, addr, regnum); + return 0xffff; +} + +int mv88e6xxx_phy_write(struct dsa_switch *ds, int addr, int regnum, u16 val) +{ + if (addr >= 0) + return mv88e6xxx_reg_write(ds, addr, regnum, val); + return 0; +} + +#ifdef CONFIG_NET_DSA_MV88E6XXX_NEED_PPU +static int mv88e6xxx_ppu_disable(struct dsa_switch *ds) +{ + int ret; + int i; + + ret = REG_READ(REG_GLOBAL, 0x04); + REG_WRITE(REG_GLOBAL, 0x04, ret & ~0x4000); + + for (i = 0; i < 1000; i++) { + ret = REG_READ(REG_GLOBAL, 0x00); + msleep(1); + if ((ret & 0xc000) != 0xc000) + return 0; + } + + return -ETIMEDOUT; +} + +static int mv88e6xxx_ppu_enable(struct dsa_switch *ds) +{ + int ret; + int i; + + ret = REG_READ(REG_GLOBAL, 0x04); + REG_WRITE(REG_GLOBAL, 0x04, ret | 0x4000); + + for (i = 0; i < 1000; i++) { + ret = REG_READ(REG_GLOBAL, 0x00); + msleep(1); + if ((ret & 0xc000) == 0xc000) + return 0; + } + + return -ETIMEDOUT; +} + +static void mv88e6xxx_ppu_reenable_work(struct work_struct *ugly) +{ + struct mv88e6xxx_priv_state *ps; + + ps = container_of(ugly, struct mv88e6xxx_priv_state, ppu_work); + if (mutex_trylock(&ps->ppu_mutex)) { + struct dsa_switch *ds = ((struct dsa_switch *)ps) - 1; + + if (mv88e6xxx_ppu_enable(ds) == 0) + ps->ppu_disabled = 0; + mutex_unlock(&ps->ppu_mutex); + } +} + +static void mv88e6xxx_ppu_reenable_timer(unsigned long _ps) +{ + struct mv88e6xxx_priv_state *ps = (void *)_ps; + + schedule_work(&ps->ppu_work); +} + +static int mv88e6xxx_ppu_access_get(struct dsa_switch *ds) +{ + struct mv88e6xxx_priv_state *ps = (void *)(ds + 1); + int ret; + + mutex_lock(&ps->ppu_mutex); + + /* + * If the PHY polling unit is enabled, disable it so that + * we can access the PHY registers. If it was already + * disabled, cancel the timer that is going to re-enable + * it. + */ + if (!ps->ppu_disabled) { + ret = mv88e6xxx_ppu_disable(ds); + if (ret < 0) { + mutex_unlock(&ps->ppu_mutex); + return ret; + } + ps->ppu_disabled = 1; + } else { + del_timer(&ps->ppu_timer); + ret = 0; + } + + return ret; +} + +static void mv88e6xxx_ppu_access_put(struct dsa_switch *ds) +{ + struct mv88e6xxx_priv_state *ps = (void *)(ds + 1); + + /* + * Schedule a timer to re-enable the PHY polling unit. + */ + mod_timer(&ps->ppu_timer, jiffies + msecs_to_jiffies(10)); + mutex_unlock(&ps->ppu_mutex); +} + +void mv88e6xxx_ppu_state_init(struct dsa_switch *ds) +{ + struct mv88e6xxx_priv_state *ps = (void *)(ds + 1); + + mutex_init(&ps->ppu_mutex); + INIT_WORK(&ps->ppu_work, mv88e6xxx_ppu_reenable_work); + init_timer(&ps->ppu_timer); + ps->ppu_timer.data = (unsigned long)ps; + ps->ppu_timer.function = mv88e6xxx_ppu_reenable_timer; +} + +int mv88e6xxx_phy_read_ppu(struct dsa_switch *ds, int addr, int regnum) +{ + int ret; + + ret = mv88e6xxx_ppu_access_get(ds); + if (ret >= 0) { + ret = mv88e6xxx_reg_read(ds, addr, regnum); + mv88e6xxx_ppu_access_put(ds); + } + + return ret; +} + +int mv88e6xxx_phy_write_ppu(struct dsa_switch *ds, int addr, + int regnum, u16 val) +{ + int ret; + + ret = mv88e6xxx_ppu_access_get(ds); + if (ret >= 0) { + ret = mv88e6xxx_reg_write(ds, addr, regnum, val); + mv88e6xxx_ppu_access_put(ds); + } + + return ret; +} +#endif + +void mv88e6xxx_poll_link(struct dsa_switch *ds) +{ + int i; + + for (i = 0; i < DSA_MAX_PORTS; i++) { + struct net_device *dev; + int port_status; + int link; + int speed; + int duplex; + int fc; + + dev = ds->ports[i]; + if (dev == NULL) + continue; + + link = 0; + if (dev->flags & IFF_UP) { + port_status = mv88e6xxx_reg_read(ds, REG_PORT(i), 0x00); + if (port_status < 0) + continue; + + link = !!(port_status & 0x0800); + } + + if (!link) { + if (netif_carrier_ok(dev)) { + printk(KERN_INFO "%s: link down\n", dev->name); + netif_carrier_off(dev); + } + continue; + } + + switch (port_status & 0x0300) { + case 0x0000: + speed = 10; + break; + case 0x0100: + speed = 100; + break; + case 0x0200: + speed = 1000; + break; + default: + speed = -1; + break; + } + duplex = (port_status & 0x0400) ? 1 : 0; + fc = (port_status & 0x8000) ? 1 : 0; + + if (!netif_carrier_ok(dev)) { + printk(KERN_INFO "%s: link up, %d Mb/s, %s duplex, " + "flow control %sabled\n", dev->name, + speed, duplex ? "full" : "half", + fc ? "en" : "dis"); + netif_carrier_on(dev); + } + } +} + +static int mv88e6xxx_stats_wait(struct dsa_switch *ds) +{ + int ret; + int i; + + for (i = 0; i < 10; i++) { + ret = REG_READ(REG_GLOBAL2, 0x1d); + if ((ret & 0x8000) == 0) + return 0; + } + + return -ETIMEDOUT; +} + +static int mv88e6xxx_stats_snapshot(struct dsa_switch *ds, int port) +{ + int ret; + + /* + * Snapshot the hardware statistics counters for this port. + */ + REG_WRITE(REG_GLOBAL, 0x1d, 0xdc00 | port); + + /* + * Wait for the snapshotting to complete. + */ + ret = mv88e6xxx_stats_wait(ds); + if (ret < 0) + return ret; + + return 0; +} + +static void mv88e6xxx_stats_read(struct dsa_switch *ds, int stat, u32 *val) +{ + u32 _val; + int ret; + + *val = 0; + + ret = mv88e6xxx_reg_write(ds, REG_GLOBAL, 0x1d, 0xcc00 | stat); + if (ret < 0) + return; + + ret = mv88e6xxx_stats_wait(ds); + if (ret < 0) + return; + + ret = mv88e6xxx_reg_read(ds, REG_GLOBAL, 0x1e); + if (ret < 0) + return; + + _val = ret << 16; + + ret = mv88e6xxx_reg_read(ds, REG_GLOBAL, 0x1f); + if (ret < 0) + return; + + *val = _val | ret; +} + +void mv88e6xxx_get_strings(struct dsa_switch *ds, + int nr_stats, struct mv88e6xxx_hw_stat *stats, + int port, uint8_t *data) +{ + int i; + + for (i = 0; i < nr_stats; i++) { + memcpy(data + i * ETH_GSTRING_LEN, + stats[i].string, ETH_GSTRING_LEN); + } +} + +void mv88e6xxx_get_ethtool_stats(struct dsa_switch *ds, + int nr_stats, struct mv88e6xxx_hw_stat *stats, + int port, uint64_t *data) +{ + struct mv88e6xxx_priv_state *ps = (void *)(ds + 1); + int ret; + int i; + + mutex_lock(&ps->stats_mutex); + + ret = mv88e6xxx_stats_snapshot(ds, port); + if (ret < 0) { + mutex_unlock(&ps->stats_mutex); + return; + } + + /* + * Read each of the counters. + */ + for (i = 0; i < nr_stats; i++) { + struct mv88e6xxx_hw_stat *s = stats + i; + u32 low; + u32 high; + + mv88e6xxx_stats_read(ds, s->reg, &low); + if (s->sizeof_stat == 8) + mv88e6xxx_stats_read(ds, s->reg + 1, &high); + else + high = 0; + + data[i] = (((u64)high) << 32) | low; + } + + mutex_unlock(&ps->stats_mutex); +} diff --git a/net/dsa/mv88e6xxx.h b/net/dsa/mv88e6xxx.h new file mode 100644 index 000000000000..eb0e0aaa9f1b --- /dev/null +++ b/net/dsa/mv88e6xxx.h @@ -0,0 +1,93 @@ +/* + * net/dsa/mv88e6xxx.h - Marvell 88e6xxx switch chip support + * Copyright (c) 2008 Marvell Semiconductor + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + */ + +#ifndef __MV88E6XXX_H +#define __MV88E6XXX_H + +#define REG_PORT(p) (0x10 + (p)) +#define REG_GLOBAL 0x1b +#define REG_GLOBAL2 0x1c + +struct mv88e6xxx_priv_state { + /* + * When using multi-chip addressing, this mutex protects + * access to the indirect access registers. (In single-chip + * mode, this mutex is effectively useless.) + */ + struct mutex smi_mutex; + +#ifdef CONFIG_NET_DSA_MV88E6XXX_NEED_PPU + /* + * Handles automatic disabling and re-enabling of the PHY + * polling unit. + */ + struct mutex ppu_mutex; + int ppu_disabled; + struct work_struct ppu_work; + struct timer_list ppu_timer; +#endif + + /* + * This mutex serialises access to the statistics unit. + * Hold this mutex over snapshot + dump sequences. + */ + struct mutex stats_mutex; +}; + +struct mv88e6xxx_hw_stat { + char string[ETH_GSTRING_LEN]; + int sizeof_stat; + int reg; +}; + +int __mv88e6xxx_reg_read(struct mii_bus *bus, int sw_addr, int addr, int reg); +int mv88e6xxx_reg_read(struct dsa_switch *ds, int addr, int reg); +int __mv88e6xxx_reg_write(struct mii_bus *bus, int sw_addr, int addr, + int reg, u16 val); +int mv88e6xxx_reg_write(struct dsa_switch *ds, int addr, int reg, u16 val); +int mv88e6xxx_config_prio(struct dsa_switch *ds); +int mv88e6xxx_set_addr_direct(struct dsa_switch *ds, u8 *addr); +int mv88e6xxx_set_addr_indirect(struct dsa_switch *ds, u8 *addr); +int mv88e6xxx_phy_read(struct dsa_switch *ds, int addr, int regnum); +int mv88e6xxx_phy_write(struct dsa_switch *ds, int addr, int regnum, u16 val); +void mv88e6xxx_ppu_state_init(struct dsa_switch *ds); +int mv88e6xxx_phy_read_ppu(struct dsa_switch *ds, int addr, int regnum); +int mv88e6xxx_phy_write_ppu(struct dsa_switch *ds, int addr, + int regnum, u16 val); +void mv88e6xxx_poll_link(struct dsa_switch *ds); +void mv88e6xxx_get_strings(struct dsa_switch *ds, + int nr_stats, struct mv88e6xxx_hw_stat *stats, + int port, uint8_t *data); +void mv88e6xxx_get_ethtool_stats(struct dsa_switch *ds, + int nr_stats, struct mv88e6xxx_hw_stat *stats, + int port, uint64_t *data); + +#define REG_READ(addr, reg) \ + ({ \ + int __ret; \ + \ + __ret = mv88e6xxx_reg_read(ds, addr, reg); \ + if (__ret < 0) \ + return __ret; \ + __ret; \ + }) + +#define REG_WRITE(addr, reg, val) \ + ({ \ + int __ret; \ + \ + __ret = mv88e6xxx_reg_write(ds, addr, reg, val); \ + if (__ret < 0) \ + return __ret; \ + }) + + + +#endif diff --git a/net/dsa/slave.c b/net/dsa/slave.c new file mode 100644 index 000000000000..37616884b8a9 --- /dev/null +++ b/net/dsa/slave.c @@ -0,0 +1,298 @@ +/* + * net/dsa/slave.c - Slave device handling + * Copyright (c) 2008 Marvell Semiconductor + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + */ + +#include <linux/list.h> +#include <linux/netdevice.h> +#include <linux/phy.h> +#include "dsa_priv.h" + +/* slave mii_bus handling ***************************************************/ +static int dsa_slave_phy_read(struct mii_bus *bus, int addr, int reg) +{ + struct dsa_switch *ds = bus->priv; + + if (ds->valid_port_mask & (1 << addr)) + return ds->drv->phy_read(ds, addr, reg); + + return 0xffff; +} + +static int dsa_slave_phy_write(struct mii_bus *bus, int addr, int reg, u16 val) +{ + struct dsa_switch *ds = bus->priv; + + if (ds->valid_port_mask & (1 << addr)) + return ds->drv->phy_write(ds, addr, reg, val); + + return 0; +} + +void dsa_slave_mii_bus_init(struct dsa_switch *ds) +{ + ds->slave_mii_bus->priv = (void *)ds; + ds->slave_mii_bus->name = "dsa slave smi"; + ds->slave_mii_bus->read = dsa_slave_phy_read; + ds->slave_mii_bus->write = dsa_slave_phy_write; + snprintf(ds->slave_mii_bus->id, MII_BUS_ID_SIZE, "%s:%.2x", + ds->master_mii_bus->id, ds->pd->sw_addr); + ds->slave_mii_bus->parent = &(ds->master_mii_bus->dev); +} + + +/* slave device handling ****************************************************/ +static int dsa_slave_open(struct net_device *dev) +{ + return 0; +} + +static int dsa_slave_close(struct net_device *dev) +{ + return 0; +} + +static void dsa_slave_change_rx_flags(struct net_device *dev, int change) +{ + struct dsa_slave_priv *p = netdev_priv(dev); + struct net_device *master = p->parent->master_netdev; + + if (change & IFF_ALLMULTI) + dev_set_allmulti(master, dev->flags & IFF_ALLMULTI ? 1 : -1); + if (change & IFF_PROMISC) + dev_set_promiscuity(master, dev->flags & IFF_PROMISC ? 1 : -1); +} + +static void dsa_slave_set_rx_mode(struct net_device *dev) +{ + struct dsa_slave_priv *p = netdev_priv(dev); + struct net_device *master = p->parent->master_netdev; + + dev_mc_sync(master, dev); + dev_unicast_sync(master, dev); +} + +static int dsa_slave_set_mac_address(struct net_device *dev, void *addr) +{ + memcpy(dev->dev_addr, addr + 2, 6); + + return 0; +} + +static int dsa_slave_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd) +{ + struct dsa_slave_priv *p = netdev_priv(dev); + struct mii_ioctl_data *mii_data = if_mii(ifr); + + if (p->phy != NULL) + return phy_mii_ioctl(p->phy, mii_data, cmd); + + return -EOPNOTSUPP; +} + + +/* ethtool operations *******************************************************/ +static int +dsa_slave_get_settings(struct net_device *dev, struct ethtool_cmd *cmd) +{ + struct dsa_slave_priv *p = netdev_priv(dev); + int err; + + err = -EOPNOTSUPP; + if (p->phy != NULL) { + err = phy_read_status(p->phy); + if (err == 0) + err = phy_ethtool_gset(p->phy, cmd); + } + + return err; +} + +static int +dsa_slave_set_settings(struct net_device *dev, struct ethtool_cmd *cmd) +{ + struct dsa_slave_priv *p = netdev_priv(dev); + + if (p->phy != NULL) + return phy_ethtool_sset(p->phy, cmd); + + return -EOPNOTSUPP; +} + +static void dsa_slave_get_drvinfo(struct net_device *dev, + struct ethtool_drvinfo *drvinfo) +{ + strncpy(drvinfo->driver, "dsa", 32); + strncpy(drvinfo->version, dsa_driver_version, 32); + strncpy(drvinfo->fw_version, "N/A", 32); + strncpy(drvinfo->bus_info, "platform", 32); +} + +static int dsa_slave_nway_reset(struct net_device *dev) +{ + struct dsa_slave_priv *p = netdev_priv(dev); + + if (p->phy != NULL) + return genphy_restart_aneg(p->phy); + + return -EOPNOTSUPP; +} + +static u32 dsa_slave_get_link(struct net_device *dev) +{ + struct dsa_slave_priv *p = netdev_priv(dev); + + if (p->phy != NULL) { + genphy_update_link(p->phy); + return p->phy->link; + } + + return -EOPNOTSUPP; +} + +static void dsa_slave_get_strings(struct net_device *dev, + uint32_t stringset, uint8_t *data) +{ + struct dsa_slave_priv *p = netdev_priv(dev); + struct dsa_switch *ds = p->parent; + + if (stringset == ETH_SS_STATS) { + int len = ETH_GSTRING_LEN; + + strncpy(data, "tx_packets", len); + strncpy(data + len, "tx_bytes", len); + strncpy(data + 2 * len, "rx_packets", len); + strncpy(data + 3 * len, "rx_bytes", len); + if (ds->drv->get_strings != NULL) + ds->drv->get_strings(ds, p->port, data + 4 * len); + } +} + +static void dsa_slave_get_ethtool_stats(struct net_device *dev, + struct ethtool_stats *stats, + uint64_t *data) +{ + struct dsa_slave_priv *p = netdev_priv(dev); + struct dsa_switch *ds = p->parent; + + data[0] = p->dev->stats.tx_packets; + data[1] = p->dev->stats.tx_bytes; + data[2] = p->dev->stats.rx_packets; + data[3] = p->dev->stats.rx_bytes; + if (ds->drv->get_ethtool_stats != NULL) + ds->drv->get_ethtool_stats(ds, p->port, data + 4); +} + +static int dsa_slave_get_sset_count(struct net_device *dev, int sset) +{ + struct dsa_slave_priv *p = netdev_priv(dev); + struct dsa_switch *ds = p->parent; + + if (sset == ETH_SS_STATS) { + int count; + + count = 4; + if (ds->drv->get_sset_count != NULL) + count += ds->drv->get_sset_count(ds); + + return count; + } + + return -EOPNOTSUPP; +} + +static const struct ethtool_ops dsa_slave_ethtool_ops = { + .get_settings = dsa_slave_get_settings, + .set_settings = dsa_slave_set_settings, + .get_drvinfo = dsa_slave_get_drvinfo, + .nway_reset = dsa_slave_nway_reset, + .get_link = dsa_slave_get_link, + .set_sg = ethtool_op_set_sg, + .get_strings = dsa_slave_get_strings, + .get_ethtool_stats = dsa_slave_get_ethtool_stats, + .get_sset_count = dsa_slave_get_sset_count, +}; + + +/* slave device setup *******************************************************/ +struct net_device * +dsa_slave_create(struct dsa_switch *ds, struct device *parent, + int port, char *name) +{ + struct net_device *master = ds->master_netdev; + struct net_device *slave_dev; + struct dsa_slave_priv *p; + int ret; + + slave_dev = alloc_netdev(sizeof(struct dsa_slave_priv), + name, ether_setup); + if (slave_dev == NULL) + return slave_dev; + + slave_dev->features = master->vlan_features; + SET_ETHTOOL_OPS(slave_dev, &dsa_slave_ethtool_ops); + memcpy(slave_dev->dev_addr, master->dev_addr, ETH_ALEN); + slave_dev->tx_queue_len = 0; + switch (ds->tag_protocol) { +#ifdef CONFIG_NET_DSA_TAG_DSA + case htons(ETH_P_DSA): + slave_dev->hard_start_xmit = dsa_xmit; + break; +#endif +#ifdef CONFIG_NET_DSA_TAG_EDSA + case htons(ETH_P_EDSA): + slave_dev->hard_start_xmit = edsa_xmit; + break; +#endif +#ifdef CONFIG_NET_DSA_TAG_TRAILER + case htons(ETH_P_TRAILER): + slave_dev->hard_start_xmit = trailer_xmit; + break; +#endif + default: + BUG(); + } + slave_dev->open = dsa_slave_open; + slave_dev->stop = dsa_slave_close; + slave_dev->change_rx_flags = dsa_slave_change_rx_flags; + slave_dev->set_rx_mode = dsa_slave_set_rx_mode; + slave_dev->set_multicast_list = dsa_slave_set_rx_mode; + slave_dev->set_mac_address = dsa_slave_set_mac_address; + slave_dev->do_ioctl = dsa_slave_ioctl; + SET_NETDEV_DEV(slave_dev, parent); + slave_dev->vlan_features = master->vlan_features; + + p = netdev_priv(slave_dev); + p->dev = slave_dev; + p->parent = ds; + p->port = port; + p->phy = ds->slave_mii_bus->phy_map[port]; + + ret = register_netdev(slave_dev); + if (ret) { + printk(KERN_ERR "%s: error %d registering interface %s\n", + master->name, ret, slave_dev->name); + free_netdev(slave_dev); + return NULL; + } + + netif_carrier_off(slave_dev); + + if (p->phy != NULL) { + phy_attach(slave_dev, p->phy->dev.bus_id, + 0, PHY_INTERFACE_MODE_GMII); + + p->phy->autoneg = AUTONEG_ENABLE; + p->phy->speed = 0; + p->phy->duplex = 0; + p->phy->advertising = p->phy->supported | ADVERTISED_Autoneg; + phy_start_aneg(p->phy); + } + + return slave_dev; +} diff --git a/net/dsa/tag_dsa.c b/net/dsa/tag_dsa.c new file mode 100644 index 000000000000..bdc0510b53b7 --- /dev/null +++ b/net/dsa/tag_dsa.c @@ -0,0 +1,194 @@ +/* + * net/dsa/tag_dsa.c - (Non-ethertype) DSA tagging + * Copyright (c) 2008 Marvell Semiconductor + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + */ + +#include <linux/etherdevice.h> +#include <linux/list.h> +#include <linux/netdevice.h> +#include "dsa_priv.h" + +#define DSA_HLEN 4 + +int dsa_xmit(struct sk_buff *skb, struct net_device *dev) +{ + struct dsa_slave_priv *p = netdev_priv(dev); + u8 *dsa_header; + + dev->stats.tx_packets++; + dev->stats.tx_bytes += skb->len; + + /* + * Convert the outermost 802.1q tag to a DSA tag for tagged + * packets, or insert a DSA tag between the addresses and + * the ethertype field for untagged packets. + */ + if (skb->protocol == htons(ETH_P_8021Q)) { + if (skb_cow_head(skb, 0) < 0) + goto out_free; + + /* + * Construct tagged FROM_CPU DSA tag from 802.1q tag. + */ + dsa_header = skb->data + 2 * ETH_ALEN; + dsa_header[0] = 0x60; + dsa_header[1] = p->port << 3; + + /* + * Move CFI field from byte 2 to byte 1. + */ + if (dsa_header[2] & 0x10) { + dsa_header[1] |= 0x01; + dsa_header[2] &= ~0x10; + } + } else { + if (skb_cow_head(skb, DSA_HLEN) < 0) + goto out_free; + skb_push(skb, DSA_HLEN); + + memmove(skb->data, skb->data + DSA_HLEN, 2 * ETH_ALEN); + + /* + * Construct untagged FROM_CPU DSA tag. + */ + dsa_header = skb->data + 2 * ETH_ALEN; + dsa_header[0] = 0x40; + dsa_header[1] = p->port << 3; + dsa_header[2] = 0x00; + dsa_header[3] = 0x00; + } + + skb->protocol = htons(ETH_P_DSA); + + skb->dev = p->parent->master_netdev; + dev_queue_xmit(skb); + + return NETDEV_TX_OK; + +out_free: + kfree_skb(skb); + return NETDEV_TX_OK; +} + +static int dsa_rcv(struct sk_buff *skb, struct net_device *dev, + struct packet_type *pt, struct net_device *orig_dev) +{ + struct dsa_switch *ds = dev->dsa_ptr; + u8 *dsa_header; + int source_port; + + if (unlikely(ds == NULL)) + goto out_drop; + + skb = skb_unshare(skb, GFP_ATOMIC); + if (skb == NULL) + goto out; + + if (unlikely(!pskb_may_pull(skb, DSA_HLEN))) + goto out_drop; + + /* + * The ethertype field is part of the DSA header. + */ + dsa_header = skb->data - 2; + + /* + * Check that frame type is either TO_CPU or FORWARD, and + * that the source device is zero. + */ + if ((dsa_header[0] & 0xdf) != 0x00 && (dsa_header[0] & 0xdf) != 0xc0) + goto out_drop; + + /* + * Check that the source port is a registered DSA port. + */ + source_port = (dsa_header[1] >> 3) & 0x1f; + if (source_port >= DSA_MAX_PORTS || ds->ports[source_port] == NULL) + goto out_drop; + + /* + * Convert the DSA header to an 802.1q header if the 'tagged' + * bit in the DSA header is set. If the 'tagged' bit is clear, + * delete the DSA header entirely. + */ + if (dsa_header[0] & 0x20) { + u8 new_header[4]; + + /* + * Insert 802.1q ethertype and copy the VLAN-related + * fields, but clear the bit that will hold CFI (since + * DSA uses that bit location for another purpose). + */ + new_header[0] = (ETH_P_8021Q >> 8) & 0xff; + new_header[1] = ETH_P_8021Q & 0xff; + new_header[2] = dsa_header[2] & ~0x10; + new_header[3] = dsa_header[3]; + + /* + * Move CFI bit from its place in the DSA header to + * its 802.1q-designated place. + */ + if (dsa_header[1] & 0x01) + new_header[2] |= 0x10; + + /* + * Update packet checksum if skb is CHECKSUM_COMPLETE. + */ + if (skb->ip_summed == CHECKSUM_COMPLETE) { + __wsum c = skb->csum; + c = csum_add(c, csum_partial(new_header + 2, 2, 0)); + c = csum_sub(c, csum_partial(dsa_header + 2, 2, 0)); + skb->csum = c; + } + + memcpy(dsa_header, new_header, DSA_HLEN); + } else { + /* + * Remove DSA tag and update checksum. + */ + skb_pull_rcsum(skb, DSA_HLEN); + memmove(skb->data - ETH_HLEN, + skb->data - ETH_HLEN - DSA_HLEN, + 2 * ETH_ALEN); + } + + skb->dev = ds->ports[source_port]; + skb_push(skb, ETH_HLEN); + skb->protocol = eth_type_trans(skb, skb->dev); + + skb->dev->last_rx = jiffies; + skb->dev->stats.rx_packets++; + skb->dev->stats.rx_bytes += skb->len; + + netif_receive_skb(skb); + + return 0; + +out_drop: + kfree_skb(skb); +out: + return 0; +} + +static struct packet_type dsa_packet_type = { + .type = __constant_htons(ETH_P_DSA), + .func = dsa_rcv, +}; + +static int __init dsa_init_module(void) +{ + dev_add_pack(&dsa_packet_type); + return 0; +} +module_init(dsa_init_module); + +static void __exit dsa_cleanup_module(void) +{ + dev_remove_pack(&dsa_packet_type); +} +module_exit(dsa_cleanup_module); diff --git a/net/dsa/tag_edsa.c b/net/dsa/tag_edsa.c new file mode 100644 index 000000000000..f985ea993843 --- /dev/null +++ b/net/dsa/tag_edsa.c @@ -0,0 +1,213 @@ +/* + * net/dsa/tag_edsa.c - Ethertype DSA tagging + * Copyright (c) 2008 Marvell Semiconductor + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + */ + +#include <linux/etherdevice.h> +#include <linux/list.h> +#include <linux/netdevice.h> +#include "dsa_priv.h" + +#define DSA_HLEN 4 +#define EDSA_HLEN 8 + +int edsa_xmit(struct sk_buff *skb, struct net_device *dev) +{ + struct dsa_slave_priv *p = netdev_priv(dev); + u8 *edsa_header; + + dev->stats.tx_packets++; + dev->stats.tx_bytes += skb->len; + + /* + * Convert the outermost 802.1q tag to a DSA tag and prepend + * a DSA ethertype field is the packet is tagged, or insert + * a DSA ethertype plus DSA tag between the addresses and the + * current ethertype field if the packet is untagged. + */ + if (skb->protocol == htons(ETH_P_8021Q)) { + if (skb_cow_head(skb, DSA_HLEN) < 0) + goto out_free; + skb_push(skb, DSA_HLEN); + + memmove(skb->data, skb->data + DSA_HLEN, 2 * ETH_ALEN); + + /* + * Construct tagged FROM_CPU DSA tag from 802.1q tag. + */ + edsa_header = skb->data + 2 * ETH_ALEN; + edsa_header[0] = (ETH_P_EDSA >> 8) & 0xff; + edsa_header[1] = ETH_P_EDSA & 0xff; + edsa_header[2] = 0x00; + edsa_header[3] = 0x00; + edsa_header[4] = 0x60; + edsa_header[5] = p->port << 3; + + /* + * Move CFI field from byte 6 to byte 5. + */ + if (edsa_header[6] & 0x10) { + edsa_header[5] |= 0x01; + edsa_header[6] &= ~0x10; + } + } else { + if (skb_cow_head(skb, EDSA_HLEN) < 0) + goto out_free; + skb_push(skb, EDSA_HLEN); + + memmove(skb->data, skb->data + EDSA_HLEN, 2 * ETH_ALEN); + + /* + * Construct untagged FROM_CPU DSA tag. + */ + edsa_header = skb->data + 2 * ETH_ALEN; + edsa_header[0] = (ETH_P_EDSA >> 8) & 0xff; + edsa_header[1] = ETH_P_EDSA & 0xff; + edsa_header[2] = 0x00; + edsa_header[3] = 0x00; + edsa_header[4] = 0x40; + edsa_header[5] = p->port << 3; + edsa_header[6] = 0x00; + edsa_header[7] = 0x00; + } + + skb->protocol = htons(ETH_P_EDSA); + + skb->dev = p->parent->master_netdev; + dev_queue_xmit(skb); + + return NETDEV_TX_OK; + +out_free: + kfree_skb(skb); + return NETDEV_TX_OK; +} + +static int edsa_rcv(struct sk_buff *skb, struct net_device *dev, + struct packet_type *pt, struct net_device *orig_dev) +{ + struct dsa_switch *ds = dev->dsa_ptr; + u8 *edsa_header; + int source_port; + + if (unlikely(ds == NULL)) + goto out_drop; + + skb = skb_unshare(skb, GFP_ATOMIC); + if (skb == NULL) + goto out; + + if (unlikely(!pskb_may_pull(skb, EDSA_HLEN))) + goto out_drop; + + /* + * Skip the two null bytes after the ethertype. + */ + edsa_header = skb->data + 2; + + /* + * Check that frame type is either TO_CPU or FORWARD, and + * that the source device is zero. + */ + if ((edsa_header[0] & 0xdf) != 0x00 && (edsa_header[0] & 0xdf) != 0xc0) + goto out_drop; + + /* + * Check that the source port is a registered DSA port. + */ + source_port = (edsa_header[1] >> 3) & 0x1f; + if (source_port >= DSA_MAX_PORTS || ds->ports[source_port] == NULL) + goto out_drop; + + /* + * If the 'tagged' bit is set, convert the DSA tag to a 802.1q + * tag and delete the ethertype part. If the 'tagged' bit is + * clear, delete the ethertype and the DSA tag parts. + */ + if (edsa_header[0] & 0x20) { + u8 new_header[4]; + + /* + * Insert 802.1q ethertype and copy the VLAN-related + * fields, but clear the bit that will hold CFI (since + * DSA uses that bit location for another purpose). + */ + new_header[0] = (ETH_P_8021Q >> 8) & 0xff; + new_header[1] = ETH_P_8021Q & 0xff; + new_header[2] = edsa_header[2] & ~0x10; + new_header[3] = edsa_header[3]; + + /* + * Move CFI bit from its place in the DSA header to + * its 802.1q-designated place. + */ + if (edsa_header[1] & 0x01) + new_header[2] |= 0x10; + + skb_pull_rcsum(skb, DSA_HLEN); + + /* + * Update packet checksum if skb is CHECKSUM_COMPLETE. + */ + if (skb->ip_summed == CHECKSUM_COMPLETE) { + __wsum c = skb->csum; + c = csum_add(c, csum_partial(new_header + 2, 2, 0)); + c = csum_sub(c, csum_partial(edsa_header + 2, 2, 0)); + skb->csum = c; + } + + memcpy(edsa_header, new_header, DSA_HLEN); + + memmove(skb->data - ETH_HLEN, + skb->data - ETH_HLEN - DSA_HLEN, + 2 * ETH_ALEN); + } else { + /* + * Remove DSA tag and update checksum. + */ + skb_pull_rcsum(skb, EDSA_HLEN); + memmove(skb->data - ETH_HLEN, + skb->data - ETH_HLEN - EDSA_HLEN, + 2 * ETH_ALEN); + } + + skb->dev = ds->ports[source_port]; + skb_push(skb, ETH_HLEN); + skb->protocol = eth_type_trans(skb, skb->dev); + + skb->dev->last_rx = jiffies; + skb->dev->stats.rx_packets++; + skb->dev->stats.rx_bytes += skb->len; + + netif_receive_skb(skb); + + return 0; + +out_drop: + kfree_skb(skb); +out: + return 0; +} + +static struct packet_type edsa_packet_type = { + .type = __constant_htons(ETH_P_EDSA), + .func = edsa_rcv, +}; + +static int __init edsa_init_module(void) +{ + dev_add_pack(&edsa_packet_type); + return 0; +} +module_init(edsa_init_module); + +static void __exit edsa_cleanup_module(void) +{ + dev_remove_pack(&edsa_packet_type); +} +module_exit(edsa_cleanup_module); diff --git a/net/dsa/tag_trailer.c b/net/dsa/tag_trailer.c new file mode 100644 index 000000000000..d3117764b2c2 --- /dev/null +++ b/net/dsa/tag_trailer.c @@ -0,0 +1,130 @@ +/* + * net/dsa/tag_trailer.c - Trailer tag format handling + * Copyright (c) 2008 Marvell Semiconductor + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + */ + +#include <linux/etherdevice.h> +#include <linux/list.h> +#include <linux/netdevice.h> +#include "dsa_priv.h" + +int trailer_xmit(struct sk_buff *skb, struct net_device *dev) +{ + struct dsa_slave_priv *p = netdev_priv(dev); + struct sk_buff *nskb; + int padlen; + u8 *trailer; + + dev->stats.tx_packets++; + dev->stats.tx_bytes += skb->len; + + /* + * We have to make sure that the trailer ends up as the very + * last 4 bytes of the packet. This means that we have to pad + * the packet to the minimum ethernet frame size, if necessary, + * before adding the trailer. + */ + padlen = 0; + if (skb->len < 60) + padlen = 60 - skb->len; + + nskb = alloc_skb(NET_IP_ALIGN + skb->len + padlen + 4, GFP_ATOMIC); + if (nskb == NULL) { + kfree_skb(skb); + return NETDEV_TX_OK; + } + skb_reserve(nskb, NET_IP_ALIGN); + + skb_reset_mac_header(nskb); + skb_set_network_header(nskb, skb_network_header(skb) - skb->head); + skb_set_transport_header(nskb, skb_transport_header(skb) - skb->head); + skb_copy_and_csum_dev(skb, skb_put(nskb, skb->len)); + kfree_skb(skb); + + if (padlen) { + u8 *pad = skb_put(nskb, padlen); + memset(pad, 0, padlen); + } + + trailer = skb_put(nskb, 4); + trailer[0] = 0x80; + trailer[1] = 1 << p->port; + trailer[2] = 0x10; + trailer[3] = 0x00; + + nskb->protocol = htons(ETH_P_TRAILER); + + nskb->dev = p->parent->master_netdev; + dev_queue_xmit(nskb); + + return NETDEV_TX_OK; +} + +static int trailer_rcv(struct sk_buff *skb, struct net_device *dev, + struct packet_type *pt, struct net_device *orig_dev) +{ + struct dsa_switch *ds = dev->dsa_ptr; + u8 *trailer; + int source_port; + + if (unlikely(ds == NULL)) + goto out_drop; + + skb = skb_unshare(skb, GFP_ATOMIC); + if (skb == NULL) + goto out; + + if (skb_linearize(skb)) + goto out_drop; + + trailer = skb_tail_pointer(skb) - 4; + if (trailer[0] != 0x80 || (trailer[1] & 0xf8) != 0x00 || + (trailer[3] & 0xef) != 0x00 || trailer[3] != 0x00) + goto out_drop; + + source_port = trailer[1] & 7; + if (source_port >= DSA_MAX_PORTS || ds->ports[source_port] == NULL) + goto out_drop; + + pskb_trim_rcsum(skb, skb->len - 4); + + skb->dev = ds->ports[source_port]; + skb_push(skb, ETH_HLEN); + skb->protocol = eth_type_trans(skb, skb->dev); + + skb->dev->last_rx = jiffies; + skb->dev->stats.rx_packets++; + skb->dev->stats.rx_bytes += skb->len; + + netif_receive_skb(skb); + + return 0; + +out_drop: + kfree_skb(skb); +out: + return 0; +} + +static struct packet_type trailer_packet_type = { + .type = __constant_htons(ETH_P_TRAILER), + .func = trailer_rcv, +}; + +static int __init trailer_init_module(void) +{ + dev_add_pack(&trailer_packet_type); + return 0; +} +module_init(trailer_init_module); + +static void __exit trailer_cleanup_module(void) +{ + dev_remove_pack(&trailer_packet_type); +} +module_exit(trailer_cleanup_module); diff --git a/net/econet/af_econet.c b/net/econet/af_econet.c index 7c9bb13b1539..8789d2bb1b06 100644 --- a/net/econet/af_econet.c +++ b/net/econet/af_econet.c @@ -573,9 +573,7 @@ static int econet_release(struct socket *sock) sk->sk_state_change(sk); /* It is useless. Just for sanity. */ - sock->sk = NULL; - sk->sk_socket = NULL; - sock_set_flag(sk, SOCK_DEAD); + sock_orphan(sk); /* Purge queues */ @@ -1064,7 +1062,7 @@ static int econet_rcv(struct sk_buff *skb, struct net_device *dev, struct packet struct sock *sk; struct ec_device *edev = dev->ec_ptr; - if (dev_net(dev) != &init_net) + if (!net_eq(dev_net(dev), &init_net)) goto drop; if (skb->pkt_type == PACKET_OTHERHOST) @@ -1121,7 +1119,7 @@ static int econet_notifier(struct notifier_block *this, unsigned long msg, void struct net_device *dev = (struct net_device *)data; struct ec_device *edev; - if (dev_net(dev) != &init_net) + if (!net_eq(dev_net(dev), &init_net)) return NOTIFY_DONE; switch (msg) { diff --git a/net/ethernet/eth.c b/net/ethernet/eth.c index a80839b02e3f..b9d85af2dd31 100644 --- a/net/ethernet/eth.c +++ b/net/ethernet/eth.c @@ -57,6 +57,7 @@ #include <net/sock.h> #include <net/ipv6.h> #include <net/ip.h> +#include <net/dsa.h> #include <asm/uaccess.h> #include <asm/system.h> @@ -129,7 +130,7 @@ int eth_rebuild_header(struct sk_buff *skb) switch (eth->h_proto) { #ifdef CONFIG_INET - case __constant_htons(ETH_P_IP): + case htons(ETH_P_IP): return arp_find(eth->h_dest, skb); #endif default: @@ -184,6 +185,17 @@ __be16 eth_type_trans(struct sk_buff *skb, struct net_device *dev) skb->pkt_type = PACKET_OTHERHOST; } + /* + * Some variants of DSA tagging don't have an ethertype field + * at all, so we check here whether one of those tagging + * variants has been configured on the receiving interface, + * and if so, set skb->protocol without looking at the packet. + */ + if (netdev_uses_dsa_tags(dev)) + return htons(ETH_P_DSA); + if (netdev_uses_trailer_tags(dev)) + return htons(ETH_P_TRAILER); + if (ntohs(eth->h_proto) >= 1536) return eth->h_proto; diff --git a/net/ieee80211/ieee80211_module.c b/net/ieee80211/ieee80211_module.c index 3bca97f55d47..949772a5a7dc 100644 --- a/net/ieee80211/ieee80211_module.c +++ b/net/ieee80211/ieee80211_module.c @@ -157,7 +157,7 @@ struct net_device *alloc_ieee80211(int sizeof_priv) err = ieee80211_networks_allocate(ieee); if (err) { IEEE80211_ERROR("Unable to allocate beacon storage: %d\n", err); - goto failed; + goto failed_free_netdev; } ieee80211_networks_initialize(ieee); @@ -193,9 +193,9 @@ struct net_device *alloc_ieee80211(int sizeof_priv) return dev; - failed: - if (dev) - free_netdev(dev); +failed_free_netdev: + free_netdev(dev); +failed: return NULL; } diff --git a/net/ieee80211/ieee80211_rx.c b/net/ieee80211/ieee80211_rx.c index 200ee1e63728..69dbc342a464 100644 --- a/net/ieee80211/ieee80211_rx.c +++ b/net/ieee80211/ieee80211_rx.c @@ -391,7 +391,7 @@ int ieee80211_rx(struct ieee80211_device *ieee, struct sk_buff *skb, wstats.updated = 0; if (rx_stats->mask & IEEE80211_STATMASK_RSSI) { - wstats.level = rx_stats->rssi; + wstats.level = rx_stats->signal; wstats.updated |= IW_QUAL_LEVEL_UPDATED; } else wstats.updated |= IW_QUAL_LEVEL_INVALID; diff --git a/net/ieee80211/ieee80211_tx.c b/net/ieee80211/ieee80211_tx.c index d8b02603cbe5..d996547f7a62 100644 --- a/net/ieee80211/ieee80211_tx.c +++ b/net/ieee80211/ieee80211_tx.c @@ -542,90 +542,4 @@ int ieee80211_xmit(struct sk_buff *skb, struct net_device *dev) return 1; } -/* Incoming 802.11 strucure is converted to a TXB - * a block of 802.11 fragment packets (stored as skbs) */ -int ieee80211_tx_frame(struct ieee80211_device *ieee, - struct ieee80211_hdr *frame, int hdr_len, int total_len, - int encrypt_mpdu) -{ - struct ieee80211_txb *txb = NULL; - unsigned long flags; - struct net_device_stats *stats = &ieee->stats; - struct sk_buff *skb_frag; - int priority = -1; - int fraglen = total_len; - int headroom = ieee->tx_headroom; - struct ieee80211_crypt_data *crypt = ieee->crypt[ieee->tx_keyidx]; - - spin_lock_irqsave(&ieee->lock, flags); - - if (encrypt_mpdu && (!ieee->sec.encrypt || !crypt)) - encrypt_mpdu = 0; - - /* If there is no driver handler to take the TXB, dont' bother - * creating it... */ - if (!ieee->hard_start_xmit) { - printk(KERN_WARNING "%s: No xmit handler.\n", ieee->dev->name); - goto success; - } - - if (unlikely(total_len < 24)) { - printk(KERN_WARNING "%s: skb too small (%d).\n", - ieee->dev->name, total_len); - goto success; - } - - if (encrypt_mpdu) { - frame->frame_ctl |= cpu_to_le16(IEEE80211_FCTL_PROTECTED); - fraglen += crypt->ops->extra_mpdu_prefix_len + - crypt->ops->extra_mpdu_postfix_len; - headroom += crypt->ops->extra_mpdu_prefix_len; - } - - /* When we allocate the TXB we allocate enough space for the reserve - * and full fragment bytes (bytes_per_frag doesn't include prefix, - * postfix, header, FCS, etc.) */ - txb = ieee80211_alloc_txb(1, fraglen, headroom, GFP_ATOMIC); - if (unlikely(!txb)) { - printk(KERN_WARNING "%s: Could not allocate TXB\n", - ieee->dev->name); - goto failed; - } - txb->encrypted = 0; - txb->payload_size = fraglen; - - skb_frag = txb->fragments[0]; - - memcpy(skb_put(skb_frag, total_len), frame, total_len); - - if (ieee->config & - (CFG_IEEE80211_COMPUTE_FCS | CFG_IEEE80211_RESERVE_FCS)) - skb_put(skb_frag, 4); - - /* To avoid overcomplicating things, we do the corner-case frame - * encryption in software. The only real situation where encryption is - * needed here is during software-based shared key authentication. */ - if (encrypt_mpdu) - ieee80211_encrypt_fragment(ieee, skb_frag, hdr_len); - - success: - spin_unlock_irqrestore(&ieee->lock, flags); - - if (txb) { - if ((*ieee->hard_start_xmit) (txb, ieee->dev, priority) == 0) { - stats->tx_packets++; - stats->tx_bytes += txb->payload_size; - return 0; - } - ieee80211_txb_free(txb); - } - return 0; - - failed: - spin_unlock_irqrestore(&ieee->lock, flags); - stats->tx_errors++; - return 1; -} - -EXPORT_SYMBOL(ieee80211_tx_frame); EXPORT_SYMBOL(ieee80211_txb_free); diff --git a/net/ieee80211/ieee80211_wx.c b/net/ieee80211/ieee80211_wx.c index 623489afa62c..973832dd7faf 100644 --- a/net/ieee80211/ieee80211_wx.c +++ b/net/ieee80211/ieee80211_wx.c @@ -43,8 +43,9 @@ static const char *ieee80211_modes[] = { #define MAX_CUSTOM_LEN 64 static char *ieee80211_translate_scan(struct ieee80211_device *ieee, - char *start, char *stop, - struct ieee80211_network *network) + char *start, char *stop, + struct ieee80211_network *network, + struct iw_request_info *info) { char custom[MAX_CUSTOM_LEN]; char *p; @@ -57,7 +58,7 @@ static char *ieee80211_translate_scan(struct ieee80211_device *ieee, iwe.cmd = SIOCGIWAP; iwe.u.ap_addr.sa_family = ARPHRD_ETHER; memcpy(iwe.u.ap_addr.sa_data, network->bssid, ETH_ALEN); - start = iwe_stream_add_event(start, stop, &iwe, IW_EV_ADDR_LEN); + start = iwe_stream_add_event(info, start, stop, &iwe, IW_EV_ADDR_LEN); /* Remaining entries will be displayed in the order we provide them */ @@ -66,17 +67,19 @@ static char *ieee80211_translate_scan(struct ieee80211_device *ieee, iwe.u.data.flags = 1; if (network->flags & NETWORK_EMPTY_ESSID) { iwe.u.data.length = sizeof("<hidden>"); - start = iwe_stream_add_point(start, stop, &iwe, "<hidden>"); + start = iwe_stream_add_point(info, start, stop, + &iwe, "<hidden>"); } else { iwe.u.data.length = min(network->ssid_len, (u8) 32); - start = iwe_stream_add_point(start, stop, &iwe, network->ssid); + start = iwe_stream_add_point(info, start, stop, + &iwe, network->ssid); } /* Add the protocol name */ iwe.cmd = SIOCGIWNAME; snprintf(iwe.u.name, IFNAMSIZ, "IEEE 802.11%s", ieee80211_modes[network->mode]); - start = iwe_stream_add_event(start, stop, &iwe, IW_EV_CHAR_LEN); + start = iwe_stream_add_event(info, start, stop, &iwe, IW_EV_CHAR_LEN); /* Add mode */ iwe.cmd = SIOCGIWMODE; @@ -86,7 +89,8 @@ static char *ieee80211_translate_scan(struct ieee80211_device *ieee, else iwe.u.mode = IW_MODE_ADHOC; - start = iwe_stream_add_event(start, stop, &iwe, IW_EV_UINT_LEN); + start = iwe_stream_add_event(info, start, stop, + &iwe, IW_EV_UINT_LEN); } /* Add channel and frequency */ @@ -95,7 +99,7 @@ static char *ieee80211_translate_scan(struct ieee80211_device *ieee, iwe.u.freq.m = ieee80211_channel_to_freq(ieee, network->channel); iwe.u.freq.e = 6; iwe.u.freq.i = 0; - start = iwe_stream_add_event(start, stop, &iwe, IW_EV_FREQ_LEN); + start = iwe_stream_add_event(info, start, stop, &iwe, IW_EV_FREQ_LEN); /* Add encryption capability */ iwe.cmd = SIOCGIWENCODE; @@ -104,12 +108,13 @@ static char *ieee80211_translate_scan(struct ieee80211_device *ieee, else iwe.u.data.flags = IW_ENCODE_DISABLED; iwe.u.data.length = 0; - start = iwe_stream_add_point(start, stop, &iwe, network->ssid); + start = iwe_stream_add_point(info, start, stop, + &iwe, network->ssid); /* Add basic and extended rates */ /* Rate : stuffing multiple values in a single event require a bit * more of magic - Jean II */ - current_val = start + IW_EV_LCP_LEN; + current_val = start + iwe_stream_lcp_len(info); iwe.cmd = SIOCGIWRATE; /* Those two flags are ignored... */ iwe.u.bitrate.fixed = iwe.u.bitrate.disabled = 0; @@ -124,17 +129,19 @@ static char *ieee80211_translate_scan(struct ieee80211_device *ieee, /* Bit rate given in 500 kb/s units (+ 0x80) */ iwe.u.bitrate.value = ((rate & 0x7f) * 500000); /* Add new value to event */ - current_val = iwe_stream_add_value(start, current_val, stop, &iwe, IW_EV_PARAM_LEN); + current_val = iwe_stream_add_value(info, start, current_val, + stop, &iwe, IW_EV_PARAM_LEN); } for (; j < network->rates_ex_len; j++) { rate = network->rates_ex[j] & 0x7F; /* Bit rate given in 500 kb/s units (+ 0x80) */ iwe.u.bitrate.value = ((rate & 0x7f) * 500000); /* Add new value to event */ - current_val = iwe_stream_add_value(start, current_val, stop, &iwe, IW_EV_PARAM_LEN); + current_val = iwe_stream_add_value(info, start, current_val, + stop, &iwe, IW_EV_PARAM_LEN); } /* Check if we added any rate */ - if((current_val - start) > IW_EV_LCP_LEN) + if ((current_val - start) > iwe_stream_lcp_len(info)) start = current_val; /* Add quality statistics */ @@ -181,14 +188,14 @@ static char *ieee80211_translate_scan(struct ieee80211_device *ieee, iwe.u.qual.level = network->stats.signal; } - start = iwe_stream_add_event(start, stop, &iwe, IW_EV_QUAL_LEN); + start = iwe_stream_add_event(info, start, stop, &iwe, IW_EV_QUAL_LEN); iwe.cmd = IWEVCUSTOM; p = custom; iwe.u.data.length = p - custom; if (iwe.u.data.length) - start = iwe_stream_add_point(start, stop, &iwe, custom); + start = iwe_stream_add_point(info, start, stop, &iwe, custom); memset(&iwe, 0, sizeof(iwe)); if (network->wpa_ie_len) { @@ -196,7 +203,7 @@ static char *ieee80211_translate_scan(struct ieee80211_device *ieee, memcpy(buf, network->wpa_ie, network->wpa_ie_len); iwe.cmd = IWEVGENIE; iwe.u.data.length = network->wpa_ie_len; - start = iwe_stream_add_point(start, stop, &iwe, buf); + start = iwe_stream_add_point(info, start, stop, &iwe, buf); } memset(&iwe, 0, sizeof(iwe)); @@ -205,7 +212,7 @@ static char *ieee80211_translate_scan(struct ieee80211_device *ieee, memcpy(buf, network->rsn_ie, network->rsn_ie_len); iwe.cmd = IWEVGENIE; iwe.u.data.length = network->rsn_ie_len; - start = iwe_stream_add_point(start, stop, &iwe, buf); + start = iwe_stream_add_point(info, start, stop, &iwe, buf); } /* Add EXTRA: Age to display seconds since last beacon/probe response @@ -217,7 +224,7 @@ static char *ieee80211_translate_scan(struct ieee80211_device *ieee, jiffies_to_msecs(jiffies - network->last_scanned)); iwe.u.data.length = p - custom; if (iwe.u.data.length) - start = iwe_stream_add_point(start, stop, &iwe, custom); + start = iwe_stream_add_point(info, start, stop, &iwe, custom); /* Add spectrum management information */ iwe.cmd = -1; @@ -238,7 +245,7 @@ static char *ieee80211_translate_scan(struct ieee80211_device *ieee, if (iwe.cmd == IWEVCUSTOM) { iwe.u.data.length = p - custom; - start = iwe_stream_add_point(start, stop, &iwe, custom); + start = iwe_stream_add_point(info, start, stop, &iwe, custom); } return start; @@ -272,7 +279,8 @@ int ieee80211_wx_get_scan(struct ieee80211_device *ieee, if (ieee->scan_age == 0 || time_after(network->last_scanned + ieee->scan_age, jiffies)) - ev = ieee80211_translate_scan(ieee, ev, stop, network); + ev = ieee80211_translate_scan(ieee, ev, stop, network, + info); else IEEE80211_DEBUG_SCAN("Not showing network '%s (" "%s)' due to age (%dms).\n", @@ -744,98 +752,9 @@ int ieee80211_wx_get_encodeext(struct ieee80211_device *ieee, return 0; } -int ieee80211_wx_set_auth(struct net_device *dev, - struct iw_request_info *info, - union iwreq_data *wrqu, - char *extra) -{ - struct ieee80211_device *ieee = netdev_priv(dev); - unsigned long flags; - int err = 0; - - spin_lock_irqsave(&ieee->lock, flags); - - switch (wrqu->param.flags & IW_AUTH_INDEX) { - case IW_AUTH_WPA_VERSION: - case IW_AUTH_CIPHER_PAIRWISE: - case IW_AUTH_CIPHER_GROUP: - case IW_AUTH_KEY_MGMT: - /* - * Host AP driver does not use these parameters and allows - * wpa_supplicant to control them internally. - */ - break; - case IW_AUTH_TKIP_COUNTERMEASURES: - break; /* FIXME */ - case IW_AUTH_DROP_UNENCRYPTED: - ieee->drop_unencrypted = !!wrqu->param.value; - break; - case IW_AUTH_80211_AUTH_ALG: - break; /* FIXME */ - case IW_AUTH_WPA_ENABLED: - ieee->privacy_invoked = ieee->wpa_enabled = !!wrqu->param.value; - break; - case IW_AUTH_RX_UNENCRYPTED_EAPOL: - ieee->ieee802_1x = !!wrqu->param.value; - break; - case IW_AUTH_PRIVACY_INVOKED: - ieee->privacy_invoked = !!wrqu->param.value; - break; - default: - err = -EOPNOTSUPP; - break; - } - spin_unlock_irqrestore(&ieee->lock, flags); - return err; -} - -int ieee80211_wx_get_auth(struct net_device *dev, - struct iw_request_info *info, - union iwreq_data *wrqu, - char *extra) -{ - struct ieee80211_device *ieee = netdev_priv(dev); - unsigned long flags; - int err = 0; - - spin_lock_irqsave(&ieee->lock, flags); - - switch (wrqu->param.flags & IW_AUTH_INDEX) { - case IW_AUTH_WPA_VERSION: - case IW_AUTH_CIPHER_PAIRWISE: - case IW_AUTH_CIPHER_GROUP: - case IW_AUTH_KEY_MGMT: - case IW_AUTH_TKIP_COUNTERMEASURES: /* FIXME */ - case IW_AUTH_80211_AUTH_ALG: /* FIXME */ - /* - * Host AP driver does not use these parameters and allows - * wpa_supplicant to control them internally. - */ - err = -EOPNOTSUPP; - break; - case IW_AUTH_DROP_UNENCRYPTED: - wrqu->param.value = ieee->drop_unencrypted; - break; - case IW_AUTH_WPA_ENABLED: - wrqu->param.value = ieee->wpa_enabled; - break; - case IW_AUTH_RX_UNENCRYPTED_EAPOL: - wrqu->param.value = ieee->ieee802_1x; - break; - default: - err = -EOPNOTSUPP; - break; - } - spin_unlock_irqrestore(&ieee->lock, flags); - return err; -} - EXPORT_SYMBOL(ieee80211_wx_set_encodeext); EXPORT_SYMBOL(ieee80211_wx_get_encodeext); EXPORT_SYMBOL(ieee80211_wx_get_scan); EXPORT_SYMBOL(ieee80211_wx_set_encode); EXPORT_SYMBOL(ieee80211_wx_get_encode); - -EXPORT_SYMBOL_GPL(ieee80211_wx_set_auth); -EXPORT_SYMBOL_GPL(ieee80211_wx_get_auth); diff --git a/net/ipv4/Kconfig b/net/ipv4/Kconfig index 4670683b4688..691268f3a359 100644 --- a/net/ipv4/Kconfig +++ b/net/ipv4/Kconfig @@ -356,10 +356,8 @@ config INET_ESP config INET_IPCOMP tristate "IP: IPComp transformation" - select XFRM select INET_XFRM_TUNNEL - select CRYPTO - select CRYPTO_DEFLATE + select XFRM_IPCOMP ---help--- Support for IP Payload Compression Protocol (IPComp) (RFC3173), typically needed for IPsec. @@ -632,5 +630,3 @@ config TCP_MD5SIG If unsure, say N. -source "net/ipv4/ipvs/Kconfig" - diff --git a/net/ipv4/Makefile b/net/ipv4/Makefile index ad40ef3f9ebc..80ff87ce43aa 100644 --- a/net/ipv4/Makefile +++ b/net/ipv4/Makefile @@ -33,7 +33,6 @@ obj-$(CONFIG_INET_XFRM_MODE_TRANSPORT) += xfrm4_mode_transport.o obj-$(CONFIG_INET_XFRM_MODE_TUNNEL) += xfrm4_mode_tunnel.o obj-$(CONFIG_IP_PNP) += ipconfig.o obj-$(CONFIG_NETFILTER) += netfilter.o netfilter/ -obj-$(CONFIG_IP_VS) += ipvs/ obj-$(CONFIG_INET_DIAG) += inet_diag.o obj-$(CONFIG_INET_TCP_DIAG) += tcp_diag.o obj-$(CONFIG_NET_TCPPROBE) += tcp_probe.o diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c index 24eca23c2db3..1fbff5fa4241 100644 --- a/net/ipv4/af_inet.c +++ b/net/ipv4/af_inet.c @@ -5,8 +5,6 @@ * * PF_INET protocol family socket handler. * - * Version: $Id: af_inet.c,v 1.137 2002/02/01 22:01:03 davem Exp $ - * * Authors: Ross Biro * Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG> * Florian La Roche, <flla@stud.uni-sb.de> @@ -112,12 +110,11 @@ #include <net/ipip.h> #include <net/inet_common.h> #include <net/xfrm.h> +#include <net/net_namespace.h> #ifdef CONFIG_IP_MROUTE #include <linux/mroute.h> #endif -DEFINE_SNMP_STAT(struct linux_mib, net_statistics) __read_mostly; - extern void ip_mc_drop_socket(struct sock *sk); /* The inetsw table contains everything that inet_create needs to @@ -151,10 +148,10 @@ void inet_sock_destruct(struct sock *sk) return; } - BUG_TRAP(!atomic_read(&sk->sk_rmem_alloc)); - BUG_TRAP(!atomic_read(&sk->sk_wmem_alloc)); - BUG_TRAP(!sk->sk_wmem_queued); - BUG_TRAP(!sk->sk_forward_alloc); + WARN_ON(atomic_read(&sk->sk_rmem_alloc)); + WARN_ON(atomic_read(&sk->sk_wmem_alloc)); + WARN_ON(sk->sk_wmem_queued); + WARN_ON(sk->sk_forward_alloc); kfree(inet->opt); dst_release(sk->sk_dst_cache); @@ -267,7 +264,6 @@ static inline int inet_netns_ok(struct net *net, int protocol) static int inet_create(struct net *net, struct socket *sock, int protocol) { struct sock *sk; - struct list_head *p; struct inet_protosw *answer; struct inet_sock *inet; struct proto *answer_prot; @@ -284,13 +280,12 @@ static int inet_create(struct net *net, struct socket *sock, int protocol) sock->state = SS_UNCONNECTED; /* Look for the requested type/protocol pair. */ - answer = NULL; lookup_protocol: err = -ESOCKTNOSUPPORT; rcu_read_lock(); - list_for_each_rcu(p, &inetsw[sock->type]) { - answer = list_entry(p, struct inet_protosw, list); + list_for_each_entry_rcu(answer, &inetsw[sock->type], list) { + err = 0; /* Check the non-wild match. */ if (protocol == answer->protocol) { if (protocol != IPPROTO_IP) @@ -305,10 +300,9 @@ lookup_protocol: break; } err = -EPROTONOSUPPORT; - answer = NULL; } - if (unlikely(answer == NULL)) { + if (unlikely(err)) { if (try_loading_module < 2) { rcu_read_unlock(); /* @@ -344,7 +338,7 @@ lookup_protocol: answer_flags = answer->flags; rcu_read_unlock(); - BUG_TRAP(answer_prot->slab != NULL); + WARN_ON(answer_prot->slab == NULL); err = -ENOBUFS; sk = sk_alloc(net, PF_INET, GFP_KERNEL, answer_prot); @@ -475,7 +469,7 @@ int inet_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len) */ err = -EADDRNOTAVAIL; if (!sysctl_ip_nonlocal_bind && - !inet->freebind && + !(inet->freebind || inet->transparent) && addr->sin_addr.s_addr != htonl(INADDR_ANY) && chk_addr_ret != RTN_LOCAL && chk_addr_ret != RTN_MULTICAST && @@ -664,8 +658,8 @@ int inet_accept(struct socket *sock, struct socket *newsock, int flags) lock_sock(sk2); - BUG_TRAP((1 << sk2->sk_state) & - (TCPF_ESTABLISHED | TCPF_CLOSE_WAIT | TCPF_CLOSE)); + WARN_ON(!((1 << sk2->sk_state) & + (TCPF_ESTABLISHED | TCPF_CLOSE_WAIT | TCPF_CLOSE))); sock_graft(sk2, newsock); @@ -1341,50 +1335,70 @@ static struct net_protocol icmp_protocol = { .netns_ok = 1, }; -static int __init init_ipv4_mibs(void) +static __net_init int ipv4_mib_init_net(struct net *net) { - if (snmp_mib_init((void **)net_statistics, - sizeof(struct linux_mib)) < 0) - goto err_net_mib; - if (snmp_mib_init((void **)ip_statistics, - sizeof(struct ipstats_mib)) < 0) - goto err_ip_mib; - if (snmp_mib_init((void **)icmp_statistics, - sizeof(struct icmp_mib)) < 0) - goto err_icmp_mib; - if (snmp_mib_init((void **)icmpmsg_statistics, - sizeof(struct icmpmsg_mib)) < 0) - goto err_icmpmsg_mib; - if (snmp_mib_init((void **)tcp_statistics, + if (snmp_mib_init((void **)net->mib.tcp_statistics, sizeof(struct tcp_mib)) < 0) goto err_tcp_mib; - if (snmp_mib_init((void **)udp_statistics, + if (snmp_mib_init((void **)net->mib.ip_statistics, + sizeof(struct ipstats_mib)) < 0) + goto err_ip_mib; + if (snmp_mib_init((void **)net->mib.net_statistics, + sizeof(struct linux_mib)) < 0) + goto err_net_mib; + if (snmp_mib_init((void **)net->mib.udp_statistics, sizeof(struct udp_mib)) < 0) goto err_udp_mib; - if (snmp_mib_init((void **)udplite_statistics, + if (snmp_mib_init((void **)net->mib.udplite_statistics, sizeof(struct udp_mib)) < 0) goto err_udplite_mib; + if (snmp_mib_init((void **)net->mib.icmp_statistics, + sizeof(struct icmp_mib)) < 0) + goto err_icmp_mib; + if (snmp_mib_init((void **)net->mib.icmpmsg_statistics, + sizeof(struct icmpmsg_mib)) < 0) + goto err_icmpmsg_mib; - tcp_mib_init(); - + tcp_mib_init(net); return 0; -err_udplite_mib: - snmp_mib_free((void **)udp_statistics); -err_udp_mib: - snmp_mib_free((void **)tcp_statistics); -err_tcp_mib: - snmp_mib_free((void **)icmpmsg_statistics); err_icmpmsg_mib: - snmp_mib_free((void **)icmp_statistics); + snmp_mib_free((void **)net->mib.icmp_statistics); err_icmp_mib: - snmp_mib_free((void **)ip_statistics); -err_ip_mib: - snmp_mib_free((void **)net_statistics); + snmp_mib_free((void **)net->mib.udplite_statistics); +err_udplite_mib: + snmp_mib_free((void **)net->mib.udp_statistics); +err_udp_mib: + snmp_mib_free((void **)net->mib.net_statistics); err_net_mib: + snmp_mib_free((void **)net->mib.ip_statistics); +err_ip_mib: + snmp_mib_free((void **)net->mib.tcp_statistics); +err_tcp_mib: return -ENOMEM; } +static __net_exit void ipv4_mib_exit_net(struct net *net) +{ + snmp_mib_free((void **)net->mib.icmpmsg_statistics); + snmp_mib_free((void **)net->mib.icmp_statistics); + snmp_mib_free((void **)net->mib.udplite_statistics); + snmp_mib_free((void **)net->mib.udp_statistics); + snmp_mib_free((void **)net->mib.net_statistics); + snmp_mib_free((void **)net->mib.ip_statistics); + snmp_mib_free((void **)net->mib.tcp_statistics); +} + +static __net_initdata struct pernet_operations ipv4_mib_ops = { + .init = ipv4_mib_init_net, + .exit = ipv4_mib_exit_net, +}; + +static int __init init_ipv4_mibs(void) +{ + return register_pernet_subsys(&ipv4_mib_ops); +} + static int ipv4_proc_init(void); /* @@ -1425,6 +1439,10 @@ static int __init inet_init(void) (void)sock_register(&inet_family_ops); +#ifdef CONFIG_SYSCTL + ip_static_sysctl_init(); +#endif + /* * Add all the base protocols. */ @@ -1481,14 +1499,15 @@ static int __init inet_init(void) * Initialise the multicast router */ #if defined(CONFIG_IP_MROUTE) - ip_mr_init(); + if (ip_mr_init()) + printk(KERN_CRIT "inet_init: Cannot init ipv4 mroute\n"); #endif /* * Initialise per-cpu ipv4 mibs */ if (init_ipv4_mibs()) - printk(KERN_CRIT "inet_init: Cannot init ipv4 mibs\n"); ; + printk(KERN_CRIT "inet_init: Cannot init ipv4 mibs\n"); ipv4_proc_init(); @@ -1560,5 +1579,4 @@ EXPORT_SYMBOL(inet_sock_destruct); EXPORT_SYMBOL(inet_stream_connect); EXPORT_SYMBOL(inet_stream_ops); EXPORT_SYMBOL(inet_unregister_protosw); -EXPORT_SYMBOL(net_statistics); EXPORT_SYMBOL(sysctl_ip_nonlocal_bind); diff --git a/net/ipv4/arp.c b/net/ipv4/arp.c index 9b539fa9fe18..b043eda60b04 100644 --- a/net/ipv4/arp.c +++ b/net/ipv4/arp.c @@ -1,7 +1,5 @@ /* linux/net/ipv4/arp.c * - * Version: $Id: arp.c,v 1.99 2001/08/30 22:55:42 davem Exp $ - * * Copyright (C) 1994 by Florian La Roche * * This module implements the Address Resolution Protocol ARP (RFC 826), @@ -423,11 +421,12 @@ static int arp_filter(__be32 sip, __be32 tip, struct net_device *dev) struct rtable *rt; int flag = 0; /*unsigned long now; */ + struct net *net = dev_net(dev); - if (ip_route_output_key(dev_net(dev), &rt, &fl) < 0) + if (ip_route_output_key(net, &rt, &fl) < 0) return 1; if (rt->u.dst.dev != dev) { - NET_INC_STATS_BH(LINUX_MIB_ARPFILTER); + NET_INC_STATS_BH(net, LINUX_MIB_ARPFILTER); flag = 1; } ip_rt_put(rt); @@ -1199,7 +1198,7 @@ static int arp_netdev_event(struct notifier_block *this, unsigned long event, vo switch (event) { case NETDEV_CHANGEADDR: neigh_changeaddr(&arp_tbl, dev); - rt_cache_flush(0); + rt_cache_flush(dev_net(dev), 0); break; default: break; diff --git a/net/ipv4/cipso_ipv4.c b/net/ipv4/cipso_ipv4.c index 2c0e4572cc90..490e035c6d90 100644 --- a/net/ipv4/cipso_ipv4.c +++ b/net/ipv4/cipso_ipv4.c @@ -13,7 +13,7 @@ */ /* - * (c) Copyright Hewlett-Packard Development Company, L.P., 2006 + * (c) Copyright Hewlett-Packard Development Company, L.P., 2006, 2008 * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by @@ -47,17 +47,7 @@ #include <asm/bug.h> #include <asm/unaligned.h> -struct cipso_v4_domhsh_entry { - char *domain; - u32 valid; - struct list_head list; - struct rcu_head rcu; -}; - /* List of available DOI definitions */ -/* XXX - Updates should be minimal so having a single lock for the - * cipso_v4_doi_list and the cipso_v4_doi_list->dom_list should be - * okay. */ /* XXX - This currently assumes a minimal number of different DOIs in use, * if in practice there are a lot of different DOIs this list should * probably be turned into a hash table or something similar so we @@ -119,6 +109,19 @@ int cipso_v4_rbm_strictvalid = 1; * be omitted. */ #define CIPSO_V4_TAG_RNG_CAT_MAX 8 +/* Base length of the local tag (non-standard tag). + * Tag definition (may change between kernel versions) + * + * 0 8 16 24 32 + * +----------+----------+----------+----------+ + * | 10000000 | 00000110 | 32-bit secid value | + * +----------+----------+----------+----------+ + * | in (host byte order)| + * +----------+----------+ + * + */ +#define CIPSO_V4_TAG_LOC_BLEN 6 + /* * Helper Functions */ @@ -194,25 +197,6 @@ static void cipso_v4_bitmap_setbit(unsigned char *bitmap, } /** - * cipso_v4_doi_domhsh_free - Frees a domain list entry - * @entry: the entry's RCU field - * - * Description: - * This function is designed to be used as a callback to the call_rcu() - * function so that the memory allocated to a domain list entry can be released - * safely. - * - */ -static void cipso_v4_doi_domhsh_free(struct rcu_head *entry) -{ - struct cipso_v4_domhsh_entry *ptr; - - ptr = container_of(entry, struct cipso_v4_domhsh_entry, rcu); - kfree(ptr->domain); - kfree(ptr); -} - -/** * cipso_v4_cache_entry_free - Frees a cache entry * @entry: the entry to free * @@ -457,7 +441,7 @@ static struct cipso_v4_doi *cipso_v4_doi_search(u32 doi) struct cipso_v4_doi *iter; list_for_each_entry_rcu(iter, &cipso_v4_doi_list, list) - if (iter->doi == doi && iter->valid) + if (iter->doi == doi && atomic_read(&iter->refcount)) return iter; return NULL; } @@ -496,14 +480,17 @@ int cipso_v4_doi_add(struct cipso_v4_doi *doi_def) if (doi_def->type != CIPSO_V4_MAP_PASS) return -EINVAL; break; + case CIPSO_V4_TAG_LOCAL: + if (doi_def->type != CIPSO_V4_MAP_LOCAL) + return -EINVAL; + break; default: return -EINVAL; } } - doi_def->valid = 1; + atomic_set(&doi_def->refcount, 1); INIT_RCU_HEAD(&doi_def->rcu); - INIT_LIST_HEAD(&doi_def->dom_list); spin_lock(&cipso_v4_doi_list_lock); if (cipso_v4_doi_search(doi_def->doi) != NULL) @@ -519,59 +506,129 @@ doi_add_failure: } /** + * cipso_v4_doi_free - Frees a DOI definition + * @entry: the entry's RCU field + * + * Description: + * This function frees all of the memory associated with a DOI definition. + * + */ +void cipso_v4_doi_free(struct cipso_v4_doi *doi_def) +{ + if (doi_def == NULL) + return; + + switch (doi_def->type) { + case CIPSO_V4_MAP_TRANS: + kfree(doi_def->map.std->lvl.cipso); + kfree(doi_def->map.std->lvl.local); + kfree(doi_def->map.std->cat.cipso); + kfree(doi_def->map.std->cat.local); + break; + } + kfree(doi_def); +} + +/** + * cipso_v4_doi_free_rcu - Frees a DOI definition via the RCU pointer + * @entry: the entry's RCU field + * + * Description: + * This function is designed to be used as a callback to the call_rcu() + * function so that the memory allocated to the DOI definition can be released + * safely. + * + */ +static void cipso_v4_doi_free_rcu(struct rcu_head *entry) +{ + struct cipso_v4_doi *doi_def; + + doi_def = container_of(entry, struct cipso_v4_doi, rcu); + cipso_v4_doi_free(doi_def); +} + +/** * cipso_v4_doi_remove - Remove an existing DOI from the CIPSO protocol engine * @doi: the DOI value * @audit_secid: the LSM secid to use in the audit message - * @callback: the DOI cleanup/free callback * * Description: - * Removes a DOI definition from the CIPSO engine, @callback is called to - * free any memory. The NetLabel routines will be called to release their own - * LSM domain mappings as well as our own domain list. Returns zero on - * success and negative values on failure. + * Removes a DOI definition from the CIPSO engine. The NetLabel routines will + * be called to release their own LSM domain mappings as well as our own + * domain list. Returns zero on success and negative values on failure. * */ -int cipso_v4_doi_remove(u32 doi, - struct netlbl_audit *audit_info, - void (*callback) (struct rcu_head * head)) +int cipso_v4_doi_remove(u32 doi, struct netlbl_audit *audit_info) { struct cipso_v4_doi *doi_def; - struct cipso_v4_domhsh_entry *dom_iter; spin_lock(&cipso_v4_doi_list_lock); doi_def = cipso_v4_doi_search(doi); - if (doi_def != NULL) { - doi_def->valid = 0; - list_del_rcu(&doi_def->list); + if (doi_def == NULL) { spin_unlock(&cipso_v4_doi_list_lock); - rcu_read_lock(); - list_for_each_entry_rcu(dom_iter, &doi_def->dom_list, list) - if (dom_iter->valid) - netlbl_cfg_map_del(dom_iter->domain, - audit_info); - rcu_read_unlock(); - cipso_v4_cache_invalidate(); - call_rcu(&doi_def->rcu, callback); - return 0; + return -ENOENT; + } + if (!atomic_dec_and_test(&doi_def->refcount)) { + spin_unlock(&cipso_v4_doi_list_lock); + return -EBUSY; } + list_del_rcu(&doi_def->list); spin_unlock(&cipso_v4_doi_list_lock); - return -ENOENT; + cipso_v4_cache_invalidate(); + call_rcu(&doi_def->rcu, cipso_v4_doi_free_rcu); + + return 0; } /** - * cipso_v4_doi_getdef - Returns a pointer to a valid DOI definition + * cipso_v4_doi_getdef - Returns a reference to a valid DOI definition * @doi: the DOI value * * Description: * Searches for a valid DOI definition and if one is found it is returned to * the caller. Otherwise NULL is returned. The caller must ensure that - * rcu_read_lock() is held while accessing the returned definition. + * rcu_read_lock() is held while accessing the returned definition and the DOI + * definition reference count is decremented when the caller is done. * */ struct cipso_v4_doi *cipso_v4_doi_getdef(u32 doi) { - return cipso_v4_doi_search(doi); + struct cipso_v4_doi *doi_def; + + rcu_read_lock(); + doi_def = cipso_v4_doi_search(doi); + if (doi_def == NULL) + goto doi_getdef_return; + if (!atomic_inc_not_zero(&doi_def->refcount)) + doi_def = NULL; + +doi_getdef_return: + rcu_read_unlock(); + return doi_def; +} + +/** + * cipso_v4_doi_putdef - Releases a reference for the given DOI definition + * @doi_def: the DOI definition + * + * Description: + * Releases a DOI definition reference obtained from cipso_v4_doi_getdef(). + * + */ +void cipso_v4_doi_putdef(struct cipso_v4_doi *doi_def) +{ + if (doi_def == NULL) + return; + + if (!atomic_dec_and_test(&doi_def->refcount)) + return; + spin_lock(&cipso_v4_doi_list_lock); + list_del_rcu(&doi_def->list); + spin_unlock(&cipso_v4_doi_list_lock); + + cipso_v4_cache_invalidate(); + call_rcu(&doi_def->rcu, cipso_v4_doi_free_rcu); } /** @@ -597,7 +654,7 @@ int cipso_v4_doi_walk(u32 *skip_cnt, rcu_read_lock(); list_for_each_entry_rcu(iter_doi, &cipso_v4_doi_list, list) - if (iter_doi->valid) { + if (atomic_read(&iter_doi->refcount) > 0) { if (doi_cnt++ < *skip_cnt) continue; ret_val = callback(iter_doi, cb_arg); @@ -613,85 +670,6 @@ doi_walk_return: return ret_val; } -/** - * cipso_v4_doi_domhsh_add - Adds a domain entry to a DOI definition - * @doi_def: the DOI definition - * @domain: the domain to add - * - * Description: - * Adds the @domain to the DOI specified by @doi_def, this function - * should only be called by external functions (i.e. NetLabel). This function - * does allocate memory. Returns zero on success, negative values on failure. - * - */ -int cipso_v4_doi_domhsh_add(struct cipso_v4_doi *doi_def, const char *domain) -{ - struct cipso_v4_domhsh_entry *iter; - struct cipso_v4_domhsh_entry *new_dom; - - new_dom = kzalloc(sizeof(*new_dom), GFP_KERNEL); - if (new_dom == NULL) - return -ENOMEM; - if (domain) { - new_dom->domain = kstrdup(domain, GFP_KERNEL); - if (new_dom->domain == NULL) { - kfree(new_dom); - return -ENOMEM; - } - } - new_dom->valid = 1; - INIT_RCU_HEAD(&new_dom->rcu); - - spin_lock(&cipso_v4_doi_list_lock); - list_for_each_entry(iter, &doi_def->dom_list, list) - if (iter->valid && - ((domain != NULL && iter->domain != NULL && - strcmp(iter->domain, domain) == 0) || - (domain == NULL && iter->domain == NULL))) { - spin_unlock(&cipso_v4_doi_list_lock); - kfree(new_dom->domain); - kfree(new_dom); - return -EEXIST; - } - list_add_tail_rcu(&new_dom->list, &doi_def->dom_list); - spin_unlock(&cipso_v4_doi_list_lock); - - return 0; -} - -/** - * cipso_v4_doi_domhsh_remove - Removes a domain entry from a DOI definition - * @doi_def: the DOI definition - * @domain: the domain to remove - * - * Description: - * Removes the @domain from the DOI specified by @doi_def, this function - * should only be called by external functions (i.e. NetLabel). Returns zero - * on success and negative values on error. - * - */ -int cipso_v4_doi_domhsh_remove(struct cipso_v4_doi *doi_def, - const char *domain) -{ - struct cipso_v4_domhsh_entry *iter; - - spin_lock(&cipso_v4_doi_list_lock); - list_for_each_entry(iter, &doi_def->dom_list, list) - if (iter->valid && - ((domain != NULL && iter->domain != NULL && - strcmp(iter->domain, domain) == 0) || - (domain == NULL && iter->domain == NULL))) { - iter->valid = 0; - list_del_rcu(&iter->list); - spin_unlock(&cipso_v4_doi_list_lock); - call_rcu(&iter->rcu, cipso_v4_doi_domhsh_free); - return 0; - } - spin_unlock(&cipso_v4_doi_list_lock); - - return -ENOENT; -} - /* * Label Mapping Functions */ @@ -712,7 +690,7 @@ static int cipso_v4_map_lvl_valid(const struct cipso_v4_doi *doi_def, u8 level) switch (doi_def->type) { case CIPSO_V4_MAP_PASS: return 0; - case CIPSO_V4_MAP_STD: + case CIPSO_V4_MAP_TRANS: if (doi_def->map.std->lvl.cipso[level] < CIPSO_V4_INV_LVL) return 0; break; @@ -741,7 +719,7 @@ static int cipso_v4_map_lvl_hton(const struct cipso_v4_doi *doi_def, case CIPSO_V4_MAP_PASS: *net_lvl = host_lvl; return 0; - case CIPSO_V4_MAP_STD: + case CIPSO_V4_MAP_TRANS: if (host_lvl < doi_def->map.std->lvl.local_size && doi_def->map.std->lvl.local[host_lvl] < CIPSO_V4_INV_LVL) { *net_lvl = doi_def->map.std->lvl.local[host_lvl]; @@ -775,7 +753,7 @@ static int cipso_v4_map_lvl_ntoh(const struct cipso_v4_doi *doi_def, case CIPSO_V4_MAP_PASS: *host_lvl = net_lvl; return 0; - case CIPSO_V4_MAP_STD: + case CIPSO_V4_MAP_TRANS: map_tbl = doi_def->map.std; if (net_lvl < map_tbl->lvl.cipso_size && map_tbl->lvl.cipso[net_lvl] < CIPSO_V4_INV_LVL) { @@ -812,7 +790,7 @@ static int cipso_v4_map_cat_rbm_valid(const struct cipso_v4_doi *doi_def, switch (doi_def->type) { case CIPSO_V4_MAP_PASS: return 0; - case CIPSO_V4_MAP_STD: + case CIPSO_V4_MAP_TRANS: cipso_cat_size = doi_def->map.std->cat.cipso_size; cipso_array = doi_def->map.std->cat.cipso; for (;;) { @@ -860,7 +838,7 @@ static int cipso_v4_map_cat_rbm_hton(const struct cipso_v4_doi *doi_def, u32 host_cat_size = 0; u32 *host_cat_array = NULL; - if (doi_def->type == CIPSO_V4_MAP_STD) { + if (doi_def->type == CIPSO_V4_MAP_TRANS) { host_cat_size = doi_def->map.std->cat.local_size; host_cat_array = doi_def->map.std->cat.local; } @@ -875,7 +853,7 @@ static int cipso_v4_map_cat_rbm_hton(const struct cipso_v4_doi *doi_def, case CIPSO_V4_MAP_PASS: net_spot = host_spot; break; - case CIPSO_V4_MAP_STD: + case CIPSO_V4_MAP_TRANS: if (host_spot >= host_cat_size) return -EPERM; net_spot = host_cat_array[host_spot]; @@ -921,7 +899,7 @@ static int cipso_v4_map_cat_rbm_ntoh(const struct cipso_v4_doi *doi_def, u32 net_cat_size = 0; u32 *net_cat_array = NULL; - if (doi_def->type == CIPSO_V4_MAP_STD) { + if (doi_def->type == CIPSO_V4_MAP_TRANS) { net_cat_size = doi_def->map.std->cat.cipso_size; net_cat_array = doi_def->map.std->cat.cipso; } @@ -941,7 +919,7 @@ static int cipso_v4_map_cat_rbm_ntoh(const struct cipso_v4_doi *doi_def, case CIPSO_V4_MAP_PASS: host_spot = net_spot; break; - case CIPSO_V4_MAP_STD: + case CIPSO_V4_MAP_TRANS: if (net_spot >= net_cat_size) return -EPERM; host_spot = net_cat_array[net_spot]; @@ -1277,7 +1255,7 @@ static int cipso_v4_gentag_rbm(const struct cipso_v4_doi *doi_def, } else tag_len = 4; - buffer[0] = 0x01; + buffer[0] = CIPSO_V4_TAG_RBITMAP; buffer[1] = tag_len; buffer[3] = level; @@ -1373,7 +1351,7 @@ static int cipso_v4_gentag_enum(const struct cipso_v4_doi *doi_def, } else tag_len = 4; - buffer[0] = 0x02; + buffer[0] = CIPSO_V4_TAG_ENUM; buffer[1] = tag_len; buffer[3] = level; @@ -1469,7 +1447,7 @@ static int cipso_v4_gentag_rng(const struct cipso_v4_doi *doi_def, } else tag_len = 4; - buffer[0] = 0x05; + buffer[0] = CIPSO_V4_TAG_RANGE; buffer[1] = tag_len; buffer[3] = level; @@ -1523,6 +1501,54 @@ static int cipso_v4_parsetag_rng(const struct cipso_v4_doi *doi_def, } /** + * cipso_v4_gentag_loc - Generate a CIPSO local tag (non-standard) + * @doi_def: the DOI definition + * @secattr: the security attributes + * @buffer: the option buffer + * @buffer_len: length of buffer in bytes + * + * Description: + * Generate a CIPSO option using the local tag. Returns the size of the tag + * on success, negative values on failure. + * + */ +static int cipso_v4_gentag_loc(const struct cipso_v4_doi *doi_def, + const struct netlbl_lsm_secattr *secattr, + unsigned char *buffer, + u32 buffer_len) +{ + if (!(secattr->flags & NETLBL_SECATTR_SECID)) + return -EPERM; + + buffer[0] = CIPSO_V4_TAG_LOCAL; + buffer[1] = CIPSO_V4_TAG_LOC_BLEN; + *(u32 *)&buffer[2] = secattr->attr.secid; + + return CIPSO_V4_TAG_LOC_BLEN; +} + +/** + * cipso_v4_parsetag_loc - Parse a CIPSO local tag + * @doi_def: the DOI definition + * @tag: the CIPSO tag + * @secattr: the security attributes + * + * Description: + * Parse a CIPSO local tag and return the security attributes in @secattr. + * Return zero on success, negatives values on failure. + * + */ +static int cipso_v4_parsetag_loc(const struct cipso_v4_doi *doi_def, + const unsigned char *tag, + struct netlbl_lsm_secattr *secattr) +{ + secattr->attr.secid = *(u32 *)&tag[2]; + secattr->flags |= NETLBL_SECATTR_SECID; + + return 0; +} + +/** * cipso_v4_validate - Validate a CIPSO option * @option: the start of the option, on error it is set to point to the error * @@ -1541,7 +1567,7 @@ static int cipso_v4_parsetag_rng(const struct cipso_v4_doi *doi_def, * that is unrecognized." * */ -int cipso_v4_validate(unsigned char **option) +int cipso_v4_validate(const struct sk_buff *skb, unsigned char **option) { unsigned char *opt = *option; unsigned char *tag; @@ -1566,7 +1592,7 @@ int cipso_v4_validate(unsigned char **option) goto validate_return_locked; } - opt_iter = 6; + opt_iter = CIPSO_V4_HDR_LEN; tag = opt + opt_iter; while (opt_iter < opt_len) { for (tag_iter = 0; doi_def->tags[tag_iter] != tag[0];) @@ -1584,7 +1610,7 @@ int cipso_v4_validate(unsigned char **option) switch (tag[0]) { case CIPSO_V4_TAG_RBITMAP: - if (tag_len < 4) { + if (tag_len < CIPSO_V4_TAG_RBM_BLEN) { err_offset = opt_iter + 1; goto validate_return_locked; } @@ -1602,7 +1628,7 @@ int cipso_v4_validate(unsigned char **option) err_offset = opt_iter + 3; goto validate_return_locked; } - if (tag_len > 4 && + if (tag_len > CIPSO_V4_TAG_RBM_BLEN && cipso_v4_map_cat_rbm_valid(doi_def, &tag[4], tag_len - 4) < 0) { @@ -1612,7 +1638,7 @@ int cipso_v4_validate(unsigned char **option) } break; case CIPSO_V4_TAG_ENUM: - if (tag_len < 4) { + if (tag_len < CIPSO_V4_TAG_ENUM_BLEN) { err_offset = opt_iter + 1; goto validate_return_locked; } @@ -1622,7 +1648,7 @@ int cipso_v4_validate(unsigned char **option) err_offset = opt_iter + 3; goto validate_return_locked; } - if (tag_len > 4 && + if (tag_len > CIPSO_V4_TAG_ENUM_BLEN && cipso_v4_map_cat_enum_valid(doi_def, &tag[4], tag_len - 4) < 0) { @@ -1631,7 +1657,7 @@ int cipso_v4_validate(unsigned char **option) } break; case CIPSO_V4_TAG_RANGE: - if (tag_len < 4) { + if (tag_len < CIPSO_V4_TAG_RNG_BLEN) { err_offset = opt_iter + 1; goto validate_return_locked; } @@ -1641,7 +1667,7 @@ int cipso_v4_validate(unsigned char **option) err_offset = opt_iter + 3; goto validate_return_locked; } - if (tag_len > 4 && + if (tag_len > CIPSO_V4_TAG_RNG_BLEN && cipso_v4_map_cat_rng_valid(doi_def, &tag[4], tag_len - 4) < 0) { @@ -1649,6 +1675,19 @@ int cipso_v4_validate(unsigned char **option) goto validate_return_locked; } break; + case CIPSO_V4_TAG_LOCAL: + /* This is a non-standard tag that we only allow for + * local connections, so if the incoming interface is + * not the loopback device drop the packet. */ + if (!(skb->dev->flags & IFF_LOOPBACK)) { + err_offset = opt_iter; + goto validate_return_locked; + } + if (tag_len != CIPSO_V4_TAG_LOC_BLEN) { + err_offset = opt_iter + 1; + goto validate_return_locked; + } + break; default: err_offset = opt_iter; goto validate_return_locked; @@ -1704,48 +1743,27 @@ void cipso_v4_error(struct sk_buff *skb, int error, u32 gateway) } /** - * cipso_v4_sock_setattr - Add a CIPSO option to a socket - * @sk: the socket + * cipso_v4_genopt - Generate a CIPSO option + * @buf: the option buffer + * @buf_len: the size of opt_buf * @doi_def: the CIPSO DOI to use - * @secattr: the specific security attributes of the socket + * @secattr: the security attributes * * Description: - * Set the CIPSO option on the given socket using the DOI definition and - * security attributes passed to the function. This function requires - * exclusive access to @sk, which means it either needs to be in the - * process of being created or locked. Returns zero on success and negative - * values on failure. + * Generate a CIPSO option using the DOI definition and security attributes + * passed to the function. Returns the length of the option on success and + * negative values on failure. * */ -int cipso_v4_sock_setattr(struct sock *sk, - const struct cipso_v4_doi *doi_def, - const struct netlbl_lsm_secattr *secattr) +static int cipso_v4_genopt(unsigned char *buf, u32 buf_len, + const struct cipso_v4_doi *doi_def, + const struct netlbl_lsm_secattr *secattr) { - int ret_val = -EPERM; + int ret_val; u32 iter; - unsigned char *buf; - u32 buf_len = 0; - u32 opt_len; - struct ip_options *opt = NULL; - struct inet_sock *sk_inet; - struct inet_connection_sock *sk_conn; - /* In the case of sock_create_lite(), the sock->sk field is not - * defined yet but it is not a problem as the only users of these - * "lite" PF_INET sockets are functions which do an accept() call - * afterwards so we will label the socket as part of the accept(). */ - if (sk == NULL) - return 0; - - /* We allocate the maximum CIPSO option size here so we are probably - * being a little wasteful, but it makes our life _much_ easier later - * on and after all we are only talking about 40 bytes. */ - buf_len = CIPSO_V4_OPT_LEN_MAX; - buf = kmalloc(buf_len, GFP_ATOMIC); - if (buf == NULL) { - ret_val = -ENOMEM; - goto socket_setattr_failure; - } + if (buf_len <= CIPSO_V4_HDR_LEN) + return -ENOSPC; /* XXX - This code assumes only one tag per CIPSO option which isn't * really a good assumption to make but since we only support the MAC @@ -1772,9 +1790,14 @@ int cipso_v4_sock_setattr(struct sock *sk, &buf[CIPSO_V4_HDR_LEN], buf_len - CIPSO_V4_HDR_LEN); break; + case CIPSO_V4_TAG_LOCAL: + ret_val = cipso_v4_gentag_loc(doi_def, + secattr, + &buf[CIPSO_V4_HDR_LEN], + buf_len - CIPSO_V4_HDR_LEN); + break; default: - ret_val = -EPERM; - goto socket_setattr_failure; + return -EPERM; } iter++; @@ -1782,9 +1805,58 @@ int cipso_v4_sock_setattr(struct sock *sk, iter < CIPSO_V4_TAG_MAXCNT && doi_def->tags[iter] != CIPSO_V4_TAG_INVALID); if (ret_val < 0) - goto socket_setattr_failure; + return ret_val; cipso_v4_gentag_hdr(doi_def, buf, ret_val); - buf_len = CIPSO_V4_HDR_LEN + ret_val; + return CIPSO_V4_HDR_LEN + ret_val; +} + +/** + * cipso_v4_sock_setattr - Add a CIPSO option to a socket + * @sk: the socket + * @doi_def: the CIPSO DOI to use + * @secattr: the specific security attributes of the socket + * + * Description: + * Set the CIPSO option on the given socket using the DOI definition and + * security attributes passed to the function. This function requires + * exclusive access to @sk, which means it either needs to be in the + * process of being created or locked. Returns zero on success and negative + * values on failure. + * + */ +int cipso_v4_sock_setattr(struct sock *sk, + const struct cipso_v4_doi *doi_def, + const struct netlbl_lsm_secattr *secattr) +{ + int ret_val = -EPERM; + unsigned char *buf = NULL; + u32 buf_len; + u32 opt_len; + struct ip_options *opt = NULL; + struct inet_sock *sk_inet; + struct inet_connection_sock *sk_conn; + + /* In the case of sock_create_lite(), the sock->sk field is not + * defined yet but it is not a problem as the only users of these + * "lite" PF_INET sockets are functions which do an accept() call + * afterwards so we will label the socket as part of the accept(). */ + if (sk == NULL) + return 0; + + /* We allocate the maximum CIPSO option size here so we are probably + * being a little wasteful, but it makes our life _much_ easier later + * on and after all we are only talking about 40 bytes. */ + buf_len = CIPSO_V4_OPT_LEN_MAX; + buf = kmalloc(buf_len, GFP_ATOMIC); + if (buf == NULL) { + ret_val = -ENOMEM; + goto socket_setattr_failure; + } + + ret_val = cipso_v4_genopt(buf, buf_len, doi_def, secattr); + if (ret_val < 0) + goto socket_setattr_failure; + buf_len = ret_val; /* We can't use ip_options_get() directly because it makes a call to * ip_options_get_alloc() which allocates memory with GFP_KERNEL and @@ -1822,6 +1894,80 @@ socket_setattr_failure: } /** + * cipso_v4_sock_delattr - Delete the CIPSO option from a socket + * @sk: the socket + * + * Description: + * Removes the CIPSO option from a socket, if present. + * + */ +void cipso_v4_sock_delattr(struct sock *sk) +{ + u8 hdr_delta; + struct ip_options *opt; + struct inet_sock *sk_inet; + + sk_inet = inet_sk(sk); + opt = sk_inet->opt; + if (opt == NULL || opt->cipso == 0) + return; + + if (opt->srr || opt->rr || opt->ts || opt->router_alert) { + u8 cipso_len; + u8 cipso_off; + unsigned char *cipso_ptr; + int iter; + int optlen_new; + + cipso_off = opt->cipso - sizeof(struct iphdr); + cipso_ptr = &opt->__data[cipso_off]; + cipso_len = cipso_ptr[1]; + + if (opt->srr > opt->cipso) + opt->srr -= cipso_len; + if (opt->rr > opt->cipso) + opt->rr -= cipso_len; + if (opt->ts > opt->cipso) + opt->ts -= cipso_len; + if (opt->router_alert > opt->cipso) + opt->router_alert -= cipso_len; + opt->cipso = 0; + + memmove(cipso_ptr, cipso_ptr + cipso_len, + opt->optlen - cipso_off - cipso_len); + + /* determining the new total option length is tricky because of + * the padding necessary, the only thing i can think to do at + * this point is walk the options one-by-one, skipping the + * padding at the end to determine the actual option size and + * from there we can determine the new total option length */ + iter = 0; + optlen_new = 0; + while (iter < opt->optlen) + if (opt->__data[iter] != IPOPT_NOP) { + iter += opt->__data[iter + 1]; + optlen_new = iter; + } else + iter++; + hdr_delta = opt->optlen; + opt->optlen = (optlen_new + 3) & ~3; + hdr_delta -= opt->optlen; + } else { + /* only the cipso option was present on the socket so we can + * remove the entire option struct */ + sk_inet->opt = NULL; + hdr_delta = opt->optlen; + kfree(opt); + } + + if (sk_inet->is_icsk && hdr_delta > 0) { + struct inet_connection_sock *sk_conn = inet_csk(sk); + sk_conn->icsk_ext_hdr_len -= hdr_delta; + sk_conn->icsk_sync_mss(sk, sk_conn->icsk_pmtu_cookie); + } +} + +/** * cipso_v4_getattr - Helper function for the cipso_v4_*_getattr functions * @cipso: the CIPSO v4 option * @secattr: the security attributes @@ -1859,6 +2005,9 @@ static int cipso_v4_getattr(const unsigned char *cipso, case CIPSO_V4_TAG_RANGE: ret_val = cipso_v4_parsetag_rng(doi_def, &cipso[6], secattr); break; + case CIPSO_V4_TAG_LOCAL: + ret_val = cipso_v4_parsetag_loc(doi_def, &cipso[6], secattr); + break; } if (ret_val == 0) secattr->type = NETLBL_NLTYPE_CIPSOV4; @@ -1893,6 +2042,123 @@ int cipso_v4_sock_getattr(struct sock *sk, struct netlbl_lsm_secattr *secattr) } /** + * cipso_v4_skbuff_setattr - Set the CIPSO option on a packet + * @skb: the packet + * @secattr: the security attributes + * + * Description: + * Set the CIPSO option on the given packet based on the security attributes. + * Returns a pointer to the IP header on success and NULL on failure. + * + */ +int cipso_v4_skbuff_setattr(struct sk_buff *skb, + const struct cipso_v4_doi *doi_def, + const struct netlbl_lsm_secattr *secattr) +{ + int ret_val; + struct iphdr *iph; + struct ip_options *opt = &IPCB(skb)->opt; + unsigned char buf[CIPSO_V4_OPT_LEN_MAX]; + u32 buf_len = CIPSO_V4_OPT_LEN_MAX; + u32 opt_len; + int len_delta; + + buf_len = cipso_v4_genopt(buf, buf_len, doi_def, secattr); + if (buf_len < 0) + return buf_len; + opt_len = (buf_len + 3) & ~3; + + /* we overwrite any existing options to ensure that we have enough + * room for the CIPSO option, the reason is that we _need_ to guarantee + * that the security label is applied to the packet - we do the same + * thing when using the socket options and it hasn't caused a problem, + * if we need to we can always revisit this choice later */ + + len_delta = opt_len - opt->optlen; + /* if we don't ensure enough headroom we could panic on the skb_push() + * call below so make sure we have enough, we are also "mangling" the + * packet so we should probably do a copy-on-write call anyway */ + ret_val = skb_cow(skb, skb_headroom(skb) + len_delta); + if (ret_val < 0) + return ret_val; + + if (len_delta > 0) { + /* we assume that the header + opt->optlen have already been + * "pushed" in ip_options_build() or similar */ + iph = ip_hdr(skb); + skb_push(skb, len_delta); + memmove((char *)iph - len_delta, iph, iph->ihl << 2); + skb_reset_network_header(skb); + iph = ip_hdr(skb); + } else if (len_delta < 0) { + iph = ip_hdr(skb); + memset(iph + 1, IPOPT_NOP, opt->optlen); + } else + iph = ip_hdr(skb); + + if (opt->optlen > 0) + memset(opt, 0, sizeof(*opt)); + opt->optlen = opt_len; + opt->cipso = sizeof(struct iphdr); + opt->is_changed = 1; + + /* we have to do the following because we are being called from a + * netfilter hook which means the packet already has had the header + * fields populated and the checksum calculated - yes this means we + * are doing more work than needed but we do it to keep the core + * stack clean and tidy */ + memcpy(iph + 1, buf, buf_len); + if (opt_len > buf_len) + memset((char *)(iph + 1) + buf_len, 0, opt_len - buf_len); + if (len_delta != 0) { + iph->ihl = 5 + (opt_len >> 2); + iph->tot_len = htons(skb->len); + } + ip_send_check(iph); + + return 0; +} + +/** + * cipso_v4_skbuff_delattr - Delete any CIPSO options from a packet + * @skb: the packet + * + * Description: + * Removes any and all CIPSO options from the given packet. Returns zero on + * success, negative values on failure. + * + */ +int cipso_v4_skbuff_delattr(struct sk_buff *skb) +{ + int ret_val; + struct iphdr *iph; + struct ip_options *opt = &IPCB(skb)->opt; + unsigned char *cipso_ptr; + + if (opt->cipso == 0) + return 0; + + /* since we are changing the packet we should make a copy */ + ret_val = skb_cow(skb, skb_headroom(skb)); + if (ret_val < 0) + return ret_val; + + /* the easiest thing to do is just replace the cipso option with noop + * options since we don't change the size of the packet, although we + * still need to recalculate the checksum */ + + iph = ip_hdr(skb); + cipso_ptr = (unsigned char *)iph + opt->cipso; + memset(cipso_ptr, IPOPT_NOOP, cipso_ptr[1]); + opt->cipso = 0; + opt->is_changed = 1; + + ip_send_check(iph); + + return 0; +} + +/** * cipso_v4_skbuff_getattr - Get the security attributes from the CIPSO option * @skb: the packet * @secattr: the security attributes diff --git a/net/ipv4/datagram.c b/net/ipv4/datagram.c index 0c0c73f368ce..5e6c5a0f3fde 100644 --- a/net/ipv4/datagram.c +++ b/net/ipv4/datagram.c @@ -52,7 +52,7 @@ int ip4_datagram_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len) inet->sport, usin->sin_port, sk, 1); if (err) { if (err == -ENETUNREACH) - IP_INC_STATS_BH(IPSTATS_MIB_OUTNOROUTES); + IP_INC_STATS_BH(sock_net(sk), IPSTATS_MIB_OUTNOROUTES); return err; } diff --git a/net/ipv4/devinet.c b/net/ipv4/devinet.c index 79a7ef6209ff..b12dae2b0b2d 100644 --- a/net/ipv4/devinet.c +++ b/net/ipv4/devinet.c @@ -1,8 +1,6 @@ /* * NET3 IP device support routines. * - * Version: $Id: devinet.c,v 1.44 2001/10/31 21:55:54 davem Exp $ - * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public License * as published by the Free Software Foundation; either version @@ -140,8 +138,8 @@ void in_dev_finish_destroy(struct in_device *idev) { struct net_device *dev = idev->dev; - BUG_TRAP(!idev->ifa_list); - BUG_TRAP(!idev->mc_list); + WARN_ON(idev->ifa_list); + WARN_ON(idev->mc_list); #ifdef NET_REFCNT_DEBUG printk(KERN_DEBUG "in_dev_finish_destroy: %p=%s\n", idev, dev ? dev->name : "NIL"); @@ -170,6 +168,8 @@ static struct in_device *inetdev_init(struct net_device *dev) in_dev->dev = dev; if ((in_dev->arp_parms = neigh_parms_alloc(dev, &arp_tbl)) == NULL) goto out_kfree; + if (IPV4_DEVCONF(in_dev->cnf, FORWARDING)) + dev_disable_lro(dev); /* Reference in_dev->dev */ dev_hold(dev); /* Account for reference dev->ip_ptr (below) */ @@ -399,7 +399,7 @@ static int inet_set_ifa(struct net_device *dev, struct in_ifaddr *ifa) } ipv4_devconf_setall(in_dev); if (ifa->ifa_dev != in_dev) { - BUG_TRAP(!ifa->ifa_dev); + WARN_ON(ifa->ifa_dev); in_dev_hold(in_dev); ifa->ifa_dev = in_dev; } @@ -1013,7 +1013,7 @@ static void inetdev_changename(struct net_device *dev, struct in_device *in_dev) memcpy(old, ifa->ifa_label, IFNAMSIZ); memcpy(ifa->ifa_label, dev->name, IFNAMSIZ); if (named++ == 0) - continue; + goto skip; dot = strchr(old, ':'); if (dot == NULL) { sprintf(old, ":%d", named); @@ -1024,9 +1024,16 @@ static void inetdev_changename(struct net_device *dev, struct in_device *in_dev) } else { strcpy(ifa->ifa_label + (IFNAMSIZ - strlen(dot) - 1), dot); } +skip: + rtmsg_ifa(RTM_NEWADDR, ifa, NULL, 0); } } +static inline bool inetdev_valid_mtu(unsigned mtu) +{ + return mtu >= 68; +} + /* Called only under RTNL semaphore */ static int inetdev_event(struct notifier_block *this, unsigned long event, @@ -1046,6 +1053,10 @@ static int inetdev_event(struct notifier_block *this, unsigned long event, IN_DEV_CONF_SET(in_dev, NOXFRM, 1); IN_DEV_CONF_SET(in_dev, NOPOLICY, 1); } + } else if (event == NETDEV_CHANGEMTU) { + /* Re-enabling IP */ + if (inetdev_valid_mtu(dev->mtu)) + in_dev = inetdev_init(dev); } goto out; } @@ -1056,7 +1067,7 @@ static int inetdev_event(struct notifier_block *this, unsigned long event, dev->ip_ptr = NULL; break; case NETDEV_UP: - if (dev->mtu < 68) + if (!inetdev_valid_mtu(dev->mtu)) break; if (dev->flags & IFF_LOOPBACK) { struct in_ifaddr *ifa; @@ -1078,9 +1089,9 @@ static int inetdev_event(struct notifier_block *this, unsigned long event, ip_mc_down(in_dev); break; case NETDEV_CHANGEMTU: - if (dev->mtu >= 68) + if (inetdev_valid_mtu(dev->mtu)) break; - /* MTU falled under 68, disable IP */ + /* disable IP when MTU is not enough */ case NETDEV_UNREGISTER: inetdev_destroy(in_dev); break; @@ -1241,6 +1252,8 @@ static void inet_forward_change(struct net *net) read_lock(&dev_base_lock); for_each_netdev(net, dev) { struct in_device *in_dev; + if (on) + dev_disable_lro(dev); rcu_read_lock(); in_dev = __in_dev_get_rcu(dev); if (in_dev) @@ -1248,8 +1261,6 @@ static void inet_forward_change(struct net *net) rcu_read_unlock(); } read_unlock(&dev_base_lock); - - rt_cache_flush(0); } static int devinet_conf_proc(ctl_table *ctl, int write, @@ -1335,10 +1346,19 @@ static int devinet_sysctl_forward(ctl_table *ctl, int write, if (write && *valp != val) { struct net *net = ctl->extra2; - if (valp == &IPV4_DEVCONF_ALL(net, FORWARDING)) - inet_forward_change(net); - else if (valp != &IPV4_DEVCONF_DFLT(net, FORWARDING)) - rt_cache_flush(0); + if (valp != &IPV4_DEVCONF_DFLT(net, FORWARDING)) { + rtnl_lock(); + if (valp == &IPV4_DEVCONF_ALL(net, FORWARDING)) { + inet_forward_change(net); + } else if (*valp) { + struct ipv4_devconf *cnf = ctl->extra1; + struct in_device *idev = + container_of(cnf, struct in_device, cnf); + dev_disable_lro(idev->dev); + } + rtnl_unlock(); + rt_cache_flush(net, 0); + } } return ret; @@ -1351,9 +1371,10 @@ int ipv4_doint_and_flush(ctl_table *ctl, int write, int *valp = ctl->data; int val = *valp; int ret = proc_dointvec(ctl, write, filp, buffer, lenp, ppos); + struct net *net = ctl->extra2; if (write && *valp != val) - rt_cache_flush(0); + rt_cache_flush(net, 0); return ret; } @@ -1364,9 +1385,10 @@ int ipv4_doint_and_flush_strategy(ctl_table *table, int __user *name, int nlen, { int ret = devinet_conf_sysctl(table, name, nlen, oldval, oldlenp, newval, newlen); + struct net *net = table->extra2; if (ret == 1) - rt_cache_flush(0); + rt_cache_flush(net, 0); return ret; } diff --git a/net/ipv4/esp4.c b/net/ipv4/esp4.c index 4e73e5708e70..21515d4c49eb 100644 --- a/net/ipv4/esp4.c +++ b/net/ipv4/esp4.c @@ -575,7 +575,7 @@ static int esp_init_state(struct xfrm_state *x) crypto_aead_ivsize(aead); if (x->props.mode == XFRM_MODE_TUNNEL) x->props.header_len += sizeof(struct iphdr); - else if (x->props.mode == XFRM_MODE_BEET) + else if (x->props.mode == XFRM_MODE_BEET && x->sel.family != AF_INET6) x->props.header_len += IPV4_BEET_PHMAXLEN; if (x->encap) { struct xfrm_encap_tmpl *encap = x->encap; diff --git a/net/ipv4/fib_frontend.c b/net/ipv4/fib_frontend.c index 0b2ac6a3d903..65c1503f8cc8 100644 --- a/net/ipv4/fib_frontend.c +++ b/net/ipv4/fib_frontend.c @@ -5,8 +5,6 @@ * * IPv4 Forwarding Information Base: FIB frontend. * - * Version: $Id: fib_frontend.c,v 1.26 2001/10/31 21:55:54 davem Exp $ - * * Authors: Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru> * * This program is free software; you can redistribute it and/or @@ -146,7 +144,7 @@ static void fib_flush(struct net *net) } if (flushed) - rt_cache_flush(-1); + rt_cache_flush(net, -1); } /* @@ -899,21 +897,22 @@ static void fib_disable_ip(struct net_device *dev, int force) { if (fib_sync_down_dev(dev, force)) fib_flush(dev_net(dev)); - rt_cache_flush(0); + rt_cache_flush(dev_net(dev), 0); arp_ifdown(dev); } static int fib_inetaddr_event(struct notifier_block *this, unsigned long event, void *ptr) { struct in_ifaddr *ifa = (struct in_ifaddr*)ptr; + struct net_device *dev = ifa->ifa_dev->dev; switch (event) { case NETDEV_UP: fib_add_ifaddr(ifa); #ifdef CONFIG_IP_ROUTE_MULTIPATH - fib_sync_up(ifa->ifa_dev->dev); + fib_sync_up(dev); #endif - rt_cache_flush(-1); + rt_cache_flush(dev_net(dev), -1); break; case NETDEV_DOWN: fib_del_ifaddr(ifa); @@ -921,9 +920,9 @@ static int fib_inetaddr_event(struct notifier_block *this, unsigned long event, /* Last address was deleted from this interface. Disable IP. */ - fib_disable_ip(ifa->ifa_dev->dev, 1); + fib_disable_ip(dev, 1); } else { - rt_cache_flush(-1); + rt_cache_flush(dev_net(dev), -1); } break; } @@ -951,14 +950,14 @@ static int fib_netdev_event(struct notifier_block *this, unsigned long event, vo #ifdef CONFIG_IP_ROUTE_MULTIPATH fib_sync_up(dev); #endif - rt_cache_flush(-1); + rt_cache_flush(dev_net(dev), -1); break; case NETDEV_DOWN: fib_disable_ip(dev, 0); break; case NETDEV_CHANGEMTU: case NETDEV_CHANGE: - rt_cache_flush(0); + rt_cache_flush(dev_net(dev), 0); break; } return NOTIFY_DONE; diff --git a/net/ipv4/fib_hash.c b/net/ipv4/fib_hash.c index 2e2fc3376ac9..c8cac6c7f881 100644 --- a/net/ipv4/fib_hash.c +++ b/net/ipv4/fib_hash.c @@ -5,8 +5,6 @@ * * IPv4 FIB: lookup engine and maintenance routines. * - * Version: $Id: fib_hash.c,v 1.13 2001/10/31 21:55:54 davem Exp $ - * * Authors: Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru> * * This program is free software; you can redistribute it and/or @@ -474,7 +472,7 @@ static int fn_hash_insert(struct fib_table *tb, struct fib_config *cfg) fib_release_info(fi_drop); if (state & FA_S_ACCESSED) - rt_cache_flush(-1); + rt_cache_flush(cfg->fc_nlinfo.nl_net, -1); rtmsg_fib(RTM_NEWROUTE, key, fa, cfg->fc_dst_len, tb->tb_id, &cfg->fc_nlinfo, NLM_F_REPLACE); return 0; @@ -534,7 +532,7 @@ static int fn_hash_insert(struct fib_table *tb, struct fib_config *cfg) if (new_f) fz->fz_nent++; - rt_cache_flush(-1); + rt_cache_flush(cfg->fc_nlinfo.nl_net, -1); rtmsg_fib(RTM_NEWROUTE, key, new_fa, cfg->fc_dst_len, tb->tb_id, &cfg->fc_nlinfo, 0); @@ -616,7 +614,7 @@ static int fn_hash_delete(struct fib_table *tb, struct fib_config *cfg) write_unlock_bh(&fib_hash_lock); if (fa->fa_state & FA_S_ACCESSED) - rt_cache_flush(-1); + rt_cache_flush(cfg->fc_nlinfo.nl_net, -1); fn_free_alias(fa, f); if (kill_fn) { fn_free_node(f); diff --git a/net/ipv4/fib_rules.c b/net/ipv4/fib_rules.c index 1fb56876be54..6080d7120821 100644 --- a/net/ipv4/fib_rules.c +++ b/net/ipv4/fib_rules.c @@ -258,9 +258,9 @@ static size_t fib4_rule_nlmsg_payload(struct fib_rule *rule) + nla_total_size(4); /* flow */ } -static void fib4_rule_flush_cache(void) +static void fib4_rule_flush_cache(struct fib_rules_ops *ops) { - rt_cache_flush(-1); + rt_cache_flush(ops->fro_net, -1); } static struct fib_rules_ops fib4_rules_ops_template = { diff --git a/net/ipv4/fib_semantics.c b/net/ipv4/fib_semantics.c index 0d4d72827e4b..ded2ae34eab1 100644 --- a/net/ipv4/fib_semantics.c +++ b/net/ipv4/fib_semantics.c @@ -5,8 +5,6 @@ * * IPv4 Forwarding Information Base: semantics. * - * Version: $Id: fib_semantics.c,v 1.19 2002/01/12 07:54:56 davem Exp $ - * * Authors: Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru> * * This program is free software; you can redistribute it and/or diff --git a/net/ipv4/fib_trie.c b/net/ipv4/fib_trie.c index 4b02d14e7ab9..5cb72786a8af 100644 --- a/net/ipv4/fib_trie.c +++ b/net/ipv4/fib_trie.c @@ -22,8 +22,6 @@ * IP-address lookup using LC-tries. Stefan Nilsson and Gunnar Karlsson * IEEE Journal on Selected Areas in Communications, 17(6):1083-1092, June 1999 * - * Version: $Id: fib_trie.c,v 1.3 2005/06/08 14:20:01 robert Exp $ - * * * Code from fib_hash has been reused which includes the following header: * @@ -1273,7 +1271,7 @@ static int fn_trie_insert(struct fib_table *tb, struct fib_config *cfg) fib_release_info(fi_drop); if (state & FA_S_ACCESSED) - rt_cache_flush(-1); + rt_cache_flush(cfg->fc_nlinfo.nl_net, -1); rtmsg_fib(RTM_NEWROUTE, htonl(key), new_fa, plen, tb->tb_id, &cfg->fc_nlinfo, NLM_F_REPLACE); @@ -1318,7 +1316,7 @@ static int fn_trie_insert(struct fib_table *tb, struct fib_config *cfg) list_add_tail_rcu(&new_fa->fa_list, (fa ? &fa->fa_list : fa_head)); - rt_cache_flush(-1); + rt_cache_flush(cfg->fc_nlinfo.nl_net, -1); rtmsg_fib(RTM_NEWROUTE, htonl(key), new_fa, plen, tb->tb_id, &cfg->fc_nlinfo, 0); succeeded: @@ -1359,17 +1357,17 @@ static int check_leaf(struct trie *t, struct leaf *l, t->stats.semantic_match_miss++; #endif if (err <= 0) - return plen; + return err; } - return -1; + return 1; } static int fn_trie_lookup(struct fib_table *tb, const struct flowi *flp, struct fib_result *res) { struct trie *t = (struct trie *) tb->tb_data; - int plen, ret = 0; + int ret; struct node *n; struct tnode *pn; int pos, bits; @@ -1393,10 +1391,7 @@ static int fn_trie_lookup(struct fib_table *tb, const struct flowi *flp, /* Just a leaf? */ if (IS_LEAF(n)) { - plen = check_leaf(t, (struct leaf *)n, key, flp, res); - if (plen < 0) - goto failed; - ret = 0; + ret = check_leaf(t, (struct leaf *)n, key, flp, res); goto found; } @@ -1421,11 +1416,9 @@ static int fn_trie_lookup(struct fib_table *tb, const struct flowi *flp, } if (IS_LEAF(n)) { - plen = check_leaf(t, (struct leaf *)n, key, flp, res); - if (plen < 0) + ret = check_leaf(t, (struct leaf *)n, key, flp, res); + if (ret > 0) goto backtrace; - - ret = 0; goto found; } @@ -1666,7 +1659,7 @@ static int fn_trie_delete(struct fib_table *tb, struct fib_config *cfg) trie_leaf_remove(t, l); if (fa->fa_state & FA_S_ACCESSED) - rt_cache_flush(-1); + rt_cache_flush(cfg->fc_nlinfo.nl_net, -1); fib_release_info(fa->fa_info); alias_free_mem_rcu(fa); @@ -2258,25 +2251,7 @@ static int fib_triestat_seq_show(struct seq_file *seq, void *v) static int fib_triestat_seq_open(struct inode *inode, struct file *file) { - int err; - struct net *net; - - net = get_proc_net(inode); - if (net == NULL) - return -ENXIO; - err = single_open(file, fib_triestat_seq_show, net); - if (err < 0) { - put_net(net); - return err; - } - return 0; -} - -static int fib_triestat_seq_release(struct inode *ino, struct file *f) -{ - struct seq_file *seq = f->private_data; - put_net(seq->private); - return single_release(ino, f); + return single_open_net(inode, file, fib_triestat_seq_show); } static const struct file_operations fib_triestat_fops = { @@ -2284,7 +2259,7 @@ static const struct file_operations fib_triestat_fops = { .open = fib_triestat_seq_open, .read = seq_read, .llseek = seq_lseek, - .release = fib_triestat_seq_release, + .release = single_release_net, }; static struct node *fib_trie_get_idx(struct seq_file *seq, loff_t pos) diff --git a/net/ipv4/icmp.c b/net/ipv4/icmp.c index 87397351ddac..72b2de76f1cd 100644 --- a/net/ipv4/icmp.c +++ b/net/ipv4/icmp.c @@ -1,9 +1,7 @@ /* * NET3: Implementation of the ICMP protocol layer. * - * Alan Cox, <alan@redhat.com> - * - * Version: $Id: icmp.c,v 1.85 2002/02/01 22:01:03 davem Exp $ + * Alan Cox, <alan@lxorguk.ukuu.org.uk> * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public License @@ -113,12 +111,6 @@ struct icmp_bxm { unsigned char optbuf[40]; }; -/* - * Statistics - */ -DEFINE_SNMP_STAT(struct icmp_mib, icmp_statistics) __read_mostly; -DEFINE_SNMP_STAT(struct icmpmsg_mib, icmpmsg_statistics) __read_mostly; - /* An array of errno for error messages from dest unreach. */ /* RFC 1122: 3.2.2.1 States that NET_UNREACH, HOST_UNREACH and SR_FAILED MUST be considered 'transient errs'. */ @@ -212,18 +204,22 @@ static struct sock *icmp_sk(struct net *net) return net->ipv4.icmp_sk[smp_processor_id()]; } -static inline int icmp_xmit_lock(struct sock *sk) +static inline struct sock *icmp_xmit_lock(struct net *net) { + struct sock *sk; + local_bh_disable(); + sk = icmp_sk(net); + if (unlikely(!spin_trylock(&sk->sk_lock.slock))) { /* This can happen if the output path signals a * dst_link_failure() for an outgoing ICMP packet. */ local_bh_enable(); - return 1; + return NULL; } - return 0; + return sk; } static inline void icmp_xmit_unlock(struct sock *sk) @@ -298,10 +294,10 @@ out: /* * Maintain the counters used in the SNMP statistics for outgoing ICMP */ -void icmp_out_count(unsigned char type) +void icmp_out_count(struct net *net, unsigned char type) { - ICMPMSGOUT_INC_STATS(type); - ICMP_INC_STATS(ICMP_MIB_OUTMSGS); + ICMPMSGOUT_INC_STATS(net, type); + ICMP_INC_STATS(net, ICMP_MIB_OUTMSGS); } /* @@ -362,15 +358,17 @@ static void icmp_reply(struct icmp_bxm *icmp_param, struct sk_buff *skb) struct ipcm_cookie ipc; struct rtable *rt = skb->rtable; struct net *net = dev_net(rt->u.dst.dev); - struct sock *sk = icmp_sk(net); - struct inet_sock *inet = inet_sk(sk); + struct sock *sk; + struct inet_sock *inet; __be32 daddr; if (ip_options_echo(&icmp_param->replyopts, skb)) return; - if (icmp_xmit_lock(sk)) + sk = icmp_xmit_lock(net); + if (sk == NULL) return; + inet = inet_sk(sk); icmp_param->data.icmph.checksum = 0; @@ -427,7 +425,6 @@ void icmp_send(struct sk_buff *skb_in, int type, int code, __be32 info) if (!rt) goto out; net = dev_net(rt->u.dst.dev); - sk = icmp_sk(net); /* * Find the original header. It is expected to be valid, of course. @@ -491,7 +488,8 @@ void icmp_send(struct sk_buff *skb_in, int type, int code, __be32 info) } } - if (icmp_xmit_lock(sk)) + sk = icmp_xmit_lock(net); + if (sk == NULL) return; /* @@ -765,7 +763,7 @@ static void icmp_unreach(struct sk_buff *skb) out: return; out_err: - ICMP_INC_STATS_BH(ICMP_MIB_INERRORS); + ICMP_INC_STATS_BH(net, ICMP_MIB_INERRORS); goto out; } @@ -805,7 +803,7 @@ static void icmp_redirect(struct sk_buff *skb) out: return; out_err: - ICMP_INC_STATS_BH(ICMP_MIB_INERRORS); + ICMP_INC_STATS_BH(dev_net(skb->dev), ICMP_MIB_INERRORS); goto out; } @@ -876,7 +874,7 @@ static void icmp_timestamp(struct sk_buff *skb) out: return; out_err: - ICMP_INC_STATS_BH(ICMP_MIB_INERRORS); + ICMP_INC_STATS_BH(dev_net(skb->dst->dev), ICMP_MIB_INERRORS); goto out; } @@ -975,6 +973,7 @@ int icmp_rcv(struct sk_buff *skb) { struct icmphdr *icmph; struct rtable *rt = skb->rtable; + struct net *net = dev_net(rt->u.dst.dev); if (!xfrm4_policy_check(NULL, XFRM_POLICY_IN, skb)) { int nh; @@ -995,7 +994,7 @@ int icmp_rcv(struct sk_buff *skb) skb_set_network_header(skb, nh); } - ICMP_INC_STATS_BH(ICMP_MIB_INMSGS); + ICMP_INC_STATS_BH(net, ICMP_MIB_INMSGS); switch (skb->ip_summed) { case CHECKSUM_COMPLETE: @@ -1013,7 +1012,7 @@ int icmp_rcv(struct sk_buff *skb) icmph = icmp_hdr(skb); - ICMPMSGIN_INC_STATS_BH(icmph->type); + ICMPMSGIN_INC_STATS_BH(net, icmph->type); /* * 18 is the highest 'known' ICMP type. Anything else is a mystery * @@ -1029,9 +1028,6 @@ int icmp_rcv(struct sk_buff *skb) */ if (rt->rt_flags & (RTCF_BROADCAST | RTCF_MULTICAST)) { - struct net *net; - - net = dev_net(rt->u.dst.dev); /* * RFC 1122: 3.2.2.6 An ICMP_ECHO to broadcast MAY be * silently ignored (we let user decide with a sysctl). @@ -1057,7 +1053,7 @@ drop: kfree_skb(skb); return 0; error: - ICMP_INC_STATS_BH(ICMP_MIB_INERRORS); + ICMP_INC_STATS_BH(net, ICMP_MIB_INERRORS); goto drop; } @@ -1217,5 +1213,4 @@ int __init icmp_init(void) EXPORT_SYMBOL(icmp_err_convert); EXPORT_SYMBOL(icmp_send); -EXPORT_SYMBOL(icmp_statistics); EXPORT_SYMBOL(xrlim_allow); diff --git a/net/ipv4/igmp.c b/net/ipv4/igmp.c index 2769dc4a4c84..a0d86455c53e 100644 --- a/net/ipv4/igmp.c +++ b/net/ipv4/igmp.c @@ -8,10 +8,8 @@ * the older version didn't come out right using gcc 2.5.8, the newer one * seems to fall out with gcc 2.6.2. * - * Version: $Id: igmp.c,v 1.47 2002/02/01 22:01:03 davem Exp $ - * * Authors: - * Alan Cox <Alan.Cox@linux.org> + * Alan Cox <alan@lxorguk.ukuu.org.uk> * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public License @@ -291,6 +289,7 @@ static struct sk_buff *igmpv3_newpack(struct net_device *dev, int size) struct rtable *rt; struct iphdr *pip; struct igmpv3_report *pig; + struct net *net = dev_net(dev); skb = alloc_skb(size + LL_ALLOCATED_SPACE(dev), GFP_ATOMIC); if (skb == NULL) @@ -301,7 +300,7 @@ static struct sk_buff *igmpv3_newpack(struct net_device *dev, int size) .nl_u = { .ip4_u = { .daddr = IGMPV3_ALL_MCR } }, .proto = IPPROTO_IGMP }; - if (ip_route_output_key(&init_net, &rt, &fl)) { + if (ip_route_output_key(net, &rt, &fl)) { kfree_skb(skb); return NULL; } @@ -631,6 +630,7 @@ static int igmp_send_report(struct in_device *in_dev, struct ip_mc_list *pmc, struct igmphdr *ih; struct rtable *rt; struct net_device *dev = in_dev->dev; + struct net *net = dev_net(dev); __be32 group = pmc ? pmc->multiaddr : 0; __be32 dst; @@ -645,7 +645,7 @@ static int igmp_send_report(struct in_device *in_dev, struct ip_mc_list *pmc, struct flowi fl = { .oif = dev->ifindex, .nl_u = { .ip4_u = { .daddr = dst } }, .proto = IPPROTO_IGMP }; - if (ip_route_output_key(&init_net, &rt, &fl)) + if (ip_route_output_key(net, &rt, &fl)) return -1; } if (rt->rt_src == 0) { @@ -1198,9 +1198,6 @@ void ip_mc_inc_group(struct in_device *in_dev, __be32 addr) ASSERT_RTNL(); - if (dev_net(in_dev->dev) != &init_net) - return; - for (im=in_dev->mc_list; im; im=im->next) { if (im->multiaddr == addr) { im->users++; @@ -1237,6 +1234,7 @@ void ip_mc_inc_group(struct in_device *in_dev, __be32 addr) write_lock_bh(&in_dev->mc_list_lock); im->next=in_dev->mc_list; in_dev->mc_list=im; + in_dev->mc_count++; write_unlock_bh(&in_dev->mc_list_lock); #ifdef CONFIG_IP_MULTICAST igmpv3_del_delrec(in_dev, im->multiaddr); @@ -1280,14 +1278,12 @@ void ip_mc_dec_group(struct in_device *in_dev, __be32 addr) ASSERT_RTNL(); - if (dev_net(in_dev->dev) != &init_net) - return; - for (ip=&in_dev->mc_list; (i=*ip)!=NULL; ip=&i->next) { if (i->multiaddr==addr) { if (--i->users == 0) { write_lock_bh(&in_dev->mc_list_lock); *ip = i->next; + in_dev->mc_count--; write_unlock_bh(&in_dev->mc_list_lock); igmp_group_dropped(i); @@ -1310,9 +1306,6 @@ void ip_mc_down(struct in_device *in_dev) ASSERT_RTNL(); - if (dev_net(in_dev->dev) != &init_net) - return; - for (i=in_dev->mc_list; i; i=i->next) igmp_group_dropped(i); @@ -1333,15 +1326,13 @@ void ip_mc_init_dev(struct in_device *in_dev) { ASSERT_RTNL(); - if (dev_net(in_dev->dev) != &init_net) - return; - in_dev->mc_tomb = NULL; #ifdef CONFIG_IP_MULTICAST in_dev->mr_gq_running = 0; setup_timer(&in_dev->mr_gq_timer, igmp_gq_timer_expire, (unsigned long)in_dev); in_dev->mr_ifc_count = 0; + in_dev->mc_count = 0; setup_timer(&in_dev->mr_ifc_timer, igmp_ifc_timer_expire, (unsigned long)in_dev); in_dev->mr_qrv = IGMP_Unsolicited_Report_Count; @@ -1359,9 +1350,6 @@ void ip_mc_up(struct in_device *in_dev) ASSERT_RTNL(); - if (dev_net(in_dev->dev) != &init_net) - return; - ip_mc_inc_group(in_dev, IGMP_ALL_HOSTS); for (i=in_dev->mc_list; i; i=i->next) @@ -1378,17 +1366,14 @@ void ip_mc_destroy_dev(struct in_device *in_dev) ASSERT_RTNL(); - if (dev_net(in_dev->dev) != &init_net) - return; - /* Deactivate timers */ ip_mc_down(in_dev); write_lock_bh(&in_dev->mc_list_lock); while ((i = in_dev->mc_list) != NULL) { in_dev->mc_list = i->next; + in_dev->mc_count--; write_unlock_bh(&in_dev->mc_list_lock); - igmp_group_dropped(i); ip_ma_put(i); @@ -1397,7 +1382,7 @@ void ip_mc_destroy_dev(struct in_device *in_dev) write_unlock_bh(&in_dev->mc_list_lock); } -static struct in_device * ip_mc_find_dev(struct ip_mreqn *imr) +static struct in_device *ip_mc_find_dev(struct net *net, struct ip_mreqn *imr) { struct flowi fl = { .nl_u = { .ip4_u = { .daddr = imr->imr_multiaddr.s_addr } } }; @@ -1406,19 +1391,19 @@ static struct in_device * ip_mc_find_dev(struct ip_mreqn *imr) struct in_device *idev = NULL; if (imr->imr_ifindex) { - idev = inetdev_by_index(&init_net, imr->imr_ifindex); + idev = inetdev_by_index(net, imr->imr_ifindex); if (idev) __in_dev_put(idev); return idev; } if (imr->imr_address.s_addr) { - dev = ip_dev_find(&init_net, imr->imr_address.s_addr); + dev = ip_dev_find(net, imr->imr_address.s_addr); if (!dev) return NULL; dev_put(dev); } - if (!dev && !ip_route_output_key(&init_net, &rt, &fl)) { + if (!dev && !ip_route_output_key(net, &rt, &fl)) { dev = rt->u.dst.dev; ip_rt_put(rt); } @@ -1756,18 +1741,16 @@ int ip_mc_join_group(struct sock *sk , struct ip_mreqn *imr) struct ip_mc_socklist *iml=NULL, *i; struct in_device *in_dev; struct inet_sock *inet = inet_sk(sk); + struct net *net = sock_net(sk); int ifindex; int count = 0; if (!ipv4_is_multicast(addr)) return -EINVAL; - if (sock_net(sk) != &init_net) - return -EPROTONOSUPPORT; - rtnl_lock(); - in_dev = ip_mc_find_dev(imr); + in_dev = ip_mc_find_dev(net, imr); if (!in_dev) { iml = NULL; @@ -1829,15 +1812,13 @@ int ip_mc_leave_group(struct sock *sk, struct ip_mreqn *imr) struct inet_sock *inet = inet_sk(sk); struct ip_mc_socklist *iml, **imlp; struct in_device *in_dev; + struct net *net = sock_net(sk); __be32 group = imr->imr_multiaddr.s_addr; u32 ifindex; int ret = -EADDRNOTAVAIL; - if (sock_net(sk) != &init_net) - return -EPROTONOSUPPORT; - rtnl_lock(); - in_dev = ip_mc_find_dev(imr); + in_dev = ip_mc_find_dev(net, imr); ifindex = imr->imr_ifindex; for (imlp = &inet->mc_list; (iml = *imlp) != NULL; imlp = &iml->next) { if (iml->multi.imr_multiaddr.s_addr != group) @@ -1875,21 +1856,19 @@ int ip_mc_source(int add, int omode, struct sock *sk, struct struct in_device *in_dev = NULL; struct inet_sock *inet = inet_sk(sk); struct ip_sf_socklist *psl; + struct net *net = sock_net(sk); int leavegroup = 0; int i, j, rv; if (!ipv4_is_multicast(addr)) return -EINVAL; - if (sock_net(sk) != &init_net) - return -EPROTONOSUPPORT; - rtnl_lock(); imr.imr_multiaddr.s_addr = mreqs->imr_multiaddr; imr.imr_address.s_addr = mreqs->imr_interface; imr.imr_ifindex = ifindex; - in_dev = ip_mc_find_dev(&imr); + in_dev = ip_mc_find_dev(net, &imr); if (!in_dev) { err = -ENODEV; @@ -2009,6 +1988,7 @@ int ip_mc_msfilter(struct sock *sk, struct ip_msfilter *msf, int ifindex) struct in_device *in_dev; struct inet_sock *inet = inet_sk(sk); struct ip_sf_socklist *newpsl, *psl; + struct net *net = sock_net(sk); int leavegroup = 0; if (!ipv4_is_multicast(addr)) @@ -2017,15 +1997,12 @@ int ip_mc_msfilter(struct sock *sk, struct ip_msfilter *msf, int ifindex) msf->imsf_fmode != MCAST_EXCLUDE) return -EINVAL; - if (sock_net(sk) != &init_net) - return -EPROTONOSUPPORT; - rtnl_lock(); imr.imr_multiaddr.s_addr = msf->imsf_multiaddr; imr.imr_address.s_addr = msf->imsf_interface; imr.imr_ifindex = ifindex; - in_dev = ip_mc_find_dev(&imr); + in_dev = ip_mc_find_dev(net, &imr); if (!in_dev) { err = -ENODEV; @@ -2096,19 +2073,17 @@ int ip_mc_msfget(struct sock *sk, struct ip_msfilter *msf, struct in_device *in_dev; struct inet_sock *inet = inet_sk(sk); struct ip_sf_socklist *psl; + struct net *net = sock_net(sk); if (!ipv4_is_multicast(addr)) return -EINVAL; - if (sock_net(sk) != &init_net) - return -EPROTONOSUPPORT; - rtnl_lock(); imr.imr_multiaddr.s_addr = msf->imsf_multiaddr; imr.imr_address.s_addr = msf->imsf_interface; imr.imr_ifindex = 0; - in_dev = ip_mc_find_dev(&imr); + in_dev = ip_mc_find_dev(net, &imr); if (!in_dev) { err = -ENODEV; @@ -2165,9 +2140,6 @@ int ip_mc_gsfget(struct sock *sk, struct group_filter *gsf, if (!ipv4_is_multicast(addr)) return -EINVAL; - if (sock_net(sk) != &init_net) - return -EPROTONOSUPPORT; - rtnl_lock(); err = -EADDRNOTAVAIL; @@ -2248,19 +2220,17 @@ void ip_mc_drop_socket(struct sock *sk) { struct inet_sock *inet = inet_sk(sk); struct ip_mc_socklist *iml; + struct net *net = sock_net(sk); if (inet->mc_list == NULL) return; - if (sock_net(sk) != &init_net) - return; - rtnl_lock(); while ((iml = inet->mc_list) != NULL) { struct in_device *in_dev; inet->mc_list = iml->next; - in_dev = inetdev_by_index(&init_net, iml->multi.imr_ifindex); + in_dev = inetdev_by_index(net, iml->multi.imr_ifindex); (void) ip_mc_leave_src(sk, iml, in_dev); if (in_dev != NULL) { ip_mc_dec_group(in_dev, iml->multi.imr_multiaddr.s_addr); @@ -2416,7 +2386,7 @@ static int igmp_mc_seq_show(struct seq_file *seq, void *v) if (state->in_dev->mc_list == im) { seq_printf(seq, "%d\t%-10s: %5d %7s\n", - state->dev->ifindex, state->dev->name, state->dev->mc_count, querier); + state->dev->ifindex, state->dev->name, state->in_dev->mc_count, querier); } seq_printf(seq, diff --git a/net/ipv4/inet_connection_sock.c b/net/ipv4/inet_connection_sock.c index ec834480abe7..bd1278a2d828 100644 --- a/net/ipv4/inet_connection_sock.c +++ b/net/ipv4/inet_connection_sock.c @@ -30,20 +30,22 @@ EXPORT_SYMBOL(inet_csk_timer_bug_msg); #endif /* - * This array holds the first and last local port number. + * This struct holds the first and last local port number. */ -int sysctl_local_port_range[2] = { 32768, 61000 }; -DEFINE_SEQLOCK(sysctl_port_range_lock); +struct local_ports sysctl_local_ports __read_mostly = { + .lock = SEQLOCK_UNLOCKED, + .range = { 32768, 61000 }, +}; void inet_get_local_port_range(int *low, int *high) { unsigned seq; do { - seq = read_seqbegin(&sysctl_port_range_lock); + seq = read_seqbegin(&sysctl_local_ports.lock); - *low = sysctl_local_port_range[0]; - *high = sysctl_local_port_range[1]; - } while (read_seqretry(&sysctl_port_range_lock, seq)); + *low = sysctl_local_ports.range[0]; + *high = sysctl_local_ports.range[1]; + } while (read_seqretry(&sysctl_local_ports.lock, seq)); } EXPORT_SYMBOL(inet_get_local_port_range); @@ -103,7 +105,8 @@ int inet_csk_get_port(struct sock *sk, unsigned short snum) rover = net_random() % remaining + low; do { - head = &hashinfo->bhash[inet_bhashfn(rover, hashinfo->bhash_size)]; + head = &hashinfo->bhash[inet_bhashfn(net, rover, + hashinfo->bhash_size)]; spin_lock(&head->lock); inet_bind_bucket_for_each(tb, node, &head->chain) if (tb->ib_net == net && tb->port == rover) @@ -130,7 +133,8 @@ int inet_csk_get_port(struct sock *sk, unsigned short snum) */ snum = rover; } else { - head = &hashinfo->bhash[inet_bhashfn(snum, hashinfo->bhash_size)]; + head = &hashinfo->bhash[inet_bhashfn(net, snum, + hashinfo->bhash_size)]; spin_lock(&head->lock); inet_bind_bucket_for_each(tb, node, &head->chain) if (tb->ib_net == net && tb->port == snum) @@ -165,7 +169,7 @@ tb_not_found: success: if (!inet_csk(sk)->icsk_bind_hash) inet_bind_hash(sk, tb, snum); - BUG_TRAP(inet_csk(sk)->icsk_bind_hash == tb); + WARN_ON(inet_csk(sk)->icsk_bind_hash != tb); ret = 0; fail_unlock: @@ -258,7 +262,7 @@ struct sock *inet_csk_accept(struct sock *sk, int flags, int *err) } newsk = reqsk_queue_get_child(&icsk->icsk_accept_queue, sk); - BUG_TRAP(newsk->sk_state != TCP_SYN_RECV); + WARN_ON(newsk->sk_state == TCP_SYN_RECV); out: release_sock(sk); return newsk; @@ -333,18 +337,20 @@ struct dst_entry* inet_csk_route_req(struct sock *sk, .saddr = ireq->loc_addr, .tos = RT_CONN_FLAGS(sk) } }, .proto = sk->sk_protocol, + .flags = inet_sk_flowi_flags(sk), .uli_u = { .ports = { .sport = inet_sk(sk)->sport, .dport = ireq->rmt_port } } }; + struct net *net = sock_net(sk); security_req_classify_flow(req, &fl); - if (ip_route_output_flow(sock_net(sk), &rt, &fl, sk, 0)) { - IP_INC_STATS_BH(IPSTATS_MIB_OUTNOROUTES); + if (ip_route_output_flow(net, &rt, &fl, sk, 0)) { + IP_INC_STATS_BH(net, IPSTATS_MIB_OUTNOROUTES); return NULL; } if (opt && opt->is_strictroute && rt->rt_dst != rt->rt_gateway) { ip_rt_put(rt); - IP_INC_STATS_BH(IPSTATS_MIB_OUTNOROUTES); + IP_INC_STATS_BH(net, IPSTATS_MIB_OUTNOROUTES); return NULL; } return &rt->u.dst; @@ -383,7 +389,7 @@ struct request_sock *inet_csk_search_req(const struct sock *sk, ireq->rmt_addr == raddr && ireq->loc_addr == laddr && AF_INET_FAMILY(req->rsk_ops->family)) { - BUG_TRAP(!req->sk); + WARN_ON(req->sk); *prevp = prev; break; } @@ -512,6 +518,8 @@ struct sock *inet_csk_clone(struct sock *sk, const struct request_sock *req, newicsk->icsk_bind_hash = NULL; inet_sk(newsk)->dport = inet_rsk(req)->rmt_port; + inet_sk(newsk)->num = ntohs(inet_rsk(req)->loc_port); + inet_sk(newsk)->sport = inet_rsk(req)->loc_port; newsk->sk_write_space = sk_stream_write_space; newicsk->icsk_retransmits = 0; @@ -536,14 +544,14 @@ EXPORT_SYMBOL_GPL(inet_csk_clone); */ void inet_csk_destroy_sock(struct sock *sk) { - BUG_TRAP(sk->sk_state == TCP_CLOSE); - BUG_TRAP(sock_flag(sk, SOCK_DEAD)); + WARN_ON(sk->sk_state != TCP_CLOSE); + WARN_ON(!sock_flag(sk, SOCK_DEAD)); /* It cannot be in hash table! */ - BUG_TRAP(sk_unhashed(sk)); + WARN_ON(!sk_unhashed(sk)); /* If it has not 0 inet_sk(sk)->num, it must be bound */ - BUG_TRAP(!inet_sk(sk)->num || inet_csk(sk)->icsk_bind_hash); + WARN_ON(inet_sk(sk)->num && !inet_csk(sk)->icsk_bind_hash); sk->sk_prot->destroy(sk); @@ -626,7 +634,7 @@ void inet_csk_listen_stop(struct sock *sk) local_bh_disable(); bh_lock_sock(child); - BUG_TRAP(!sock_owned_by_user(child)); + WARN_ON(sock_owned_by_user(child)); sock_hold(child); sk->sk_prot->disconnect(child, O_NONBLOCK); @@ -644,7 +652,7 @@ void inet_csk_listen_stop(struct sock *sk) sk_acceptq_removed(sk); __reqsk_free(req); } - BUG_TRAP(!sk->sk_ack_backlog); + WARN_ON(sk->sk_ack_backlog); } EXPORT_SYMBOL_GPL(inet_csk_listen_stop); diff --git a/net/ipv4/inet_diag.c b/net/ipv4/inet_diag.c index da97695e7096..89cb047ab314 100644 --- a/net/ipv4/inet_diag.c +++ b/net/ipv4/inet_diag.c @@ -1,8 +1,6 @@ /* * inet_diag.c Module for monitoring INET transport protocols sockets. * - * Version: $Id: inet_diag.c,v 1.3 2002/02/01 22:01:04 davem Exp $ - * * Authors: Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru> * * This program is free software; you can redistribute it and/or @@ -784,11 +782,15 @@ skip_listen_ht: struct sock *sk; struct hlist_node *node; + num = 0; + + if (hlist_empty(&head->chain) && hlist_empty(&head->twchain)) + continue; + if (i > s_i) s_num = 0; read_lock_bh(lock); - num = 0; sk_for_each(sk, node, &head->chain) { struct inet_sock *inet = inet_sk(sk); diff --git a/net/ipv4/inet_fragment.c b/net/ipv4/inet_fragment.c index 4ed429bd5951..6c52e08f786e 100644 --- a/net/ipv4/inet_fragment.c +++ b/net/ipv4/inet_fragment.c @@ -134,8 +134,8 @@ void inet_frag_destroy(struct inet_frag_queue *q, struct inet_frags *f, struct sk_buff *fp; struct netns_frags *nf; - BUG_TRAP(q->last_in & INET_FRAG_COMPLETE); - BUG_TRAP(del_timer(&q->timer) == 0); + WARN_ON(!(q->last_in & INET_FRAG_COMPLETE)); + WARN_ON(del_timer(&q->timer) != 0); /* Release all fragment data. */ fp = q->fragments; @@ -192,14 +192,21 @@ EXPORT_SYMBOL(inet_frag_evictor); static struct inet_frag_queue *inet_frag_intern(struct netns_frags *nf, struct inet_frag_queue *qp_in, struct inet_frags *f, - unsigned int hash, void *arg) + void *arg) { struct inet_frag_queue *qp; #ifdef CONFIG_SMP struct hlist_node *n; #endif + unsigned int hash; write_lock(&f->lock); + /* + * While we stayed w/o the lock other CPU could update + * the rnd seed, so we need to re-calculate the hash + * chain. Fortunatelly the qp_in can be used to get one. + */ + hash = f->hashfn(qp_in); #ifdef CONFIG_SMP /* With SMP race we have to recheck hash table, because * such entry could be created on other cpu, while we @@ -247,7 +254,7 @@ static struct inet_frag_queue *inet_frag_alloc(struct netns_frags *nf, } static struct inet_frag_queue *inet_frag_create(struct netns_frags *nf, - struct inet_frags *f, void *arg, unsigned int hash) + struct inet_frags *f, void *arg) { struct inet_frag_queue *q; @@ -255,7 +262,7 @@ static struct inet_frag_queue *inet_frag_create(struct netns_frags *nf, if (q == NULL) return NULL; - return inet_frag_intern(nf, q, f, hash, arg); + return inet_frag_intern(nf, q, f, arg); } struct inet_frag_queue *inet_frag_find(struct netns_frags *nf, @@ -264,7 +271,6 @@ struct inet_frag_queue *inet_frag_find(struct netns_frags *nf, struct inet_frag_queue *q; struct hlist_node *n; - read_lock(&f->lock); hlist_for_each_entry(q, n, &f->hash[hash], list) { if (q->net == nf && f->match(q, key)) { atomic_inc(&q->refcnt); @@ -274,6 +280,6 @@ struct inet_frag_queue *inet_frag_find(struct netns_frags *nf, } read_unlock(&f->lock); - return inet_frag_create(nf, f, key, hash); + return inet_frag_create(nf, f, key); } EXPORT_SYMBOL(inet_frag_find); diff --git a/net/ipv4/inet_hashtables.c b/net/ipv4/inet_hashtables.c index 2023d37b2708..44981906fb91 100644 --- a/net/ipv4/inet_hashtables.c +++ b/net/ipv4/inet_hashtables.c @@ -70,7 +70,8 @@ void inet_bind_hash(struct sock *sk, struct inet_bind_bucket *tb, static void __inet_put_port(struct sock *sk) { struct inet_hashinfo *hashinfo = sk->sk_prot->h.hashinfo; - const int bhash = inet_bhashfn(inet_sk(sk)->num, hashinfo->bhash_size); + const int bhash = inet_bhashfn(sock_net(sk), inet_sk(sk)->num, + hashinfo->bhash_size); struct inet_bind_hashbucket *head = &hashinfo->bhash[bhash]; struct inet_bind_bucket *tb; @@ -95,7 +96,8 @@ EXPORT_SYMBOL(inet_put_port); void __inet_inherit_port(struct sock *sk, struct sock *child) { struct inet_hashinfo *table = sk->sk_prot->h.hashinfo; - const int bhash = inet_bhashfn(inet_sk(child)->num, table->bhash_size); + const int bhash = inet_bhashfn(sock_net(sk), inet_sk(child)->num, + table->bhash_size); struct inet_bind_hashbucket *head = &table->bhash[bhash]; struct inet_bind_bucket *tb; @@ -192,7 +194,7 @@ struct sock *__inet_lookup_listener(struct net *net, const struct hlist_head *head; read_lock(&hashinfo->lhash_lock); - head = &hashinfo->listening_hash[inet_lhashfn(hnum)]; + head = &hashinfo->listening_hash[inet_lhashfn(net, hnum)]; if (!hlist_empty(head)) { const struct inet_sock *inet = inet_sk((sk = __sk_head(head))); @@ -225,7 +227,7 @@ struct sock * __inet_lookup_established(struct net *net, /* Optimize here for direct hit, only listening connections can * have wildcards anyways. */ - unsigned int hash = inet_ehashfn(daddr, hnum, saddr, sport); + unsigned int hash = inet_ehashfn(net, daddr, hnum, saddr, sport); struct inet_ehash_bucket *head = inet_ehash_bucket(hashinfo, hash); rwlock_t *lock = inet_ehash_lockp(hashinfo, hash); @@ -265,13 +267,13 @@ static int __inet_check_established(struct inet_timewait_death_row *death_row, int dif = sk->sk_bound_dev_if; INET_ADDR_COOKIE(acookie, saddr, daddr) const __portpair ports = INET_COMBINED_PORTS(inet->dport, lport); - unsigned int hash = inet_ehashfn(daddr, lport, saddr, inet->dport); + struct net *net = sock_net(sk); + unsigned int hash = inet_ehashfn(net, daddr, lport, saddr, inet->dport); struct inet_ehash_bucket *head = inet_ehash_bucket(hinfo, hash); rwlock_t *lock = inet_ehash_lockp(hinfo, hash); struct sock *sk2; const struct hlist_node *node; struct inet_timewait_sock *tw; - struct net *net = sock_net(sk); prefetch(head->chain.first); write_lock(lock); @@ -303,18 +305,18 @@ unique: inet->num = lport; inet->sport = htons(lport); sk->sk_hash = hash; - BUG_TRAP(sk_unhashed(sk)); + WARN_ON(!sk_unhashed(sk)); __sk_add_node(sk, &head->chain); sock_prot_inuse_add(sock_net(sk), sk->sk_prot, 1); write_unlock(lock); if (twp) { *twp = tw; - NET_INC_STATS_BH(LINUX_MIB_TIMEWAITRECYCLED); + NET_INC_STATS_BH(net, LINUX_MIB_TIMEWAITRECYCLED); } else if (tw) { /* Silly. Should hash-dance instead... */ inet_twsk_deschedule(tw, death_row); - NET_INC_STATS_BH(LINUX_MIB_TIMEWAITRECYCLED); + NET_INC_STATS_BH(net, LINUX_MIB_TIMEWAITRECYCLED); inet_twsk_put(tw); } @@ -340,7 +342,7 @@ void __inet_hash_nolisten(struct sock *sk) rwlock_t *lock; struct inet_ehash_bucket *head; - BUG_TRAP(sk_unhashed(sk)); + WARN_ON(!sk_unhashed(sk)); sk->sk_hash = inet_sk_ehashfn(sk); head = inet_ehash_bucket(hashinfo, sk->sk_hash); @@ -365,7 +367,7 @@ static void __inet_hash(struct sock *sk) return; } - BUG_TRAP(sk_unhashed(sk)); + WARN_ON(!sk_unhashed(sk)); list = &hashinfo->listening_hash[inet_sk_listen_hashfn(sk)]; lock = &hashinfo->lhash_lock; @@ -438,7 +440,8 @@ int __inet_hash_connect(struct inet_timewait_death_row *death_row, local_bh_disable(); for (i = 1; i <= remaining; i++) { port = low + (i + offset) % remaining; - head = &hinfo->bhash[inet_bhashfn(port, hinfo->bhash_size)]; + head = &hinfo->bhash[inet_bhashfn(net, port, + hinfo->bhash_size)]; spin_lock(&head->lock); /* Does not bother with rcv_saddr checks, @@ -447,7 +450,7 @@ int __inet_hash_connect(struct inet_timewait_death_row *death_row, */ inet_bind_bucket_for_each(tb, node, &head->chain) { if (tb->ib_net == net && tb->port == port) { - BUG_TRAP(!hlist_empty(&tb->owners)); + WARN_ON(hlist_empty(&tb->owners)); if (tb->fastreuse >= 0) goto next_port; if (!check_established(death_row, sk, @@ -493,7 +496,7 @@ ok: goto out; } - head = &hinfo->bhash[inet_bhashfn(snum, hinfo->bhash_size)]; + head = &hinfo->bhash[inet_bhashfn(net, snum, hinfo->bhash_size)]; tb = inet_csk(sk)->icsk_bind_hash; spin_lock_bh(&head->lock); if (sk_head(&tb->owners) == sk && !sk->sk_bind_node.next) { diff --git a/net/ipv4/inet_lro.c b/net/ipv4/inet_lro.c index 4a4d49fca1f2..cfd034a2b96e 100644 --- a/net/ipv4/inet_lro.c +++ b/net/ipv4/inet_lro.c @@ -383,8 +383,7 @@ static int __lro_proc_skb(struct net_lro_mgr *lro_mgr, struct sk_buff *skb, out2: /* send aggregated SKBs to stack */ lro_flush(lro_mgr, lro_desc); -out: /* Original SKB has to be posted to stack */ - skb->ip_summed = lro_mgr->ip_summed; +out: return 1; } diff --git a/net/ipv4/inet_timewait_sock.c b/net/ipv4/inet_timewait_sock.c index ce16e9ac24c1..1c5fd38f8824 100644 --- a/net/ipv4/inet_timewait_sock.c +++ b/net/ipv4/inet_timewait_sock.c @@ -32,7 +32,8 @@ static void __inet_twsk_kill(struct inet_timewait_sock *tw, write_unlock(lock); /* Disassociate with bind bucket. */ - bhead = &hashinfo->bhash[inet_bhashfn(tw->tw_num, hashinfo->bhash_size)]; + bhead = &hashinfo->bhash[inet_bhashfn(twsk_net(tw), tw->tw_num, + hashinfo->bhash_size)]; spin_lock(&bhead->lock); tb = tw->tw_tb; __hlist_del(&tw->tw_bind_node); @@ -81,10 +82,11 @@ void __inet_twsk_hashdance(struct inet_timewait_sock *tw, struct sock *sk, Note, that any socket with inet->num != 0 MUST be bound in binding cache, even if it is closed. */ - bhead = &hashinfo->bhash[inet_bhashfn(inet->num, hashinfo->bhash_size)]; + bhead = &hashinfo->bhash[inet_bhashfn(twsk_net(tw), inet->num, + hashinfo->bhash_size)]; spin_lock(&bhead->lock); tw->tw_tb = icsk->icsk_bind_hash; - BUG_TRAP(icsk->icsk_bind_hash); + WARN_ON(!icsk->icsk_bind_hash); inet_twsk_add_bind_node(tw, &tw->tw_tb->owners); spin_unlock(&bhead->lock); @@ -124,6 +126,7 @@ struct inet_timewait_sock *inet_twsk_alloc(const struct sock *sk, const int stat tw->tw_reuse = sk->sk_reuse; tw->tw_hash = sk->sk_hash; tw->tw_ipv6only = 0; + tw->tw_transparent = inet->transparent; tw->tw_prot = sk->sk_prot_creator; twsk_net_set(tw, hold_net(sock_net(sk))); atomic_set(&tw->tw_refcnt, 1); @@ -158,6 +161,9 @@ rescan: __inet_twsk_del_dead_node(tw); spin_unlock(&twdr->death_lock); __inet_twsk_kill(tw, twdr->hashinfo); +#ifdef CONFIG_NET_NS + NET_INC_STATS_BH(twsk_net(tw), LINUX_MIB_TIMEWAITED); +#endif inet_twsk_put(tw); killed++; spin_lock(&twdr->death_lock); @@ -176,8 +182,9 @@ rescan: } twdr->tw_count -= killed; - NET_ADD_STATS_BH(LINUX_MIB_TIMEWAITED, killed); - +#ifndef CONFIG_NET_NS + NET_ADD_STATS_BH(&init_net, LINUX_MIB_TIMEWAITED, killed); +#endif return ret; } @@ -370,6 +377,9 @@ void inet_twdr_twcal_tick(unsigned long data) &twdr->twcal_row[slot]) { __inet_twsk_del_dead_node(tw); __inet_twsk_kill(tw, twdr->hashinfo); +#ifdef CONFIG_NET_NS + NET_INC_STATS_BH(twsk_net(tw), LINUX_MIB_TIMEWAITKILLED); +#endif inet_twsk_put(tw); killed++; } @@ -393,8 +403,45 @@ void inet_twdr_twcal_tick(unsigned long data) out: if ((twdr->tw_count -= killed) == 0) del_timer(&twdr->tw_timer); - NET_ADD_STATS_BH(LINUX_MIB_TIMEWAITKILLED, killed); +#ifndef CONFIG_NET_NS + NET_ADD_STATS_BH(&init_net, LINUX_MIB_TIMEWAITKILLED, killed); +#endif spin_unlock(&twdr->death_lock); } EXPORT_SYMBOL_GPL(inet_twdr_twcal_tick); + +void inet_twsk_purge(struct net *net, struct inet_hashinfo *hashinfo, + struct inet_timewait_death_row *twdr, int family) +{ + struct inet_timewait_sock *tw; + struct sock *sk; + struct hlist_node *node; + int h; + + local_bh_disable(); + for (h = 0; h < (hashinfo->ehash_size); h++) { + struct inet_ehash_bucket *head = + inet_ehash_bucket(hashinfo, h); + rwlock_t *lock = inet_ehash_lockp(hashinfo, h); +restart: + write_lock(lock); + sk_for_each(sk, node, &head->twchain) { + + tw = inet_twsk(sk); + if (!net_eq(twsk_net(tw), net) || + tw->tw_family != family) + continue; + + atomic_inc(&tw->tw_refcnt); + write_unlock(lock); + inet_twsk_deschedule(tw, twdr); + inet_twsk_put(tw); + + goto restart; + } + write_unlock(lock); + } + local_bh_enable(); +} +EXPORT_SYMBOL_GPL(inet_twsk_purge); diff --git a/net/ipv4/inetpeer.c b/net/ipv4/inetpeer.c index af995198f643..a456ceeac3f2 100644 --- a/net/ipv4/inetpeer.c +++ b/net/ipv4/inetpeer.c @@ -3,8 +3,6 @@ * * This source is covered by the GNU GPL, the same as all kernel sources. * - * Version: $Id: inetpeer.c,v 1.7 2001/09/20 21:22:50 davem Exp $ - * * Authors: Andrey V. Savochkin <saw@msu.ru> */ diff --git a/net/ipv4/ip_forward.c b/net/ipv4/ip_forward.c index 4813c39b438b..450016b89a18 100644 --- a/net/ipv4/ip_forward.c +++ b/net/ipv4/ip_forward.c @@ -5,8 +5,6 @@ * * The IP forwarding functionality. * - * Version: $Id: ip_forward.c,v 1.48 2000/12/13 18:31:48 davem Exp $ - * * Authors: see ip.c * * Fixes: @@ -44,7 +42,7 @@ static int ip_forward_finish(struct sk_buff *skb) { struct ip_options * opt = &(IPCB(skb)->opt); - IP_INC_STATS_BH(IPSTATS_MIB_OUTFORWDATAGRAMS); + IP_INC_STATS_BH(dev_net(skb->dst->dev), IPSTATS_MIB_OUTFORWDATAGRAMS); if (unlikely(opt->optlen)) ip_forward_options(skb); @@ -58,6 +56,9 @@ int ip_forward(struct sk_buff *skb) struct rtable *rt; /* Route we use */ struct ip_options * opt = &(IPCB(skb)->opt); + if (skb_warn_if_lro(skb)) + goto drop; + if (!xfrm4_policy_check(NULL, XFRM_POLICY_FWD, skb)) goto drop; @@ -87,7 +88,7 @@ int ip_forward(struct sk_buff *skb) if (unlikely(skb->len > dst_mtu(&rt->u.dst) && !skb_is_gso(skb) && (ip_hdr(skb)->frag_off & htons(IP_DF))) && !skb->local_df) { - IP_INC_STATS(IPSTATS_MIB_FRAGFAILS); + IP_INC_STATS(dev_net(rt->u.dst.dev), IPSTATS_MIB_FRAGFAILS); icmp_send(skb, ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED, htonl(dst_mtu(&rt->u.dst))); goto drop; @@ -122,7 +123,7 @@ sr_failed: too_many_hops: /* Tell the sender its packet died... */ - IP_INC_STATS_BH(IPSTATS_MIB_INHDRERRORS); + IP_INC_STATS_BH(dev_net(skb->dst->dev), IPSTATS_MIB_INHDRERRORS); icmp_send(skb, ICMP_TIME_EXCEEDED, ICMP_EXC_TTL, 0); drop: kfree_skb(skb); diff --git a/net/ipv4/ip_fragment.c b/net/ipv4/ip_fragment.c index cd6ce6ac6358..e4f81f54befe 100644 --- a/net/ipv4/ip_fragment.c +++ b/net/ipv4/ip_fragment.c @@ -5,10 +5,8 @@ * * The IP fragmentation functionality. * - * Version: $Id: ip_fragment.c,v 1.59 2002/01/12 07:54:56 davem Exp $ - * * Authors: Fred N. van Kempen <waltje@uWalt.NL.Mugnet.ORG> - * Alan Cox <Alan.Cox@linux.org> + * Alan Cox <alan@lxorguk.ukuu.org.uk> * * Fixes: * Alan Cox : Split from ip.c , see ip_input.c for history. @@ -180,7 +178,7 @@ static void ip_evictor(struct net *net) evicted = inet_frag_evictor(&net->ipv4.frags, &ip4_frags); if (evicted) - IP_ADD_STATS_BH(IPSTATS_MIB_REASMFAILS, evicted); + IP_ADD_STATS_BH(net, IPSTATS_MIB_REASMFAILS, evicted); } /* @@ -189,8 +187,10 @@ static void ip_evictor(struct net *net) static void ip_expire(unsigned long arg) { struct ipq *qp; + struct net *net; qp = container_of((struct inet_frag_queue *) arg, struct ipq, q); + net = container_of(qp->q.net, struct net, ipv4.frags); spin_lock(&qp->q.lock); @@ -199,14 +199,12 @@ static void ip_expire(unsigned long arg) ipq_kill(qp); - IP_INC_STATS_BH(IPSTATS_MIB_REASMTIMEOUT); - IP_INC_STATS_BH(IPSTATS_MIB_REASMFAILS); + IP_INC_STATS_BH(net, IPSTATS_MIB_REASMTIMEOUT); + IP_INC_STATS_BH(net, IPSTATS_MIB_REASMFAILS); if ((qp->q.last_in & INET_FRAG_FIRST_IN) && qp->q.fragments != NULL) { struct sk_buff *head = qp->q.fragments; - struct net *net; - net = container_of(qp->q.net, struct net, ipv4.frags); /* Send an ICMP "Fragment Reassembly Timeout" message. */ if ((head->dev = dev_get_by_index(net, qp->iif)) != NULL) { icmp_send(head, ICMP_TIME_EXCEEDED, ICMP_EXC_FRAGTIME, 0); @@ -229,6 +227,8 @@ static inline struct ipq *ip_find(struct net *net, struct iphdr *iph, u32 user) arg.iph = iph; arg.user = user; + + read_lock(&ip4_frags.lock); hash = ipqhashfn(iph->id, iph->saddr, iph->daddr, iph->protocol); q = inet_frag_find(&net->ipv4.frags, &ip4_frags, &arg, hash); @@ -261,7 +261,10 @@ static inline int ip_frag_too_far(struct ipq *qp) rc = qp->q.fragments && (end - start) > max; if (rc) { - IP_INC_STATS_BH(IPSTATS_MIB_REASMFAILS); + struct net *net; + + net = container_of(qp->q.net, struct net, ipv4.frags); + IP_INC_STATS_BH(net, IPSTATS_MIB_REASMFAILS); } return rc; @@ -485,8 +488,8 @@ static int ip_frag_reasm(struct ipq *qp, struct sk_buff *prev, qp->q.fragments = head; } - BUG_TRAP(head != NULL); - BUG_TRAP(FRAG_CB(head)->offset == 0); + WARN_ON(head == NULL); + WARN_ON(FRAG_CB(head)->offset != 0); /* Allocate a new buffer for the datagram. */ ihlen = ip_hdrlen(head); @@ -545,7 +548,7 @@ static int ip_frag_reasm(struct ipq *qp, struct sk_buff *prev, iph = ip_hdr(head); iph->frag_off = 0; iph->tot_len = htons(len); - IP_INC_STATS_BH(IPSTATS_MIB_REASMOKS); + IP_INC_STATS_BH(dev_net(dev), IPSTATS_MIB_REASMOKS); qp->q.fragments = NULL; return 0; @@ -560,7 +563,7 @@ out_oversize: "Oversized IP packet from " NIPQUAD_FMT ".\n", NIPQUAD(qp->saddr)); out_fail: - IP_INC_STATS_BH(IPSTATS_MIB_REASMFAILS); + IP_INC_STATS_BH(dev_net(dev), IPSTATS_MIB_REASMFAILS); return err; } @@ -570,9 +573,9 @@ int ip_defrag(struct sk_buff *skb, u32 user) struct ipq *qp; struct net *net; - IP_INC_STATS_BH(IPSTATS_MIB_REASMREQDS); - net = skb->dev ? dev_net(skb->dev) : dev_net(skb->dst->dev); + IP_INC_STATS_BH(net, IPSTATS_MIB_REASMREQDS); + /* Start by cleaning up the memory. */ if (atomic_read(&net->ipv4.frags.mem) > net->ipv4.frags.high_thresh) ip_evictor(net); @@ -590,7 +593,7 @@ int ip_defrag(struct sk_buff *skb, u32 user) return ret; } - IP_INC_STATS_BH(IPSTATS_MIB_REASMFAILS); + IP_INC_STATS_BH(net, IPSTATS_MIB_REASMFAILS); kfree_skb(skb); return -ENOMEM; } @@ -598,7 +601,7 @@ int ip_defrag(struct sk_buff *skb, u32 user) #ifdef CONFIG_SYSCTL static int zero; -static struct ctl_table ip4_frags_ctl_table[] = { +static struct ctl_table ip4_frags_ns_ctl_table[] = { { .ctl_name = NET_IPV4_IPFRAG_HIGH_THRESH, .procname = "ipfrag_high_thresh", @@ -624,6 +627,10 @@ static struct ctl_table ip4_frags_ctl_table[] = { .proc_handler = &proc_dointvec_jiffies, .strategy = &sysctl_jiffies }, + { } +}; + +static struct ctl_table ip4_frags_ctl_table[] = { { .ctl_name = NET_IPV4_IPFRAG_SECRET_INTERVAL, .procname = "ipfrag_secret_interval", @@ -644,22 +651,20 @@ static struct ctl_table ip4_frags_ctl_table[] = { { } }; -static int ip4_frags_ctl_register(struct net *net) +static int ip4_frags_ns_ctl_register(struct net *net) { struct ctl_table *table; struct ctl_table_header *hdr; - table = ip4_frags_ctl_table; + table = ip4_frags_ns_ctl_table; if (net != &init_net) { - table = kmemdup(table, sizeof(ip4_frags_ctl_table), GFP_KERNEL); + table = kmemdup(table, sizeof(ip4_frags_ns_ctl_table), GFP_KERNEL); if (table == NULL) goto err_alloc; table[0].data = &net->ipv4.frags.high_thresh; table[1].data = &net->ipv4.frags.low_thresh; table[2].data = &net->ipv4.frags.timeout; - table[3].mode &= ~0222; - table[4].mode &= ~0222; } hdr = register_net_sysctl_table(net, net_ipv4_ctl_path, table); @@ -676,7 +681,7 @@ err_alloc: return -ENOMEM; } -static void ip4_frags_ctl_unregister(struct net *net) +static void ip4_frags_ns_ctl_unregister(struct net *net) { struct ctl_table *table; @@ -684,13 +689,22 @@ static void ip4_frags_ctl_unregister(struct net *net) unregister_net_sysctl_table(net->ipv4.frags_hdr); kfree(table); } + +static void ip4_frags_ctl_register(void) +{ + register_net_sysctl_rotable(net_ipv4_ctl_path, ip4_frags_ctl_table); +} #else -static inline int ip4_frags_ctl_register(struct net *net) +static inline int ip4_frags_ns_ctl_register(struct net *net) { return 0; } -static inline void ip4_frags_ctl_unregister(struct net *net) +static inline void ip4_frags_ns_ctl_unregister(struct net *net) +{ +} + +static inline void ip4_frags_ctl_register(void) { } #endif @@ -714,12 +728,12 @@ static int ipv4_frags_init_net(struct net *net) inet_frags_init_net(&net->ipv4.frags); - return ip4_frags_ctl_register(net); + return ip4_frags_ns_ctl_register(net); } static void ipv4_frags_exit_net(struct net *net) { - ip4_frags_ctl_unregister(net); + ip4_frags_ns_ctl_unregister(net); inet_frags_exit_net(&net->ipv4.frags, &ip4_frags); } @@ -730,6 +744,7 @@ static struct pernet_operations ip4_frags_ops = { void __init ipfrag_init(void) { + ip4_frags_ctl_register(); register_pernet_subsys(&ip4_frags_ops); ip4_frags.hashfn = ip4_hashfn; ip4_frags.constructor = ip4_frag_init; diff --git a/net/ipv4/ip_gre.c b/net/ipv4/ip_gre.c index 4342cba4ff82..85c487b8572b 100644 --- a/net/ipv4/ip_gre.c +++ b/net/ipv4/ip_gre.c @@ -27,6 +27,7 @@ #include <linux/inetdevice.h> #include <linux/igmp.h> #include <linux/netfilter_ipv4.h> +#include <linux/etherdevice.h> #include <linux/if_ether.h> #include <net/sock.h> @@ -41,6 +42,7 @@ #include <net/xfrm.h> #include <net/net_namespace.h> #include <net/netns/generic.h> +#include <net/rtnetlink.h> #ifdef CONFIG_IPV6 #include <net/ipv6.h> @@ -117,8 +119,10 @@ Alexey Kuznetsov. */ +static struct rtnl_link_ops ipgre_link_ops __read_mostly; static int ipgre_tunnel_init(struct net_device *dev); static void ipgre_tunnel_setup(struct net_device *dev); +static int ipgre_tunnel_bind_dev(struct net_device *dev); /* Fallback tunnel: no source, no destination, no key, no options */ @@ -163,38 +167,64 @@ static DEFINE_RWLOCK(ipgre_lock); /* Given src, dst and key, find appropriate for input tunnel. */ static struct ip_tunnel * ipgre_tunnel_lookup(struct net *net, - __be32 remote, __be32 local, __be32 key) + __be32 remote, __be32 local, + __be32 key, __be16 gre_proto) { unsigned h0 = HASH(remote); unsigned h1 = HASH(key); struct ip_tunnel *t; + struct ip_tunnel *t2 = NULL; struct ipgre_net *ign = net_generic(net, ipgre_net_id); + int dev_type = (gre_proto == htons(ETH_P_TEB)) ? + ARPHRD_ETHER : ARPHRD_IPGRE; for (t = ign->tunnels_r_l[h0^h1]; t; t = t->next) { if (local == t->parms.iph.saddr && remote == t->parms.iph.daddr) { - if (t->parms.i_key == key && (t->dev->flags&IFF_UP)) - return t; + if (t->parms.i_key == key && t->dev->flags & IFF_UP) { + if (t->dev->type == dev_type) + return t; + if (t->dev->type == ARPHRD_IPGRE && !t2) + t2 = t; + } } } + for (t = ign->tunnels_r[h0^h1]; t; t = t->next) { if (remote == t->parms.iph.daddr) { - if (t->parms.i_key == key && (t->dev->flags&IFF_UP)) - return t; + if (t->parms.i_key == key && t->dev->flags & IFF_UP) { + if (t->dev->type == dev_type) + return t; + if (t->dev->type == ARPHRD_IPGRE && !t2) + t2 = t; + } } } + for (t = ign->tunnels_l[h1]; t; t = t->next) { if (local == t->parms.iph.saddr || (local == t->parms.iph.daddr && ipv4_is_multicast(local))) { - if (t->parms.i_key == key && (t->dev->flags&IFF_UP)) - return t; + if (t->parms.i_key == key && t->dev->flags & IFF_UP) { + if (t->dev->type == dev_type) + return t; + if (t->dev->type == ARPHRD_IPGRE && !t2) + t2 = t; + } } } + for (t = ign->tunnels_wc[h1]; t; t = t->next) { - if (t->parms.i_key == key && (t->dev->flags&IFF_UP)) - return t; + if (t->parms.i_key == key && t->dev->flags & IFF_UP) { + if (t->dev->type == dev_type) + return t; + if (t->dev->type == ARPHRD_IPGRE && !t2) + t2 = t; + } } + if (t2) + return t2; + if (ign->fb_tunnel_dev->flags&IFF_UP) return netdev_priv(ign->fb_tunnel_dev); return NULL; @@ -249,25 +279,37 @@ static void ipgre_tunnel_unlink(struct ipgre_net *ign, struct ip_tunnel *t) } } -static struct ip_tunnel * ipgre_tunnel_locate(struct net *net, - struct ip_tunnel_parm *parms, int create) +static struct ip_tunnel *ipgre_tunnel_find(struct net *net, + struct ip_tunnel_parm *parms, + int type) { __be32 remote = parms->iph.daddr; __be32 local = parms->iph.saddr; __be32 key = parms->i_key; - struct ip_tunnel *t, **tp, *nt; + struct ip_tunnel *t, **tp; + struct ipgre_net *ign = net_generic(net, ipgre_net_id); + + for (tp = __ipgre_bucket(ign, parms); (t = *tp) != NULL; tp = &t->next) + if (local == t->parms.iph.saddr && + remote == t->parms.iph.daddr && + key == t->parms.i_key && + type == t->dev->type) + break; + + return t; +} + +static struct ip_tunnel * ipgre_tunnel_locate(struct net *net, + struct ip_tunnel_parm *parms, int create) +{ + struct ip_tunnel *t, *nt; struct net_device *dev; char name[IFNAMSIZ]; struct ipgre_net *ign = net_generic(net, ipgre_net_id); - for (tp = __ipgre_bucket(ign, parms); (t = *tp) != NULL; tp = &t->next) { - if (local == t->parms.iph.saddr && remote == t->parms.iph.daddr) { - if (key == t->parms.i_key) - return t; - } - } - if (!create) - return NULL; + t = ipgre_tunnel_find(net, parms, ARPHRD_IPGRE); + if (t || !create) + return t; if (parms->name[0]) strlcpy(name, parms->name, IFNAMSIZ); @@ -285,9 +327,11 @@ static struct ip_tunnel * ipgre_tunnel_locate(struct net *net, goto failed_free; } - dev->init = ipgre_tunnel_init; nt = netdev_priv(dev); nt->parms = *parms; + dev->rtnl_link_ops = &ipgre_link_ops; + + dev->mtu = ipgre_tunnel_bind_dev(dev); if (register_netdevice(dev) < 0) goto failed_free; @@ -380,8 +424,9 @@ static void ipgre_err(struct sk_buff *skb, u32 info) read_lock(&ipgre_lock); t = ipgre_tunnel_lookup(dev_net(skb->dev), iph->daddr, iph->saddr, - (flags&GRE_KEY) ? - *(((__be32*)p) + (grehlen>>2) - 1) : 0); + flags & GRE_KEY ? + *(((__be32 *)p) + (grehlen / 4) - 1) : 0, + p[1]); if (t == NULL || t->parms.iph.daddr == 0 || ipv4_is_multicast(t->parms.iph.daddr)) goto out; @@ -431,6 +476,8 @@ static int ipgre_rcv(struct sk_buff *skb) u32 seqno = 0; struct ip_tunnel *tunnel; int offset = 4; + __be16 gre_proto; + unsigned int len; if (!pskb_may_pull(skb, 16)) goto drop_nolock; @@ -470,18 +517,22 @@ static int ipgre_rcv(struct sk_buff *skb) } } + gre_proto = *(__be16 *)(h + 2); + read_lock(&ipgre_lock); if ((tunnel = ipgre_tunnel_lookup(dev_net(skb->dev), - iph->saddr, iph->daddr, key)) != NULL) { + iph->saddr, iph->daddr, key, + gre_proto))) { + struct net_device_stats *stats = &tunnel->dev->stats; + secpath_reset(skb); - skb->protocol = *(__be16*)(h + 2); + skb->protocol = gre_proto; /* WCCP version 1 and 2 protocol decoding. * - Change protocol to IP * - When dealing with WCCPv2, Skip extra 4 bytes in GRE header */ - if (flags == 0 && - skb->protocol == htons(ETH_P_WCCP)) { + if (flags == 0 && gre_proto == htons(ETH_P_WCCP)) { skb->protocol = htons(ETH_P_IP); if ((*(h + offset) & 0xF0) != 0x40) offset += 4; @@ -489,7 +540,6 @@ static int ipgre_rcv(struct sk_buff *skb) skb->mac_header = skb->network_header; __pskb_pull(skb, offset); - skb_reset_network_header(skb); skb_postpull_rcsum(skb, skb_transport_header(skb), offset); skb->pkt_type = PACKET_HOST; #ifdef CONFIG_NET_IPGRE_BROADCAST @@ -497,33 +547,52 @@ static int ipgre_rcv(struct sk_buff *skb) /* Looped back packet, drop it! */ if (skb->rtable->fl.iif == 0) goto drop; - tunnel->stat.multicast++; + stats->multicast++; skb->pkt_type = PACKET_BROADCAST; } #endif if (((flags&GRE_CSUM) && csum) || (!(flags&GRE_CSUM) && tunnel->parms.i_flags&GRE_CSUM)) { - tunnel->stat.rx_crc_errors++; - tunnel->stat.rx_errors++; + stats->rx_crc_errors++; + stats->rx_errors++; goto drop; } if (tunnel->parms.i_flags&GRE_SEQ) { if (!(flags&GRE_SEQ) || (tunnel->i_seqno && (s32)(seqno - tunnel->i_seqno) < 0)) { - tunnel->stat.rx_fifo_errors++; - tunnel->stat.rx_errors++; + stats->rx_fifo_errors++; + stats->rx_errors++; goto drop; } tunnel->i_seqno = seqno + 1; } - tunnel->stat.rx_packets++; - tunnel->stat.rx_bytes += skb->len; + + len = skb->len; + + /* Warning: All skb pointers will be invalidated! */ + if (tunnel->dev->type == ARPHRD_ETHER) { + if (!pskb_may_pull(skb, ETH_HLEN)) { + stats->rx_length_errors++; + stats->rx_errors++; + goto drop; + } + + iph = ip_hdr(skb); + skb->protocol = eth_type_trans(skb, tunnel->dev); + skb_postpull_rcsum(skb, eth_hdr(skb), ETH_HLEN); + } + + stats->rx_packets++; + stats->rx_bytes += len; skb->dev = tunnel->dev; dst_release(skb->dst); skb->dst = NULL; nf_reset(skb); + + skb_reset_network_header(skb); ipgre_ecn_decapsulate(iph, skb); + netif_rx(skb); read_unlock(&ipgre_lock); return(0); @@ -540,7 +609,7 @@ drop_nolock: static int ipgre_tunnel_xmit(struct sk_buff *skb, struct net_device *dev) { struct ip_tunnel *tunnel = netdev_priv(dev); - struct net_device_stats *stats = &tunnel->stat; + struct net_device_stats *stats = &tunnel->dev->stats; struct iphdr *old_iph = ip_hdr(skb); struct iphdr *tiph; u8 tos; @@ -554,11 +623,14 @@ static int ipgre_tunnel_xmit(struct sk_buff *skb, struct net_device *dev) int mtu; if (tunnel->recursion++) { - tunnel->stat.collisions++; + stats->collisions++; goto tx_error; } - if (dev->header_ops) { + if (dev->type == ARPHRD_ETHER) + IPCB(skb)->flags = 0; + + if (dev->header_ops && dev->type == ARPHRD_IPGRE) { gre_hlen = 0; tiph = (struct iphdr*)skb->data; } else { @@ -570,7 +642,7 @@ static int ipgre_tunnel_xmit(struct sk_buff *skb, struct net_device *dev) /* NBMA tunnel */ if (skb->dst == NULL) { - tunnel->stat.tx_fifo_errors++; + stats->tx_fifo_errors++; goto tx_error; } @@ -621,7 +693,7 @@ static int ipgre_tunnel_xmit(struct sk_buff *skb, struct net_device *dev) .tos = RT_TOS(tos) } }, .proto = IPPROTO_GRE }; if (ip_route_output_key(dev_net(dev), &rt, &fl)) { - tunnel->stat.tx_carrier_errors++; + stats->tx_carrier_errors++; goto tx_error; } } @@ -629,13 +701,13 @@ static int ipgre_tunnel_xmit(struct sk_buff *skb, struct net_device *dev) if (tdev == dev) { ip_rt_put(rt); - tunnel->stat.collisions++; + stats->collisions++; goto tx_error; } df = tiph->frag_off; if (df) - mtu = dst_mtu(&rt->u.dst) - tunnel->hlen; + mtu = dst_mtu(&rt->u.dst) - dev->hard_header_len - tunnel->hlen; else mtu = skb->dst ? dst_mtu(skb->dst) : dev->mtu; @@ -701,7 +773,7 @@ static int ipgre_tunnel_xmit(struct sk_buff *skb, struct net_device *dev) old_iph = ip_hdr(skb); } - skb->transport_header = skb->network_header; + skb_reset_transport_header(skb); skb_push(skb, gre_hlen); skb_reset_network_header(skb); memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt)); @@ -734,8 +806,9 @@ static int ipgre_tunnel_xmit(struct sk_buff *skb, struct net_device *dev) iph->ttl = dst_metric(&rt->u.dst, RTAX_HOPLIMIT); } - ((__be16*)(iph+1))[0] = tunnel->parms.o_flags; - ((__be16*)(iph+1))[1] = skb->protocol; + ((__be16 *)(iph + 1))[0] = tunnel->parms.o_flags; + ((__be16 *)(iph + 1))[1] = (dev->type == ARPHRD_ETHER) ? + htons(ETH_P_TEB) : skb->protocol; if (tunnel->parms.o_flags&(GRE_KEY|GRE_CSUM|GRE_SEQ)) { __be32 *ptr = (__be32*)(((u8*)iph) + tunnel->hlen - 4); @@ -771,7 +844,7 @@ tx_error: return 0; } -static void ipgre_tunnel_bind_dev(struct net_device *dev) +static int ipgre_tunnel_bind_dev(struct net_device *dev) { struct net_device *tdev = NULL; struct ip_tunnel *tunnel; @@ -783,7 +856,7 @@ static void ipgre_tunnel_bind_dev(struct net_device *dev) tunnel = netdev_priv(dev); iph = &tunnel->parms.iph; - /* Guess output device to choose reasonable mtu and hard_header_len */ + /* Guess output device to choose reasonable mtu and needed_headroom */ if (iph->daddr) { struct flowi fl = { .oif = tunnel->parms.link, @@ -797,14 +870,16 @@ static void ipgre_tunnel_bind_dev(struct net_device *dev) tdev = rt->u.dst.dev; ip_rt_put(rt); } - dev->flags |= IFF_POINTOPOINT; + + if (dev->type != ARPHRD_ETHER) + dev->flags |= IFF_POINTOPOINT; } if (!tdev && tunnel->parms.link) tdev = __dev_get_by_index(dev_net(dev), tunnel->parms.link); if (tdev) { - hlen = tdev->hard_header_len; + hlen = tdev->hard_header_len + tdev->needed_headroom; mtu = tdev->mtu; } dev->iflink = tunnel->parms.link; @@ -818,10 +893,15 @@ static void ipgre_tunnel_bind_dev(struct net_device *dev) if (tunnel->parms.o_flags&GRE_SEQ) addend += 4; } - dev->hard_header_len = hlen + addend; - dev->mtu = mtu - addend; + dev->needed_headroom = addend + hlen; + mtu -= dev->hard_header_len - addend; + + if (mtu < 68) + mtu = 68; + tunnel->hlen = addend; + return mtu; } static int @@ -915,7 +995,7 @@ ipgre_tunnel_ioctl (struct net_device *dev, struct ifreq *ifr, int cmd) t->parms.iph.frag_off = p.iph.frag_off; if (t->parms.link != p.link) { t->parms.link = p.link; - ipgre_tunnel_bind_dev(dev); + dev->mtu = ipgre_tunnel_bind_dev(dev); netdev_state_change(dev); } } @@ -954,15 +1034,11 @@ done: return err; } -static struct net_device_stats *ipgre_tunnel_get_stats(struct net_device *dev) -{ - return &(((struct ip_tunnel*)netdev_priv(dev))->stat); -} - static int ipgre_tunnel_change_mtu(struct net_device *dev, int new_mtu) { struct ip_tunnel *tunnel = netdev_priv(dev); - if (new_mtu < 68 || new_mtu > 0xFFF8 - tunnel->hlen) + if (new_mtu < 68 || + new_mtu > 0xFFF8 - dev->hard_header_len - tunnel->hlen) return -EINVAL; dev->mtu = new_mtu; return 0; @@ -1081,15 +1157,15 @@ static int ipgre_close(struct net_device *dev) static void ipgre_tunnel_setup(struct net_device *dev) { + dev->init = ipgre_tunnel_init; dev->uninit = ipgre_tunnel_uninit; dev->destructor = free_netdev; dev->hard_start_xmit = ipgre_tunnel_xmit; - dev->get_stats = ipgre_tunnel_get_stats; dev->do_ioctl = ipgre_tunnel_ioctl; dev->change_mtu = ipgre_tunnel_change_mtu; dev->type = ARPHRD_IPGRE; - dev->hard_header_len = LL_MAX_HEADER + sizeof(struct iphdr) + 4; + dev->needed_headroom = LL_MAX_HEADER + sizeof(struct iphdr) + 4; dev->mtu = ETH_DATA_LEN - sizeof(struct iphdr) - 4; dev->flags = IFF_NOARP; dev->iflink = 0; @@ -1111,8 +1187,6 @@ static int ipgre_tunnel_init(struct net_device *dev) memcpy(dev->dev_addr, &tunnel->parms.iph.saddr, 4); memcpy(dev->broadcast, &tunnel->parms.iph.daddr, 4); - ipgre_tunnel_bind_dev(dev); - if (iph->daddr) { #ifdef CONFIG_NET_IPGRE_BROADCAST if (ipv4_is_multicast(iph->daddr)) { @@ -1193,6 +1267,7 @@ static int ipgre_init_net(struct net *net) ign->fb_tunnel_dev->init = ipgre_fb_tunnel_init; dev_net_set(ign->fb_tunnel_dev, net); + ign->fb_tunnel_dev->rtnl_link_ops = &ipgre_link_ops; if ((err = register_netdev(ign->fb_tunnel_dev))) goto err_reg_dev; @@ -1225,6 +1300,298 @@ static struct pernet_operations ipgre_net_ops = { .exit = ipgre_exit_net, }; +static int ipgre_tunnel_validate(struct nlattr *tb[], struct nlattr *data[]) +{ + __be16 flags; + + if (!data) + return 0; + + flags = 0; + if (data[IFLA_GRE_IFLAGS]) + flags |= nla_get_be16(data[IFLA_GRE_IFLAGS]); + if (data[IFLA_GRE_OFLAGS]) + flags |= nla_get_be16(data[IFLA_GRE_OFLAGS]); + if (flags & (GRE_VERSION|GRE_ROUTING)) + return -EINVAL; + + return 0; +} + +static int ipgre_tap_validate(struct nlattr *tb[], struct nlattr *data[]) +{ + __be32 daddr; + + if (tb[IFLA_ADDRESS]) { + if (nla_len(tb[IFLA_ADDRESS]) != ETH_ALEN) + return -EINVAL; + if (!is_valid_ether_addr(nla_data(tb[IFLA_ADDRESS]))) + return -EADDRNOTAVAIL; + } + + if (!data) + goto out; + + if (data[IFLA_GRE_REMOTE]) { + memcpy(&daddr, nla_data(data[IFLA_GRE_REMOTE]), 4); + if (!daddr) + return -EINVAL; + } + +out: + return ipgre_tunnel_validate(tb, data); +} + +static void ipgre_netlink_parms(struct nlattr *data[], + struct ip_tunnel_parm *parms) +{ + memset(parms, 0, sizeof(*parms)); + + parms->iph.protocol = IPPROTO_GRE; + + if (!data) + return; + + if (data[IFLA_GRE_LINK]) + parms->link = nla_get_u32(data[IFLA_GRE_LINK]); + + if (data[IFLA_GRE_IFLAGS]) + parms->i_flags = nla_get_be16(data[IFLA_GRE_IFLAGS]); + + if (data[IFLA_GRE_OFLAGS]) + parms->o_flags = nla_get_be16(data[IFLA_GRE_OFLAGS]); + + if (data[IFLA_GRE_IKEY]) + parms->i_key = nla_get_be32(data[IFLA_GRE_IKEY]); + + if (data[IFLA_GRE_OKEY]) + parms->o_key = nla_get_be32(data[IFLA_GRE_OKEY]); + + if (data[IFLA_GRE_LOCAL]) + parms->iph.saddr = nla_get_be32(data[IFLA_GRE_LOCAL]); + + if (data[IFLA_GRE_REMOTE]) + parms->iph.daddr = nla_get_be32(data[IFLA_GRE_REMOTE]); + + if (data[IFLA_GRE_TTL]) + parms->iph.ttl = nla_get_u8(data[IFLA_GRE_TTL]); + + if (data[IFLA_GRE_TOS]) + parms->iph.tos = nla_get_u8(data[IFLA_GRE_TOS]); + + if (!data[IFLA_GRE_PMTUDISC] || nla_get_u8(data[IFLA_GRE_PMTUDISC])) + parms->iph.frag_off = htons(IP_DF); +} + +static int ipgre_tap_init(struct net_device *dev) +{ + struct ip_tunnel *tunnel; + + tunnel = netdev_priv(dev); + + tunnel->dev = dev; + strcpy(tunnel->parms.name, dev->name); + + ipgre_tunnel_bind_dev(dev); + + return 0; +} + +static void ipgre_tap_setup(struct net_device *dev) +{ + + ether_setup(dev); + + dev->init = ipgre_tap_init; + dev->uninit = ipgre_tunnel_uninit; + dev->destructor = free_netdev; + dev->hard_start_xmit = ipgre_tunnel_xmit; + dev->change_mtu = ipgre_tunnel_change_mtu; + + dev->iflink = 0; + dev->features |= NETIF_F_NETNS_LOCAL; +} + +static int ipgre_newlink(struct net_device *dev, struct nlattr *tb[], + struct nlattr *data[]) +{ + struct ip_tunnel *nt; + struct net *net = dev_net(dev); + struct ipgre_net *ign = net_generic(net, ipgre_net_id); + int mtu; + int err; + + nt = netdev_priv(dev); + ipgre_netlink_parms(data, &nt->parms); + + if (ipgre_tunnel_find(net, &nt->parms, dev->type)) + return -EEXIST; + + if (dev->type == ARPHRD_ETHER && !tb[IFLA_ADDRESS]) + random_ether_addr(dev->dev_addr); + + mtu = ipgre_tunnel_bind_dev(dev); + if (!tb[IFLA_MTU]) + dev->mtu = mtu; + + err = register_netdevice(dev); + if (err) + goto out; + + dev_hold(dev); + ipgre_tunnel_link(ign, nt); + +out: + return err; +} + +static int ipgre_changelink(struct net_device *dev, struct nlattr *tb[], + struct nlattr *data[]) +{ + struct ip_tunnel *t, *nt; + struct net *net = dev_net(dev); + struct ipgre_net *ign = net_generic(net, ipgre_net_id); + struct ip_tunnel_parm p; + int mtu; + + if (dev == ign->fb_tunnel_dev) + return -EINVAL; + + nt = netdev_priv(dev); + ipgre_netlink_parms(data, &p); + + t = ipgre_tunnel_locate(net, &p, 0); + + if (t) { + if (t->dev != dev) + return -EEXIST; + } else { + unsigned nflags = 0; + + t = nt; + + if (ipv4_is_multicast(p.iph.daddr)) + nflags = IFF_BROADCAST; + else if (p.iph.daddr) + nflags = IFF_POINTOPOINT; + + if ((dev->flags ^ nflags) & + (IFF_POINTOPOINT | IFF_BROADCAST)) + return -EINVAL; + + ipgre_tunnel_unlink(ign, t); + t->parms.iph.saddr = p.iph.saddr; + t->parms.iph.daddr = p.iph.daddr; + t->parms.i_key = p.i_key; + memcpy(dev->dev_addr, &p.iph.saddr, 4); + memcpy(dev->broadcast, &p.iph.daddr, 4); + ipgre_tunnel_link(ign, t); + netdev_state_change(dev); + } + + t->parms.o_key = p.o_key; + t->parms.iph.ttl = p.iph.ttl; + t->parms.iph.tos = p.iph.tos; + t->parms.iph.frag_off = p.iph.frag_off; + + if (t->parms.link != p.link) { + t->parms.link = p.link; + mtu = ipgre_tunnel_bind_dev(dev); + if (!tb[IFLA_MTU]) + dev->mtu = mtu; + netdev_state_change(dev); + } + + return 0; +} + +static size_t ipgre_get_size(const struct net_device *dev) +{ + return + /* IFLA_GRE_LINK */ + nla_total_size(4) + + /* IFLA_GRE_IFLAGS */ + nla_total_size(2) + + /* IFLA_GRE_OFLAGS */ + nla_total_size(2) + + /* IFLA_GRE_IKEY */ + nla_total_size(4) + + /* IFLA_GRE_OKEY */ + nla_total_size(4) + + /* IFLA_GRE_LOCAL */ + nla_total_size(4) + + /* IFLA_GRE_REMOTE */ + nla_total_size(4) + + /* IFLA_GRE_TTL */ + nla_total_size(1) + + /* IFLA_GRE_TOS */ + nla_total_size(1) + + /* IFLA_GRE_PMTUDISC */ + nla_total_size(1) + + 0; +} + +static int ipgre_fill_info(struct sk_buff *skb, const struct net_device *dev) +{ + struct ip_tunnel *t = netdev_priv(dev); + struct ip_tunnel_parm *p = &t->parms; + + NLA_PUT_U32(skb, IFLA_GRE_LINK, p->link); + NLA_PUT_BE16(skb, IFLA_GRE_IFLAGS, p->i_flags); + NLA_PUT_BE16(skb, IFLA_GRE_OFLAGS, p->o_flags); + NLA_PUT_BE32(skb, IFLA_GRE_IKEY, p->i_key); + NLA_PUT_BE32(skb, IFLA_GRE_OKEY, p->o_key); + NLA_PUT_BE32(skb, IFLA_GRE_LOCAL, p->iph.saddr); + NLA_PUT_BE32(skb, IFLA_GRE_REMOTE, p->iph.daddr); + NLA_PUT_U8(skb, IFLA_GRE_TTL, p->iph.ttl); + NLA_PUT_U8(skb, IFLA_GRE_TOS, p->iph.tos); + NLA_PUT_U8(skb, IFLA_GRE_PMTUDISC, !!(p->iph.frag_off & htons(IP_DF))); + + return 0; + +nla_put_failure: + return -EMSGSIZE; +} + +static const struct nla_policy ipgre_policy[IFLA_GRE_MAX + 1] = { + [IFLA_GRE_LINK] = { .type = NLA_U32 }, + [IFLA_GRE_IFLAGS] = { .type = NLA_U16 }, + [IFLA_GRE_OFLAGS] = { .type = NLA_U16 }, + [IFLA_GRE_IKEY] = { .type = NLA_U32 }, + [IFLA_GRE_OKEY] = { .type = NLA_U32 }, + [IFLA_GRE_LOCAL] = { .len = FIELD_SIZEOF(struct iphdr, saddr) }, + [IFLA_GRE_REMOTE] = { .len = FIELD_SIZEOF(struct iphdr, daddr) }, + [IFLA_GRE_TTL] = { .type = NLA_U8 }, + [IFLA_GRE_TOS] = { .type = NLA_U8 }, + [IFLA_GRE_PMTUDISC] = { .type = NLA_U8 }, +}; + +static struct rtnl_link_ops ipgre_link_ops __read_mostly = { + .kind = "gre", + .maxtype = IFLA_GRE_MAX, + .policy = ipgre_policy, + .priv_size = sizeof(struct ip_tunnel), + .setup = ipgre_tunnel_setup, + .validate = ipgre_tunnel_validate, + .newlink = ipgre_newlink, + .changelink = ipgre_changelink, + .get_size = ipgre_get_size, + .fill_info = ipgre_fill_info, +}; + +static struct rtnl_link_ops ipgre_tap_ops __read_mostly = { + .kind = "gretap", + .maxtype = IFLA_GRE_MAX, + .policy = ipgre_policy, + .priv_size = sizeof(struct ip_tunnel), + .setup = ipgre_tap_setup, + .validate = ipgre_tap_validate, + .newlink = ipgre_newlink, + .changelink = ipgre_changelink, + .get_size = ipgre_get_size, + .fill_info = ipgre_fill_info, +}; + /* * And now the modules code and kernel interface. */ @@ -1242,19 +1609,39 @@ static int __init ipgre_init(void) err = register_pernet_gen_device(&ipgre_net_id, &ipgre_net_ops); if (err < 0) - inet_del_protocol(&ipgre_protocol, IPPROTO_GRE); + goto gen_device_failed; + err = rtnl_link_register(&ipgre_link_ops); + if (err < 0) + goto rtnl_link_failed; + + err = rtnl_link_register(&ipgre_tap_ops); + if (err < 0) + goto tap_ops_failed; + +out: return err; + +tap_ops_failed: + rtnl_link_unregister(&ipgre_link_ops); +rtnl_link_failed: + unregister_pernet_gen_device(ipgre_net_id, &ipgre_net_ops); +gen_device_failed: + inet_del_protocol(&ipgre_protocol, IPPROTO_GRE); + goto out; } static void __exit ipgre_fini(void) { + rtnl_link_unregister(&ipgre_tap_ops); + rtnl_link_unregister(&ipgre_link_ops); + unregister_pernet_gen_device(ipgre_net_id, &ipgre_net_ops); if (inet_del_protocol(&ipgre_protocol, IPPROTO_GRE) < 0) printk(KERN_INFO "ipgre close: can't remove protocol\n"); - - unregister_pernet_gen_device(ipgre_net_id, &ipgre_net_ops); } module_init(ipgre_init); module_exit(ipgre_fini); MODULE_LICENSE("GPL"); +MODULE_ALIAS_RTNL_LINK("gre"); +MODULE_ALIAS_RTNL_LINK("gretap"); diff --git a/net/ipv4/ip_input.c b/net/ipv4/ip_input.c index ff77a4a7f9ec..861978a4f1a8 100644 --- a/net/ipv4/ip_input.c +++ b/net/ipv4/ip_input.c @@ -5,12 +5,10 @@ * * The Internet Protocol (IP) module. * - * Version: $Id: ip_input.c,v 1.55 2002/01/12 07:39:45 davem Exp $ - * * Authors: Ross Biro * Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG> * Donald Becker, <becker@super.org> - * Alan Cox, <Alan.Cox@linux.org> + * Alan Cox, <alan@lxorguk.ukuu.org.uk> * Richard Underwood * Stefan Becker, <stefanb@yello.ping.de> * Jorge Cwik, <jorge@laser.satlink.net> @@ -147,12 +145,6 @@ #include <linux/netlink.h> /* - * SNMP management statistics - */ - -DEFINE_SNMP_STAT(struct ipstats_mib, ip_statistics) __read_mostly; - -/* * Process Router Attention IP option */ int ip_call_ra_chain(struct sk_buff *skb) @@ -232,16 +224,16 @@ static int ip_local_deliver_finish(struct sk_buff *skb) protocol = -ret; goto resubmit; } - IP_INC_STATS_BH(IPSTATS_MIB_INDELIVERS); + IP_INC_STATS_BH(net, IPSTATS_MIB_INDELIVERS); } else { if (!raw) { if (xfrm4_policy_check(NULL, XFRM_POLICY_IN, skb)) { - IP_INC_STATS_BH(IPSTATS_MIB_INUNKNOWNPROTOS); + IP_INC_STATS_BH(net, IPSTATS_MIB_INUNKNOWNPROTOS); icmp_send(skb, ICMP_DEST_UNREACH, ICMP_PROT_UNREACH, 0); } } else - IP_INC_STATS_BH(IPSTATS_MIB_INDELIVERS); + IP_INC_STATS_BH(net, IPSTATS_MIB_INDELIVERS); kfree_skb(skb); } } @@ -283,7 +275,7 @@ static inline int ip_rcv_options(struct sk_buff *skb) --ANK (980813) */ if (skb_cow(skb, skb_headroom(skb))) { - IP_INC_STATS_BH(IPSTATS_MIB_INDISCARDS); + IP_INC_STATS_BH(dev_net(dev), IPSTATS_MIB_INDISCARDS); goto drop; } @@ -292,7 +284,7 @@ static inline int ip_rcv_options(struct sk_buff *skb) opt->optlen = iph->ihl*4 - sizeof(struct iphdr); if (ip_options_compile(dev_net(dev), opt, skb)) { - IP_INC_STATS_BH(IPSTATS_MIB_INHDRERRORS); + IP_INC_STATS_BH(dev_net(dev), IPSTATS_MIB_INHDRERRORS); goto drop; } @@ -336,9 +328,11 @@ static int ip_rcv_finish(struct sk_buff *skb) skb->dev); if (unlikely(err)) { if (err == -EHOSTUNREACH) - IP_INC_STATS_BH(IPSTATS_MIB_INADDRERRORS); + IP_INC_STATS_BH(dev_net(skb->dev), + IPSTATS_MIB_INADDRERRORS); else if (err == -ENETUNREACH) - IP_INC_STATS_BH(IPSTATS_MIB_INNOROUTES); + IP_INC_STATS_BH(dev_net(skb->dev), + IPSTATS_MIB_INNOROUTES); goto drop; } } @@ -359,9 +353,9 @@ static int ip_rcv_finish(struct sk_buff *skb) rt = skb->rtable; if (rt->rt_type == RTN_MULTICAST) - IP_INC_STATS_BH(IPSTATS_MIB_INMCASTPKTS); + IP_INC_STATS_BH(dev_net(rt->u.dst.dev), IPSTATS_MIB_INMCASTPKTS); else if (rt->rt_type == RTN_BROADCAST) - IP_INC_STATS_BH(IPSTATS_MIB_INBCASTPKTS); + IP_INC_STATS_BH(dev_net(rt->u.dst.dev), IPSTATS_MIB_INBCASTPKTS); return dst_input(skb); @@ -384,10 +378,10 @@ int ip_rcv(struct sk_buff *skb, struct net_device *dev, struct packet_type *pt, if (skb->pkt_type == PACKET_OTHERHOST) goto drop; - IP_INC_STATS_BH(IPSTATS_MIB_INRECEIVES); + IP_INC_STATS_BH(dev_net(dev), IPSTATS_MIB_INRECEIVES); if ((skb = skb_share_check(skb, GFP_ATOMIC)) == NULL) { - IP_INC_STATS_BH(IPSTATS_MIB_INDISCARDS); + IP_INC_STATS_BH(dev_net(dev), IPSTATS_MIB_INDISCARDS); goto out; } @@ -420,7 +414,7 @@ int ip_rcv(struct sk_buff *skb, struct net_device *dev, struct packet_type *pt, len = ntohs(iph->tot_len); if (skb->len < len) { - IP_INC_STATS_BH(IPSTATS_MIB_INTRUNCATEDPKTS); + IP_INC_STATS_BH(dev_net(dev), IPSTATS_MIB_INTRUNCATEDPKTS); goto drop; } else if (len < (iph->ihl*4)) goto inhdr_error; @@ -430,7 +424,7 @@ int ip_rcv(struct sk_buff *skb, struct net_device *dev, struct packet_type *pt, * Note this now means skb->len holds ntohs(iph->tot_len). */ if (pskb_trim_rcsum(skb, len)) { - IP_INC_STATS_BH(IPSTATS_MIB_INDISCARDS); + IP_INC_STATS_BH(dev_net(dev), IPSTATS_MIB_INDISCARDS); goto drop; } @@ -441,11 +435,9 @@ int ip_rcv(struct sk_buff *skb, struct net_device *dev, struct packet_type *pt, ip_rcv_finish); inhdr_error: - IP_INC_STATS_BH(IPSTATS_MIB_INHDRERRORS); + IP_INC_STATS_BH(dev_net(dev), IPSTATS_MIB_INHDRERRORS); drop: kfree_skb(skb); out: return NET_RX_DROP; } - -EXPORT_SYMBOL(ip_statistics); diff --git a/net/ipv4/ip_options.c b/net/ipv4/ip_options.c index 33126ad2cfdc..2c88da6e7862 100644 --- a/net/ipv4/ip_options.c +++ b/net/ipv4/ip_options.c @@ -5,8 +5,6 @@ * * The options processing module for ip.c * - * Version: $Id: ip_options.c,v 1.21 2001/09/01 00:31:50 davem Exp $ - * * Authors: A.N.Kuznetsov * */ @@ -440,7 +438,7 @@ int ip_options_compile(struct net *net, goto error; } opt->cipso = optptr - iph; - if (cipso_v4_validate(&optptr)) { + if (cipso_v4_validate(skb, &optptr)) { pp_ptr = optptr; goto error; } diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c index e527628f56cf..d2a8f8bb78a6 100644 --- a/net/ipv4/ip_output.c +++ b/net/ipv4/ip_output.c @@ -5,8 +5,6 @@ * * The Internet Protocol (IP) output module. * - * Version: $Id: ip_output.c,v 1.100 2002/02/01 22:01:03 davem Exp $ - * * Authors: Ross Biro * Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG> * Donald Becker, <becker@super.org> @@ -120,7 +118,7 @@ static int ip_dev_loopback_xmit(struct sk_buff *newskb) __skb_pull(newskb, skb_network_offset(newskb)); newskb->pkt_type = PACKET_LOOPBACK; newskb->ip_summed = CHECKSUM_UNNECESSARY; - BUG_TRAP(newskb->dst); + WARN_ON(!newskb->dst); netif_rx(newskb); return 0; } @@ -184,9 +182,9 @@ static inline int ip_finish_output2(struct sk_buff *skb) unsigned int hh_len = LL_RESERVED_SPACE(dev); if (rt->rt_type == RTN_MULTICAST) - IP_INC_STATS(IPSTATS_MIB_OUTMCASTPKTS); + IP_INC_STATS(dev_net(dev), IPSTATS_MIB_OUTMCASTPKTS); else if (rt->rt_type == RTN_BROADCAST) - IP_INC_STATS(IPSTATS_MIB_OUTBCASTPKTS); + IP_INC_STATS(dev_net(dev), IPSTATS_MIB_OUTBCASTPKTS); /* Be paranoid, rather than too clever. */ if (unlikely(skb_headroom(skb) < hh_len && dev->header_ops)) { @@ -246,7 +244,7 @@ int ip_mc_output(struct sk_buff *skb) /* * If the indicated interface is up and running, send the packet. */ - IP_INC_STATS(IPSTATS_MIB_OUTREQUESTS); + IP_INC_STATS(dev_net(dev), IPSTATS_MIB_OUTREQUESTS); skb->dev = dev; skb->protocol = htons(ETH_P_IP); @@ -300,7 +298,7 @@ int ip_output(struct sk_buff *skb) { struct net_device *dev = skb->dst->dev; - IP_INC_STATS(IPSTATS_MIB_OUTREQUESTS); + IP_INC_STATS(dev_net(dev), IPSTATS_MIB_OUTREQUESTS); skb->dev = dev; skb->protocol = htons(ETH_P_IP); @@ -342,6 +340,7 @@ int ip_queue_xmit(struct sk_buff *skb, int ipfragok) .saddr = inet->saddr, .tos = RT_CONN_FLAGS(sk) } }, .proto = sk->sk_protocol, + .flags = inet_sk_flowi_flags(sk), .uli_u = { .ports = { .sport = inet->sport, .dport = inet->dport } } }; @@ -391,7 +390,7 @@ packet_routed: return ip_local_out(skb); no_route: - IP_INC_STATS(IPSTATS_MIB_OUTNOROUTES); + IP_INC_STATS(sock_net(sk), IPSTATS_MIB_OUTNOROUTES); kfree_skb(skb); return -EHOSTUNREACH; } @@ -453,7 +452,7 @@ int ip_fragment(struct sk_buff *skb, int (*output)(struct sk_buff*)) iph = ip_hdr(skb); if (unlikely((iph->frag_off & htons(IP_DF)) && !skb->local_df)) { - IP_INC_STATS(IPSTATS_MIB_FRAGFAILS); + IP_INC_STATS(dev_net(dev), IPSTATS_MIB_FRAGFAILS); icmp_send(skb, ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED, htonl(ip_skb_dst_mtu(skb))); kfree_skb(skb); @@ -544,7 +543,7 @@ int ip_fragment(struct sk_buff *skb, int (*output)(struct sk_buff*)) err = output(skb); if (!err) - IP_INC_STATS(IPSTATS_MIB_FRAGCREATES); + IP_INC_STATS(dev_net(dev), IPSTATS_MIB_FRAGCREATES); if (err || !frag) break; @@ -554,7 +553,7 @@ int ip_fragment(struct sk_buff *skb, int (*output)(struct sk_buff*)) } if (err == 0) { - IP_INC_STATS(IPSTATS_MIB_FRAGOKS); + IP_INC_STATS(dev_net(dev), IPSTATS_MIB_FRAGOKS); return 0; } @@ -563,7 +562,7 @@ int ip_fragment(struct sk_buff *skb, int (*output)(struct sk_buff*)) kfree_skb(frag); frag = skb; } - IP_INC_STATS(IPSTATS_MIB_FRAGFAILS); + IP_INC_STATS(dev_net(dev), IPSTATS_MIB_FRAGFAILS); return err; } @@ -675,15 +674,15 @@ slow_path: if (err) goto fail; - IP_INC_STATS(IPSTATS_MIB_FRAGCREATES); + IP_INC_STATS(dev_net(dev), IPSTATS_MIB_FRAGCREATES); } kfree_skb(skb); - IP_INC_STATS(IPSTATS_MIB_FRAGOKS); + IP_INC_STATS(dev_net(dev), IPSTATS_MIB_FRAGOKS); return err; fail: kfree_skb(skb); - IP_INC_STATS(IPSTATS_MIB_FRAGFAILS); + IP_INC_STATS(dev_net(dev), IPSTATS_MIB_FRAGFAILS); return err; } @@ -1049,7 +1048,7 @@ alloc_new_skb: error: inet->cork.length -= length; - IP_INC_STATS(IPSTATS_MIB_OUTDISCARDS); + IP_INC_STATS(sock_net(sk), IPSTATS_MIB_OUTDISCARDS); return err; } @@ -1191,7 +1190,7 @@ ssize_t ip_append_page(struct sock *sk, struct page *page, error: inet->cork.length -= size; - IP_INC_STATS(IPSTATS_MIB_OUTDISCARDS); + IP_INC_STATS(sock_net(sk), IPSTATS_MIB_OUTDISCARDS); return err; } @@ -1213,6 +1212,7 @@ int ip_push_pending_frames(struct sock *sk) struct sk_buff *skb, *tmp_skb; struct sk_buff **tail_skb; struct inet_sock *inet = inet_sk(sk); + struct net *net = sock_net(sk); struct ip_options *opt = NULL; struct rtable *rt = (struct rtable *)inet->cork.dst; struct iphdr *iph; @@ -1282,7 +1282,7 @@ int ip_push_pending_frames(struct sock *sk) skb->dst = dst_clone(&rt->u.dst); if (iph->protocol == IPPROTO_ICMP) - icmp_out_count(((struct icmphdr *) + icmp_out_count(net, ((struct icmphdr *) skb_transport_header(skb))->type); /* Netfilter gets whole the not fragmented skb. */ @@ -1299,7 +1299,7 @@ out: return err; error: - IP_INC_STATS(IPSTATS_MIB_OUTDISCARDS); + IP_INC_STATS(net, IPSTATS_MIB_OUTDISCARDS); goto out; } @@ -1372,7 +1372,8 @@ void ip_send_reply(struct sock *sk, struct sk_buff *skb, struct ip_reply_arg *ar .uli_u = { .ports = { .sport = tcp_hdr(skb)->dest, .dport = tcp_hdr(skb)->source } }, - .proto = sk->sk_protocol }; + .proto = sk->sk_protocol, + .flags = ip_reply_arg_flowi_flags(arg) }; security_skb_classify_flow(skb, &fl); if (ip_route_output_key(sock_net(sk), &rt, &fl)) return; diff --git a/net/ipv4/ip_sockglue.c b/net/ipv4/ip_sockglue.c index e0514e82308e..465abf0a9869 100644 --- a/net/ipv4/ip_sockglue.c +++ b/net/ipv4/ip_sockglue.c @@ -5,8 +5,6 @@ * * The IP to API glue. * - * Version: $Id: ip_sockglue.c,v 1.62 2002/02/01 22:01:04 davem Exp $ - * * Authors: see ip.c * * Fixes: @@ -421,7 +419,7 @@ static int do_ip_setsockopt(struct sock *sk, int level, (1<<IP_TTL) | (1<<IP_HDRINCL) | (1<<IP_MTU_DISCOVER) | (1<<IP_RECVERR) | (1<<IP_ROUTER_ALERT) | (1<<IP_FREEBIND) | - (1<<IP_PASSSEC))) || + (1<<IP_PASSSEC) | (1<<IP_TRANSPARENT))) || optname == IP_MULTICAST_TTL || optname == IP_MULTICAST_LOOP) { if (optlen >= sizeof(int)) { @@ -880,6 +878,16 @@ static int do_ip_setsockopt(struct sock *sk, int level, err = xfrm_user_policy(sk, optname, optval, optlen); break; + case IP_TRANSPARENT: + if (!capable(CAP_NET_ADMIN)) { + err = -EPERM; + break; + } + if (optlen < 1) + goto e_inval; + inet->transparent = !!val; + break; + default: err = -ENOPROTOOPT; break; @@ -1132,6 +1140,9 @@ static int do_ip_getsockopt(struct sock *sk, int level, int optname, case IP_FREEBIND: val = inet->freebind; break; + case IP_TRANSPARENT: + val = inet->transparent; + break; default: release_sock(sk); return -ENOPROTOOPT; diff --git a/net/ipv4/ipcomp.c b/net/ipv4/ipcomp.c index a75807b971b3..38ccb6dfb02e 100644 --- a/net/ipv4/ipcomp.c +++ b/net/ipv4/ipcomp.c @@ -14,153 +14,14 @@ * - Adaptive compression. */ #include <linux/module.h> -#include <linux/crypto.h> #include <linux/err.h> -#include <linux/pfkeyv2.h> -#include <linux/percpu.h> -#include <linux/smp.h> -#include <linux/list.h> -#include <linux/vmalloc.h> #include <linux/rtnetlink.h> -#include <linux/mutex.h> #include <net/ip.h> #include <net/xfrm.h> #include <net/icmp.h> #include <net/ipcomp.h> #include <net/protocol.h> - -struct ipcomp_tfms { - struct list_head list; - struct crypto_comp **tfms; - int users; -}; - -static DEFINE_MUTEX(ipcomp_resource_mutex); -static void **ipcomp_scratches; -static int ipcomp_scratch_users; -static LIST_HEAD(ipcomp_tfms_list); - -static int ipcomp_decompress(struct xfrm_state *x, struct sk_buff *skb) -{ - struct ipcomp_data *ipcd = x->data; - const int plen = skb->len; - int dlen = IPCOMP_SCRATCH_SIZE; - const u8 *start = skb->data; - const int cpu = get_cpu(); - u8 *scratch = *per_cpu_ptr(ipcomp_scratches, cpu); - struct crypto_comp *tfm = *per_cpu_ptr(ipcd->tfms, cpu); - int err = crypto_comp_decompress(tfm, start, plen, scratch, &dlen); - - if (err) - goto out; - - if (dlen < (plen + sizeof(struct ip_comp_hdr))) { - err = -EINVAL; - goto out; - } - - err = pskb_expand_head(skb, 0, dlen - plen, GFP_ATOMIC); - if (err) - goto out; - - skb->truesize += dlen - plen; - __skb_put(skb, dlen - plen); - skb_copy_to_linear_data(skb, scratch, dlen); -out: - put_cpu(); - return err; -} - -static int ipcomp_input(struct xfrm_state *x, struct sk_buff *skb) -{ - int nexthdr; - int err = -ENOMEM; - struct ip_comp_hdr *ipch; - - if (skb_linearize_cow(skb)) - goto out; - - skb->ip_summed = CHECKSUM_NONE; - - /* Remove ipcomp header and decompress original payload */ - ipch = (void *)skb->data; - nexthdr = ipch->nexthdr; - - skb->transport_header = skb->network_header + sizeof(*ipch); - __skb_pull(skb, sizeof(*ipch)); - err = ipcomp_decompress(x, skb); - if (err) - goto out; - - err = nexthdr; - -out: - return err; -} - -static int ipcomp_compress(struct xfrm_state *x, struct sk_buff *skb) -{ - struct ipcomp_data *ipcd = x->data; - const int plen = skb->len; - int dlen = IPCOMP_SCRATCH_SIZE; - u8 *start = skb->data; - const int cpu = get_cpu(); - u8 *scratch = *per_cpu_ptr(ipcomp_scratches, cpu); - struct crypto_comp *tfm = *per_cpu_ptr(ipcd->tfms, cpu); - int err; - - local_bh_disable(); - err = crypto_comp_compress(tfm, start, plen, scratch, &dlen); - local_bh_enable(); - if (err) - goto out; - - if ((dlen + sizeof(struct ip_comp_hdr)) >= plen) { - err = -EMSGSIZE; - goto out; - } - - memcpy(start + sizeof(struct ip_comp_hdr), scratch, dlen); - put_cpu(); - - pskb_trim(skb, dlen + sizeof(struct ip_comp_hdr)); - return 0; - -out: - put_cpu(); - return err; -} - -static int ipcomp_output(struct xfrm_state *x, struct sk_buff *skb) -{ - int err; - struct ip_comp_hdr *ipch; - struct ipcomp_data *ipcd = x->data; - - if (skb->len < ipcd->threshold) { - /* Don't bother compressing */ - goto out_ok; - } - - if (skb_linearize_cow(skb)) - goto out_ok; - - err = ipcomp_compress(x, skb); - - if (err) { - goto out_ok; - } - - /* Install ipcomp header, convert into ipcomp datagram. */ - ipch = ip_comp_hdr(skb); - ipch->nexthdr = *skb_mac_header(skb); - ipch->flags = 0; - ipch->cpi = htons((u16 )ntohl(x->id.spi)); - *skb_mac_header(skb) = IPPROTO_COMP; -out_ok: - skb_push(skb, -skb_network_offset(skb)); - return 0; -} +#include <net/sock.h> static void ipcomp4_err(struct sk_buff *skb, u32 info) { @@ -241,155 +102,9 @@ out: return err; } -static void ipcomp_free_scratches(void) -{ - int i; - void **scratches; - - if (--ipcomp_scratch_users) - return; - - scratches = ipcomp_scratches; - if (!scratches) - return; - - for_each_possible_cpu(i) - vfree(*per_cpu_ptr(scratches, i)); - - free_percpu(scratches); -} - -static void **ipcomp_alloc_scratches(void) -{ - int i; - void **scratches; - - if (ipcomp_scratch_users++) - return ipcomp_scratches; - - scratches = alloc_percpu(void *); - if (!scratches) - return NULL; - - ipcomp_scratches = scratches; - - for_each_possible_cpu(i) { - void *scratch = vmalloc(IPCOMP_SCRATCH_SIZE); - if (!scratch) - return NULL; - *per_cpu_ptr(scratches, i) = scratch; - } - - return scratches; -} - -static void ipcomp_free_tfms(struct crypto_comp **tfms) -{ - struct ipcomp_tfms *pos; - int cpu; - - list_for_each_entry(pos, &ipcomp_tfms_list, list) { - if (pos->tfms == tfms) - break; - } - - BUG_TRAP(pos); - - if (--pos->users) - return; - - list_del(&pos->list); - kfree(pos); - - if (!tfms) - return; - - for_each_possible_cpu(cpu) { - struct crypto_comp *tfm = *per_cpu_ptr(tfms, cpu); - crypto_free_comp(tfm); - } - free_percpu(tfms); -} - -static struct crypto_comp **ipcomp_alloc_tfms(const char *alg_name) -{ - struct ipcomp_tfms *pos; - struct crypto_comp **tfms; - int cpu; - - /* This can be any valid CPU ID so we don't need locking. */ - cpu = raw_smp_processor_id(); - - list_for_each_entry(pos, &ipcomp_tfms_list, list) { - struct crypto_comp *tfm; - - tfms = pos->tfms; - tfm = *per_cpu_ptr(tfms, cpu); - - if (!strcmp(crypto_comp_name(tfm), alg_name)) { - pos->users++; - return tfms; - } - } - - pos = kmalloc(sizeof(*pos), GFP_KERNEL); - if (!pos) - return NULL; - - pos->users = 1; - INIT_LIST_HEAD(&pos->list); - list_add(&pos->list, &ipcomp_tfms_list); - - pos->tfms = tfms = alloc_percpu(struct crypto_comp *); - if (!tfms) - goto error; - - for_each_possible_cpu(cpu) { - struct crypto_comp *tfm = crypto_alloc_comp(alg_name, 0, - CRYPTO_ALG_ASYNC); - if (IS_ERR(tfm)) - goto error; - *per_cpu_ptr(tfms, cpu) = tfm; - } - - return tfms; - -error: - ipcomp_free_tfms(tfms); - return NULL; -} - -static void ipcomp_free_data(struct ipcomp_data *ipcd) +static int ipcomp4_init_state(struct xfrm_state *x) { - if (ipcd->tfms) - ipcomp_free_tfms(ipcd->tfms); - ipcomp_free_scratches(); -} - -static void ipcomp_destroy(struct xfrm_state *x) -{ - struct ipcomp_data *ipcd = x->data; - if (!ipcd) - return; - xfrm_state_delete_tunnel(x); - mutex_lock(&ipcomp_resource_mutex); - ipcomp_free_data(ipcd); - mutex_unlock(&ipcomp_resource_mutex); - kfree(ipcd); -} - -static int ipcomp_init_state(struct xfrm_state *x) -{ - int err; - struct ipcomp_data *ipcd; - struct xfrm_algo_desc *calg_desc; - - err = -EINVAL; - if (!x->calg) - goto out; - - if (x->encap) - goto out; + int err = -EINVAL; x->props.header_len = 0; switch (x->props.mode) { @@ -402,40 +117,22 @@ static int ipcomp_init_state(struct xfrm_state *x) goto out; } - err = -ENOMEM; - ipcd = kzalloc(sizeof(*ipcd), GFP_KERNEL); - if (!ipcd) + err = ipcomp_init_state(x); + if (err) goto out; - mutex_lock(&ipcomp_resource_mutex); - if (!ipcomp_alloc_scratches()) - goto error; - - ipcd->tfms = ipcomp_alloc_tfms(x->calg->alg_name); - if (!ipcd->tfms) - goto error; - mutex_unlock(&ipcomp_resource_mutex); - if (x->props.mode == XFRM_MODE_TUNNEL) { err = ipcomp_tunnel_attach(x); if (err) goto error_tunnel; } - calg_desc = xfrm_calg_get_byname(x->calg->alg_name, 0); - BUG_ON(!calg_desc); - ipcd->threshold = calg_desc->uinfo.comp.threshold; - x->data = ipcd; err = 0; out: return err; error_tunnel: - mutex_lock(&ipcomp_resource_mutex); -error: - ipcomp_free_data(ipcd); - mutex_unlock(&ipcomp_resource_mutex); - kfree(ipcd); + ipcomp_destroy(x); goto out; } @@ -443,7 +140,7 @@ static const struct xfrm_type ipcomp_type = { .description = "IPCOMP4", .owner = THIS_MODULE, .proto = IPPROTO_COMP, - .init_state = ipcomp_init_state, + .init_state = ipcomp4_init_state, .destructor = ipcomp_destroy, .input = ipcomp_input, .output = ipcomp_output @@ -481,7 +178,7 @@ module_init(ipcomp4_init); module_exit(ipcomp4_fini); MODULE_LICENSE("GPL"); -MODULE_DESCRIPTION("IP Payload Compression Protocol (IPComp) - RFC3173"); +MODULE_DESCRIPTION("IP Payload Compression Protocol (IPComp/IPv4) - RFC3173"); MODULE_AUTHOR("James Morris <jmorris@intercode.com.au>"); MODULE_ALIAS_XFRM_TYPE(AF_INET, XFRM_PROTO_COMP); diff --git a/net/ipv4/ipconfig.c b/net/ipv4/ipconfig.c index ed45037ce9be..42065fff46c4 100644 --- a/net/ipv4/ipconfig.c +++ b/net/ipv4/ipconfig.c @@ -1,6 +1,4 @@ /* - * $Id: ipconfig.c,v 1.46 2002/02/01 22:01:04 davem Exp $ - * * Automatic Configuration of IP -- use DHCP, BOOTP, RARP, or * user-supplied information to configure own IP address and routes. * @@ -434,7 +432,7 @@ ic_rarp_recv(struct sk_buff *skb, struct net_device *dev, struct packet_type *pt unsigned char *sha, *tha; /* s for "source", t for "target" */ struct ic_device *d; - if (dev_net(dev) != &init_net) + if (!net_eq(dev_net(dev), &init_net)) goto drop; if ((skb = skb_share_check(skb, GFP_ATOMIC)) == NULL) @@ -854,7 +852,7 @@ static int __init ic_bootp_recv(struct sk_buff *skb, struct net_device *dev, str struct ic_device *d; int len, ext_len; - if (dev_net(dev) != &init_net) + if (!net_eq(dev_net(dev), &init_net)) goto drop; /* Perform verifications before taking the lock. */ diff --git a/net/ipv4/ipip.c b/net/ipv4/ipip.c index af5cb53da5cc..29609d29df76 100644 --- a/net/ipv4/ipip.c +++ b/net/ipv4/ipip.c @@ -1,8 +1,6 @@ /* * Linux NET3: IP/IP protocol decoder. * - * Version: $Id: ipip.c,v 1.50 2001/10/02 02:22:36 davem Exp $ - * * Authors: * Sam Lantinga (slouken@cs.ucdavis.edu) 02/01/95 * @@ -43,7 +41,7 @@ Made the tunnels use dev->name not tunnel: when error reporting. Added tx_dropped stat - -Alan Cox (Alan.Cox@linux.org) 21 March 95 + -Alan Cox (alan@lxorguk.ukuu.org.uk) 21 March 95 Reworked: Changed to tunnel to destination gateway in addition to the @@ -368,8 +366,8 @@ static int ipip_rcv(struct sk_buff *skb) skb->protocol = htons(ETH_P_IP); skb->pkt_type = PACKET_HOST; - tunnel->stat.rx_packets++; - tunnel->stat.rx_bytes += skb->len; + tunnel->dev->stats.rx_packets++; + tunnel->dev->stats.rx_bytes += skb->len; skb->dev = tunnel->dev; dst_release(skb->dst); skb->dst = NULL; @@ -392,7 +390,7 @@ static int ipip_rcv(struct sk_buff *skb) static int ipip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev) { struct ip_tunnel *tunnel = netdev_priv(dev); - struct net_device_stats *stats = &tunnel->stat; + struct net_device_stats *stats = &tunnel->dev->stats; struct iphdr *tiph = &tunnel->parms.iph; u8 tos = tunnel->parms.iph.tos; __be16 df = tiph->frag_off; @@ -405,7 +403,7 @@ static int ipip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev) int mtu; if (tunnel->recursion++) { - tunnel->stat.collisions++; + stats->collisions++; goto tx_error; } @@ -418,7 +416,7 @@ static int ipip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev) if (!dst) { /* NBMA tunnel */ if ((rt = skb->rtable) == NULL) { - tunnel->stat.tx_fifo_errors++; + stats->tx_fifo_errors++; goto tx_error; } if ((dst = rt->rt_gateway) == 0) @@ -433,7 +431,7 @@ static int ipip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev) .tos = RT_TOS(tos) } }, .proto = IPPROTO_IPIP }; if (ip_route_output_key(dev_net(dev), &rt, &fl)) { - tunnel->stat.tx_carrier_errors++; + stats->tx_carrier_errors++; goto tx_error_icmp; } } @@ -441,7 +439,7 @@ static int ipip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev) if (tdev == dev) { ip_rt_put(rt); - tunnel->stat.collisions++; + stats->collisions++; goto tx_error; } @@ -451,7 +449,7 @@ static int ipip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev) mtu = skb->dst ? dst_mtu(skb->dst) : dev->mtu; if (mtu < 68) { - tunnel->stat.collisions++; + stats->collisions++; ip_rt_put(rt); goto tx_error; } @@ -685,11 +683,6 @@ done: return err; } -static struct net_device_stats *ipip_tunnel_get_stats(struct net_device *dev) -{ - return &(((struct ip_tunnel*)netdev_priv(dev))->stat); -} - static int ipip_tunnel_change_mtu(struct net_device *dev, int new_mtu) { if (new_mtu < 68 || new_mtu > 0xFFF8 - sizeof(struct iphdr)) @@ -702,7 +695,6 @@ static void ipip_tunnel_setup(struct net_device *dev) { dev->uninit = ipip_tunnel_uninit; dev->hard_start_xmit = ipip_tunnel_xmit; - dev->get_stats = ipip_tunnel_get_stats; dev->do_ioctl = ipip_tunnel_ioctl; dev->change_mtu = ipip_tunnel_change_mtu; dev->destructor = free_netdev; diff --git a/net/ipv4/ipmr.c b/net/ipv4/ipmr.c index 11700a4dcd95..b42e082cc170 100644 --- a/net/ipv4/ipmr.c +++ b/net/ipv4/ipmr.c @@ -1,7 +1,7 @@ /* * IP multicast routing support for mrouted 3.6/3.8 * - * (c) 1995 Alan Cox, <alan@redhat.com> + * (c) 1995 Alan Cox, <alan@lxorguk.ukuu.org.uk> * Linux Consultancy and Custom Driver Development * * This program is free software; you can redistribute it and/or @@ -9,8 +9,6 @@ * as published by the Free Software Foundation; either version * 2 of the License, or (at your option) any later version. * - * Version: $Id: ipmr.c,v 1.65 2001/10/31 21:55:54 davem Exp $ - * * Fixes: * Michael Chastain : Incorrect size of copying. * Alan Cox : Added the cache manager code @@ -120,6 +118,31 @@ static struct timer_list ipmr_expire_timer; /* Service routines creating virtual interfaces: DVMRP tunnels and PIMREG */ +static void ipmr_del_tunnel(struct net_device *dev, struct vifctl *v) +{ + dev_close(dev); + + dev = __dev_get_by_name(&init_net, "tunl0"); + if (dev) { + struct ifreq ifr; + mm_segment_t oldfs; + struct ip_tunnel_parm p; + + memset(&p, 0, sizeof(p)); + p.iph.daddr = v->vifc_rmt_addr.s_addr; + p.iph.saddr = v->vifc_lcl_addr.s_addr; + p.iph.version = 4; + p.iph.ihl = 5; + p.iph.protocol = IPPROTO_IPIP; + sprintf(p.name, "dvmrp%d", v->vifc_vifi); + ifr.ifr_ifru.ifru_data = (__force void __user *)&p; + + oldfs = get_fs(); set_fs(KERNEL_DS); + dev->do_ioctl(dev, &ifr, SIOCDELTUNNEL); + set_fs(oldfs); + } +} + static struct net_device *ipmr_new_tunnel(struct vifctl *v) { @@ -161,6 +184,7 @@ struct net_device *ipmr_new_tunnel(struct vifctl *v) if (dev_open(dev)) goto failure; + dev_hold(dev); } } return dev; @@ -181,26 +205,20 @@ static int reg_vif_num = -1; static int reg_vif_xmit(struct sk_buff *skb, struct net_device *dev) { read_lock(&mrt_lock); - ((struct net_device_stats*)netdev_priv(dev))->tx_bytes += skb->len; - ((struct net_device_stats*)netdev_priv(dev))->tx_packets++; + dev->stats.tx_bytes += skb->len; + dev->stats.tx_packets++; ipmr_cache_report(skb, reg_vif_num, IGMPMSG_WHOLEPKT); read_unlock(&mrt_lock); kfree_skb(skb); return 0; } -static struct net_device_stats *reg_vif_get_stats(struct net_device *dev) -{ - return (struct net_device_stats*)netdev_priv(dev); -} - static void reg_vif_setup(struct net_device *dev) { dev->type = ARPHRD_PIMREG; dev->mtu = ETH_DATA_LEN - sizeof(struct iphdr) - 8; dev->flags = IFF_NOARP; dev->hard_start_xmit = reg_vif_xmit; - dev->get_stats = reg_vif_get_stats; dev->destructor = free_netdev; } @@ -209,8 +227,7 @@ static struct net_device *ipmr_reg_vif(void) struct net_device *dev; struct in_device *in_dev; - dev = alloc_netdev(sizeof(struct net_device_stats), "pimreg", - reg_vif_setup); + dev = alloc_netdev(0, "pimreg", reg_vif_setup); if (dev == NULL) return NULL; @@ -234,6 +251,8 @@ static struct net_device *ipmr_reg_vif(void) if (dev_open(dev)) goto failure; + dev_hold(dev); + return dev; failure: @@ -248,9 +267,10 @@ failure: /* * Delete a VIF entry + * @notify: Set to 1, if the caller is a notifier_call */ -static int vif_delete(int vifi) +static int vif_delete(int vifi, int notify) { struct vif_device *v; struct net_device *dev; @@ -293,7 +313,7 @@ static int vif_delete(int vifi) ip_rt_multicast_event(in_dev); } - if (v->flags&(VIFF_TUNNEL|VIFF_REGISTER)) + if (v->flags&(VIFF_TUNNEL|VIFF_REGISTER) && !notify) unregister_netdevice(dev); dev_put(dev); @@ -398,6 +418,7 @@ static int vif_add(struct vifctl *vifc, int mrtsock) struct vif_device *v = &vif_table[vifi]; struct net_device *dev; struct in_device *in_dev; + int err; /* Is vif busy ? */ if (VIF_EXISTS(vifi)) @@ -415,18 +436,34 @@ static int vif_add(struct vifctl *vifc, int mrtsock) dev = ipmr_reg_vif(); if (!dev) return -ENOBUFS; + err = dev_set_allmulti(dev, 1); + if (err) { + unregister_netdevice(dev); + dev_put(dev); + return err; + } break; #endif case VIFF_TUNNEL: dev = ipmr_new_tunnel(vifc); if (!dev) return -ENOBUFS; + err = dev_set_allmulti(dev, 1); + if (err) { + ipmr_del_tunnel(dev, vifc); + dev_put(dev); + return err; + } break; case 0: dev = ip_dev_find(&init_net, vifc->vifc_lcl_addr.s_addr); if (!dev) return -EADDRNOTAVAIL; - dev_put(dev); + err = dev_set_allmulti(dev, 1); + if (err) { + dev_put(dev); + return err; + } break; default: return -EINVAL; @@ -435,7 +472,6 @@ static int vif_add(struct vifctl *vifc, int mrtsock) if ((in_dev = __in_dev_get_rtnl(dev)) == NULL) return -EADDRNOTAVAIL; IPV4_DEVCONF(in_dev->cnf, MC_FORWARDING)++; - dev_set_allmulti(dev, +1); ip_rt_multicast_event(in_dev); /* @@ -458,7 +494,6 @@ static int vif_add(struct vifctl *vifc, int mrtsock) /* And finish update writing critical data */ write_lock_bh(&mrt_lock); - dev_hold(dev); v->dev=dev; #ifdef CONFIG_IP_PIMSM if (v->flags&VIFF_REGISTER) @@ -805,7 +840,7 @@ static void mroute_clean_tables(struct sock *sk) */ for (i=0; i<maxvif; i++) { if (!(vif_table[i].flags&VIFF_STATIC)) - vif_delete(i); + vif_delete(i, 0); } /* @@ -918,7 +953,7 @@ int ip_mroute_setsockopt(struct sock *sk,int optname,char __user *optval,int opt if (optname==MRT_ADD_VIF) { ret = vif_add(&vif, sk==mroute_socket); } else { - ret = vif_delete(vif.vifc_vifi); + ret = vif_delete(vif.vifc_vifi, 0); } rtnl_unlock(); return ret; @@ -1089,7 +1124,7 @@ static int ipmr_device_event(struct notifier_block *this, unsigned long event, v struct vif_device *v; int ct; - if (dev_net(dev) != &init_net) + if (!net_eq(dev_net(dev), &init_net)) return NOTIFY_DONE; if (event != NETDEV_UNREGISTER) @@ -1097,7 +1132,7 @@ static int ipmr_device_event(struct notifier_block *this, unsigned long event, v v=&vif_table[0]; for (ct=0;ct<maxvif;ct++,v++) { if (v->dev==dev) - vif_delete(ct); + vif_delete(ct, 1); } return NOTIFY_DONE; } @@ -1143,7 +1178,7 @@ static inline int ipmr_forward_finish(struct sk_buff *skb) { struct ip_options * opt = &(IPCB(skb)->opt); - IP_INC_STATS_BH(IPSTATS_MIB_OUTFORWDATAGRAMS); + IP_INC_STATS_BH(dev_net(skb->dst->dev), IPSTATS_MIB_OUTFORWDATAGRAMS); if (unlikely(opt->optlen)) ip_forward_options(skb); @@ -1170,8 +1205,8 @@ static void ipmr_queue_xmit(struct sk_buff *skb, struct mfc_cache *c, int vifi) if (vif->flags & VIFF_REGISTER) { vif->pkt_out++; vif->bytes_out+=skb->len; - ((struct net_device_stats*)netdev_priv(vif->dev))->tx_bytes += skb->len; - ((struct net_device_stats*)netdev_priv(vif->dev))->tx_packets++; + vif->dev->stats.tx_bytes += skb->len; + vif->dev->stats.tx_packets++; ipmr_cache_report(skb, vifi, IGMPMSG_WHOLEPKT); kfree_skb(skb); return; @@ -1206,7 +1241,7 @@ static void ipmr_queue_xmit(struct sk_buff *skb, struct mfc_cache *c, int vifi) to blackhole. */ - IP_INC_STATS_BH(IPSTATS_MIB_FRAGFAILS); + IP_INC_STATS_BH(dev_net(dev), IPSTATS_MIB_FRAGFAILS); ip_rt_put(rt); goto out_free; } @@ -1230,8 +1265,8 @@ static void ipmr_queue_xmit(struct sk_buff *skb, struct mfc_cache *c, int vifi) if (vif->flags & VIFF_TUNNEL) { ip_encap(skb, vif->local, vif->remote); /* FIXME: extra output firewall step used to be here. --RR */ - ((struct ip_tunnel *)netdev_priv(vif->dev))->stat.tx_packets++; - ((struct ip_tunnel *)netdev_priv(vif->dev))->stat.tx_bytes+=skb->len; + vif->dev->stats.tx_packets++; + vif->dev->stats.tx_bytes += skb->len; } IPCB(skb)->flags |= IPSKB_FORWARDED; @@ -1487,8 +1522,8 @@ int pim_rcv_v1(struct sk_buff * skb) skb->pkt_type = PACKET_HOST; dst_release(skb->dst); skb->dst = NULL; - ((struct net_device_stats*)netdev_priv(reg_dev))->rx_bytes += skb->len; - ((struct net_device_stats*)netdev_priv(reg_dev))->rx_packets++; + reg_dev->stats.rx_bytes += skb->len; + reg_dev->stats.rx_packets++; nf_reset(skb); netif_rx(skb); dev_put(reg_dev); @@ -1542,8 +1577,8 @@ static int pim_rcv(struct sk_buff * skb) skb->ip_summed = 0; skb->pkt_type = PACKET_HOST; dst_release(skb->dst); - ((struct net_device_stats*)netdev_priv(reg_dev))->rx_bytes += skb->len; - ((struct net_device_stats*)netdev_priv(reg_dev))->rx_packets++; + reg_dev->stats.rx_bytes += skb->len; + reg_dev->stats.rx_packets++; skb->dst = NULL; nf_reset(skb); netif_rx(skb); @@ -1887,16 +1922,36 @@ static struct net_protocol pim_protocol = { * Setup for IP multicast routing */ -void __init ip_mr_init(void) +int __init ip_mr_init(void) { + int err; + mrt_cachep = kmem_cache_create("ip_mrt_cache", sizeof(struct mfc_cache), 0, SLAB_HWCACHE_ALIGN|SLAB_PANIC, NULL); + if (!mrt_cachep) + return -ENOMEM; + setup_timer(&ipmr_expire_timer, ipmr_expire_process, 0); - register_netdevice_notifier(&ip_mr_notifier); + err = register_netdevice_notifier(&ip_mr_notifier); + if (err) + goto reg_notif_fail; #ifdef CONFIG_PROC_FS - proc_net_fops_create(&init_net, "ip_mr_vif", 0, &ipmr_vif_fops); - proc_net_fops_create(&init_net, "ip_mr_cache", 0, &ipmr_mfc_fops); + err = -ENOMEM; + if (!proc_net_fops_create(&init_net, "ip_mr_vif", 0, &ipmr_vif_fops)) + goto proc_vif_fail; + if (!proc_net_fops_create(&init_net, "ip_mr_cache", 0, &ipmr_mfc_fops)) + goto proc_cache_fail; #endif + return 0; +reg_notif_fail: + kmem_cache_destroy(mrt_cachep); +#ifdef CONFIG_PROC_FS +proc_vif_fail: + unregister_netdevice_notifier(&ip_mr_notifier); +proc_cache_fail: + proc_net_remove(&init_net, "ip_mr_vif"); +#endif + return err; } diff --git a/net/ipv4/ipvs/ip_vs_proto_ah.c b/net/ipv4/ipvs/ip_vs_proto_ah.c deleted file mode 100644 index 4bf835e1d86d..000000000000 --- a/net/ipv4/ipvs/ip_vs_proto_ah.c +++ /dev/null @@ -1,180 +0,0 @@ -/* - * ip_vs_proto_ah.c: AH IPSec load balancing support for IPVS - * - * Version: $Id: ip_vs_proto_ah.c,v 1.1 2003/07/04 15:04:37 wensong Exp $ - * - * Authors: Julian Anastasov <ja@ssi.bg>, February 2002 - * Wensong Zhang <wensong@linuxvirtualserver.org> - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License - * version 2 as published by the Free Software Foundation; - * - */ - -#include <linux/in.h> -#include <linux/ip.h> -#include <linux/module.h> -#include <linux/kernel.h> -#include <linux/netfilter.h> -#include <linux/netfilter_ipv4.h> - -#include <net/ip_vs.h> - - -/* TODO: - -struct isakmp_hdr { - __u8 icookie[8]; - __u8 rcookie[8]; - __u8 np; - __u8 version; - __u8 xchgtype; - __u8 flags; - __u32 msgid; - __u32 length; -}; - -*/ - -#define PORT_ISAKMP 500 - - -static struct ip_vs_conn * -ah_conn_in_get(const struct sk_buff *skb, - struct ip_vs_protocol *pp, - const struct iphdr *iph, - unsigned int proto_off, - int inverse) -{ - struct ip_vs_conn *cp; - - if (likely(!inverse)) { - cp = ip_vs_conn_in_get(IPPROTO_UDP, - iph->saddr, - htons(PORT_ISAKMP), - iph->daddr, - htons(PORT_ISAKMP)); - } else { - cp = ip_vs_conn_in_get(IPPROTO_UDP, - iph->daddr, - htons(PORT_ISAKMP), - iph->saddr, - htons(PORT_ISAKMP)); - } - - if (!cp) { - /* - * We are not sure if the packet is from our - * service, so our conn_schedule hook should return NF_ACCEPT - */ - IP_VS_DBG(12, "Unknown ISAKMP entry for outin packet " - "%s%s %u.%u.%u.%u->%u.%u.%u.%u\n", - inverse ? "ICMP+" : "", - pp->name, - NIPQUAD(iph->saddr), - NIPQUAD(iph->daddr)); - } - - return cp; -} - - -static struct ip_vs_conn * -ah_conn_out_get(const struct sk_buff *skb, struct ip_vs_protocol *pp, - const struct iphdr *iph, unsigned int proto_off, int inverse) -{ - struct ip_vs_conn *cp; - - if (likely(!inverse)) { - cp = ip_vs_conn_out_get(IPPROTO_UDP, - iph->saddr, - htons(PORT_ISAKMP), - iph->daddr, - htons(PORT_ISAKMP)); - } else { - cp = ip_vs_conn_out_get(IPPROTO_UDP, - iph->daddr, - htons(PORT_ISAKMP), - iph->saddr, - htons(PORT_ISAKMP)); - } - - if (!cp) { - IP_VS_DBG(12, "Unknown ISAKMP entry for inout packet " - "%s%s %u.%u.%u.%u->%u.%u.%u.%u\n", - inverse ? "ICMP+" : "", - pp->name, - NIPQUAD(iph->saddr), - NIPQUAD(iph->daddr)); - } - - return cp; -} - - -static int -ah_conn_schedule(struct sk_buff *skb, - struct ip_vs_protocol *pp, - int *verdict, struct ip_vs_conn **cpp) -{ - /* - * AH is only related traffic. Pass the packet to IP stack. - */ - *verdict = NF_ACCEPT; - return 0; -} - - -static void -ah_debug_packet(struct ip_vs_protocol *pp, const struct sk_buff *skb, - int offset, const char *msg) -{ - char buf[256]; - struct iphdr _iph, *ih; - - ih = skb_header_pointer(skb, offset, sizeof(_iph), &_iph); - if (ih == NULL) - sprintf(buf, "%s TRUNCATED", pp->name); - else - sprintf(buf, "%s %u.%u.%u.%u->%u.%u.%u.%u", - pp->name, NIPQUAD(ih->saddr), - NIPQUAD(ih->daddr)); - - printk(KERN_DEBUG "IPVS: %s: %s\n", msg, buf); -} - - -static void ah_init(struct ip_vs_protocol *pp) -{ - /* nothing to do now */ -} - - -static void ah_exit(struct ip_vs_protocol *pp) -{ - /* nothing to do now */ -} - - -struct ip_vs_protocol ip_vs_protocol_ah = { - .name = "AH", - .protocol = IPPROTO_AH, - .num_states = 1, - .dont_defrag = 1, - .init = ah_init, - .exit = ah_exit, - .conn_schedule = ah_conn_schedule, - .conn_in_get = ah_conn_in_get, - .conn_out_get = ah_conn_out_get, - .snat_handler = NULL, - .dnat_handler = NULL, - .csum_check = NULL, - .state_transition = NULL, - .register_app = NULL, - .unregister_app = NULL, - .app_conn_bind = NULL, - .debug_packet = ah_debug_packet, - .timeout_change = NULL, /* ISAKMP */ - .set_state_timeout = NULL, -}; diff --git a/net/ipv4/ipvs/ip_vs_proto_esp.c b/net/ipv4/ipvs/ip_vs_proto_esp.c deleted file mode 100644 index db6a6b7b1a0b..000000000000 --- a/net/ipv4/ipvs/ip_vs_proto_esp.c +++ /dev/null @@ -1,178 +0,0 @@ -/* - * ip_vs_proto_esp.c: ESP IPSec load balancing support for IPVS - * - * Version: $Id: ip_vs_proto_esp.c,v 1.1 2003/07/04 15:04:37 wensong Exp $ - * - * Authors: Julian Anastasov <ja@ssi.bg>, February 2002 - * Wensong Zhang <wensong@linuxvirtualserver.org> - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License - * version 2 as published by the Free Software Foundation; - * - */ - -#include <linux/in.h> -#include <linux/ip.h> -#include <linux/module.h> -#include <linux/kernel.h> -#include <linux/netfilter.h> -#include <linux/netfilter_ipv4.h> - -#include <net/ip_vs.h> - - -/* TODO: - -struct isakmp_hdr { - __u8 icookie[8]; - __u8 rcookie[8]; - __u8 np; - __u8 version; - __u8 xchgtype; - __u8 flags; - __u32 msgid; - __u32 length; -}; - -*/ - -#define PORT_ISAKMP 500 - - -static struct ip_vs_conn * -esp_conn_in_get(const struct sk_buff *skb, - struct ip_vs_protocol *pp, - const struct iphdr *iph, - unsigned int proto_off, - int inverse) -{ - struct ip_vs_conn *cp; - - if (likely(!inverse)) { - cp = ip_vs_conn_in_get(IPPROTO_UDP, - iph->saddr, - htons(PORT_ISAKMP), - iph->daddr, - htons(PORT_ISAKMP)); - } else { - cp = ip_vs_conn_in_get(IPPROTO_UDP, - iph->daddr, - htons(PORT_ISAKMP), - iph->saddr, - htons(PORT_ISAKMP)); - } - - if (!cp) { - /* - * We are not sure if the packet is from our - * service, so our conn_schedule hook should return NF_ACCEPT - */ - IP_VS_DBG(12, "Unknown ISAKMP entry for outin packet " - "%s%s %u.%u.%u.%u->%u.%u.%u.%u\n", - inverse ? "ICMP+" : "", - pp->name, - NIPQUAD(iph->saddr), - NIPQUAD(iph->daddr)); - } - - return cp; -} - - -static struct ip_vs_conn * -esp_conn_out_get(const struct sk_buff *skb, struct ip_vs_protocol *pp, - const struct iphdr *iph, unsigned int proto_off, int inverse) -{ - struct ip_vs_conn *cp; - - if (likely(!inverse)) { - cp = ip_vs_conn_out_get(IPPROTO_UDP, - iph->saddr, - htons(PORT_ISAKMP), - iph->daddr, - htons(PORT_ISAKMP)); - } else { - cp = ip_vs_conn_out_get(IPPROTO_UDP, - iph->daddr, - htons(PORT_ISAKMP), - iph->saddr, - htons(PORT_ISAKMP)); - } - - if (!cp) { - IP_VS_DBG(12, "Unknown ISAKMP entry for inout packet " - "%s%s %u.%u.%u.%u->%u.%u.%u.%u\n", - inverse ? "ICMP+" : "", - pp->name, - NIPQUAD(iph->saddr), - NIPQUAD(iph->daddr)); - } - - return cp; -} - - -static int -esp_conn_schedule(struct sk_buff *skb, struct ip_vs_protocol *pp, - int *verdict, struct ip_vs_conn **cpp) -{ - /* - * ESP is only related traffic. Pass the packet to IP stack. - */ - *verdict = NF_ACCEPT; - return 0; -} - - -static void -esp_debug_packet(struct ip_vs_protocol *pp, const struct sk_buff *skb, - int offset, const char *msg) -{ - char buf[256]; - struct iphdr _iph, *ih; - - ih = skb_header_pointer(skb, offset, sizeof(_iph), &_iph); - if (ih == NULL) - sprintf(buf, "%s TRUNCATED", pp->name); - else - sprintf(buf, "%s %u.%u.%u.%u->%u.%u.%u.%u", - pp->name, NIPQUAD(ih->saddr), - NIPQUAD(ih->daddr)); - - printk(KERN_DEBUG "IPVS: %s: %s\n", msg, buf); -} - - -static void esp_init(struct ip_vs_protocol *pp) -{ - /* nothing to do now */ -} - - -static void esp_exit(struct ip_vs_protocol *pp) -{ - /* nothing to do now */ -} - - -struct ip_vs_protocol ip_vs_protocol_esp = { - .name = "ESP", - .protocol = IPPROTO_ESP, - .num_states = 1, - .dont_defrag = 1, - .init = esp_init, - .exit = esp_exit, - .conn_schedule = esp_conn_schedule, - .conn_in_get = esp_conn_in_get, - .conn_out_get = esp_conn_out_get, - .snat_handler = NULL, - .dnat_handler = NULL, - .csum_check = NULL, - .state_transition = NULL, - .register_app = NULL, - .unregister_app = NULL, - .app_conn_bind = NULL, - .debug_packet = esp_debug_packet, - .timeout_change = NULL, /* ISAKMP */ -}; diff --git a/net/ipv4/netfilter.c b/net/ipv4/netfilter.c index f8edacdf991d..6efdb70b3eb2 100644 --- a/net/ipv4/netfilter.c +++ b/net/ipv4/netfilter.c @@ -12,6 +12,7 @@ /* route_me_harder function, used by iptable_nat, iptable_mangle + ip_queue */ int ip_route_me_harder(struct sk_buff *skb, unsigned addr_type) { + struct net *net = dev_net(skb->dst->dev); const struct iphdr *iph = ip_hdr(skb); struct rtable *rt; struct flowi fl = {}; @@ -19,7 +20,9 @@ int ip_route_me_harder(struct sk_buff *skb, unsigned addr_type) unsigned int hh_len; unsigned int type; - type = inet_addr_type(&init_net, iph->saddr); + type = inet_addr_type(net, iph->saddr); + if (skb->sk && inet_sk(skb->sk)->transparent) + type = RTN_LOCAL; if (addr_type == RTN_UNSPEC) addr_type = type; @@ -33,7 +36,8 @@ int ip_route_me_harder(struct sk_buff *skb, unsigned addr_type) fl.nl_u.ip4_u.tos = RT_TOS(iph->tos); fl.oif = skb->sk ? skb->sk->sk_bound_dev_if : 0; fl.mark = skb->mark; - if (ip_route_output_key(&init_net, &rt, &fl) != 0) + fl.flags = skb->sk ? inet_sk_flowi_flags(skb->sk) : 0; + if (ip_route_output_key(net, &rt, &fl) != 0) return -1; /* Drop old route. */ @@ -43,7 +47,7 @@ int ip_route_me_harder(struct sk_buff *skb, unsigned addr_type) /* non-local src, find valid iif to satisfy * rp-filter when calling ip_route_input. */ fl.nl_u.ip4_u.daddr = iph->saddr; - if (ip_route_output_key(&init_net, &rt, &fl) != 0) + if (ip_route_output_key(net, &rt, &fl) != 0) return -1; odst = skb->dst; diff --git a/net/ipv4/netfilter/Kconfig b/net/ipv4/netfilter/Kconfig index 2767841a8cef..3816e1dc9295 100644 --- a/net/ipv4/netfilter/Kconfig +++ b/net/ipv4/netfilter/Kconfig @@ -5,10 +5,15 @@ menu "IP: Netfilter Configuration" depends on INET && NETFILTER +config NF_DEFRAG_IPV4 + tristate + default n + config NF_CONNTRACK_IPV4 tristate "IPv4 connection tracking support (required for NAT)" depends on NF_CONNTRACK default m if NETFILTER_ADVANCED=n + select NF_DEFRAG_IPV4 ---help--- Connection tracking keeps a record of what packets have passed through your machine, in order to figure out how they are related @@ -56,23 +61,30 @@ config IP_NF_IPTABLES To compile it as a module, choose M here. If unsure, say N. +if IP_NF_IPTABLES + # The matches. -config IP_NF_MATCH_RECENT - tristate '"recent" match support' - depends on IP_NF_IPTABLES +config IP_NF_MATCH_ADDRTYPE + tristate '"addrtype" address type match support' depends on NETFILTER_ADVANCED help - This match is used for creating one or many lists of recently - used addresses and then matching against that/those list(s). + This option allows you to match what routing thinks of an address, + eg. UNICAST, LOCAL, BROADCAST, ... - Short options are available by using 'iptables -m recent -h' - Official Website: <http://snowman.net/projects/ipt_recent/> + If you want to compile it as a module, say M here and read + <file:Documentation/kbuild/modules.txt>. If unsure, say `N'. + +config IP_NF_MATCH_AH + tristate '"ah" match support' + depends on NETFILTER_ADVANCED + help + This match extension allows you to match a range of SPIs + inside AH header of IPSec packets. To compile it as a module, choose M here. If unsure, say N. config IP_NF_MATCH_ECN tristate '"ecn" match support' - depends on IP_NF_IPTABLES depends on NETFILTER_ADVANCED help This option adds a `ECN' match, which allows you to match against @@ -80,19 +92,8 @@ config IP_NF_MATCH_ECN To compile it as a module, choose M here. If unsure, say N. -config IP_NF_MATCH_AH - tristate '"ah" match support' - depends on IP_NF_IPTABLES - depends on NETFILTER_ADVANCED - help - This match extension allows you to match a range of SPIs - inside AH header of IPSec packets. - - To compile it as a module, choose M here. If unsure, say N. - config IP_NF_MATCH_TTL tristate '"ttl" match support' - depends on IP_NF_IPTABLES depends on NETFILTER_ADVANCED help This adds CONFIG_IP_NF_MATCH_TTL option, which enabled the user @@ -100,21 +101,9 @@ config IP_NF_MATCH_TTL To compile it as a module, choose M here. If unsure, say N. -config IP_NF_MATCH_ADDRTYPE - tristate '"addrtype" address type match support' - depends on IP_NF_IPTABLES - depends on NETFILTER_ADVANCED - help - This option allows you to match what routing thinks of an address, - eg. UNICAST, LOCAL, BROADCAST, ... - - If you want to compile it as a module, say M here and read - <file:Documentation/kbuild/modules.txt>. If unsure, say `N'. - # `filter', generic and specific targets config IP_NF_FILTER tristate "Packet filtering" - depends on IP_NF_IPTABLES default m if NETFILTER_ADVANCED=n help Packet filtering defines a table `filter', which has a series of @@ -136,7 +125,6 @@ config IP_NF_TARGET_REJECT config IP_NF_TARGET_LOG tristate "LOG target support" - depends on IP_NF_IPTABLES default m if NETFILTER_ADVANCED=n help This option adds a `LOG' target, which allows you to create rules in @@ -146,7 +134,6 @@ config IP_NF_TARGET_LOG config IP_NF_TARGET_ULOG tristate "ULOG target support" - depends on IP_NF_IPTABLES default m if NETFILTER_ADVANCED=n ---help--- @@ -167,7 +154,7 @@ config IP_NF_TARGET_ULOG # NAT + specific targets: nf_conntrack config NF_NAT tristate "Full NAT" - depends on IP_NF_IPTABLES && NF_CONNTRACK_IPV4 + depends on NF_CONNTRACK_IPV4 default m if NETFILTER_ADVANCED=n help The Full NAT option allows masquerading, port forwarding and other @@ -194,27 +181,26 @@ config IP_NF_TARGET_MASQUERADE To compile it as a module, choose M here. If unsure, say N. -config IP_NF_TARGET_REDIRECT - tristate "REDIRECT target support" +config IP_NF_TARGET_NETMAP + tristate "NETMAP target support" depends on NF_NAT depends on NETFILTER_ADVANCED help - REDIRECT is a special case of NAT: all incoming connections are - mapped onto the incoming interface's address, causing the packets to - come to the local machine instead of passing through. This is - useful for transparent proxies. + NETMAP is an implementation of static 1:1 NAT mapping of network + addresses. It maps the network address part, while keeping the host + address part intact. To compile it as a module, choose M here. If unsure, say N. -config IP_NF_TARGET_NETMAP - tristate "NETMAP target support" +config IP_NF_TARGET_REDIRECT + tristate "REDIRECT target support" depends on NF_NAT depends on NETFILTER_ADVANCED help - NETMAP is an implementation of static 1:1 NAT mapping of network - addresses. It maps the network address part, while keeping the host - address part intact. It is similar to Fast NAT, except that - Netfilter's connection tracking doesn't work well with Fast NAT. + REDIRECT is a special case of NAT: all incoming connections are + mapped onto the incoming interface's address, causing the packets to + come to the local machine instead of passing through. This is + useful for transparent proxies. To compile it as a module, choose M here. If unsure, say N. @@ -263,44 +249,43 @@ config NF_NAT_PROTO_SCTP config NF_NAT_FTP tristate - depends on IP_NF_IPTABLES && NF_CONNTRACK && NF_NAT + depends on NF_CONNTRACK && NF_NAT default NF_NAT && NF_CONNTRACK_FTP config NF_NAT_IRC tristate - depends on IP_NF_IPTABLES && NF_CONNTRACK && NF_NAT + depends on NF_CONNTRACK && NF_NAT default NF_NAT && NF_CONNTRACK_IRC config NF_NAT_TFTP tristate - depends on IP_NF_IPTABLES && NF_CONNTRACK && NF_NAT + depends on NF_CONNTRACK && NF_NAT default NF_NAT && NF_CONNTRACK_TFTP config NF_NAT_AMANDA tristate - depends on IP_NF_IPTABLES && NF_CONNTRACK && NF_NAT + depends on NF_CONNTRACK && NF_NAT default NF_NAT && NF_CONNTRACK_AMANDA config NF_NAT_PPTP tristate - depends on IP_NF_IPTABLES && NF_CONNTRACK && NF_NAT + depends on NF_CONNTRACK && NF_NAT default NF_NAT && NF_CONNTRACK_PPTP select NF_NAT_PROTO_GRE config NF_NAT_H323 tristate - depends on IP_NF_IPTABLES && NF_CONNTRACK && NF_NAT + depends on NF_CONNTRACK && NF_NAT default NF_NAT && NF_CONNTRACK_H323 config NF_NAT_SIP tristate - depends on IP_NF_IPTABLES && NF_CONNTRACK && NF_NAT + depends on NF_CONNTRACK && NF_NAT default NF_NAT && NF_CONNTRACK_SIP # mangle + specific targets config IP_NF_MANGLE tristate "Packet mangling" - depends on IP_NF_IPTABLES default m if NETFILTER_ADVANCED=n help This option adds a `mangle' table to iptables: see the man page for @@ -309,6 +294,19 @@ config IP_NF_MANGLE To compile it as a module, choose M here. If unsure, say N. +config IP_NF_TARGET_CLUSTERIP + tristate "CLUSTERIP target support (EXPERIMENTAL)" + depends on IP_NF_MANGLE && EXPERIMENTAL + depends on NF_CONNTRACK_IPV4 + depends on NETFILTER_ADVANCED + select NF_CONNTRACK_MARK + help + The CLUSTERIP target allows you to build load-balancing clusters of + network servers without having a dedicated load-balancing + router/server/switch. + + To compile it as a module, choose M here. If unsure, say N. + config IP_NF_TARGET_ECN tristate "ECN target support" depends on IP_NF_MANGLE @@ -339,23 +337,9 @@ config IP_NF_TARGET_TTL To compile it as a module, choose M here. If unsure, say N. -config IP_NF_TARGET_CLUSTERIP - tristate "CLUSTERIP target support (EXPERIMENTAL)" - depends on IP_NF_MANGLE && EXPERIMENTAL - depends on NF_CONNTRACK_IPV4 - depends on NETFILTER_ADVANCED - select NF_CONNTRACK_MARK - help - The CLUSTERIP target allows you to build load-balancing clusters of - network servers without having a dedicated load-balancing - router/server/switch. - - To compile it as a module, choose M here. If unsure, say N. - # raw + specific targets config IP_NF_RAW tristate 'raw table support (required for NOTRACK/TRACE)' - depends on IP_NF_IPTABLES depends on NETFILTER_ADVANCED help This option adds a `raw' table to iptables. This table is the very @@ -365,6 +349,19 @@ config IP_NF_RAW If you want to compile it as a module, say M here and read <file:Documentation/kbuild/modules.txt>. If unsure, say `N'. +# security table for MAC policy +config IP_NF_SECURITY + tristate "Security table" + depends on SECURITY + depends on NETFILTER_ADVANCED + help + This option adds a `security' table to iptables, for use + with Mandatory Access Control (MAC) policy. + + If unsure, say N. + +endif # IP_NF_IPTABLES + # ARP tables config IP_NF_ARPTABLES tristate "ARP tables support" @@ -377,9 +374,10 @@ config IP_NF_ARPTABLES To compile it as a module, choose M here. If unsure, say N. +if IP_NF_ARPTABLES + config IP_NF_ARPFILTER tristate "ARP packet filtering" - depends on IP_NF_ARPTABLES help ARP packet filtering defines a table `filter', which has a series of rules for simple ARP packet filtering at local input and @@ -390,10 +388,11 @@ config IP_NF_ARPFILTER config IP_NF_ARP_MANGLE tristate "ARP payload mangling" - depends on IP_NF_ARPTABLES help Allows altering the ARP packet payload: source and destination hardware and network addresses. +endif # IP_NF_ARPTABLES + endmenu diff --git a/net/ipv4/netfilter/Makefile b/net/ipv4/netfilter/Makefile index d9b92fbf5579..5f9b650d90fc 100644 --- a/net/ipv4/netfilter/Makefile +++ b/net/ipv4/netfilter/Makefile @@ -18,6 +18,9 @@ obj-$(CONFIG_NF_CONNTRACK_IPV4) += nf_conntrack_ipv4.o obj-$(CONFIG_NF_NAT) += nf_nat.o +# defrag +obj-$(CONFIG_NF_DEFRAG_IPV4) += nf_defrag_ipv4.o + # NAT helpers (nf_conntrack) obj-$(CONFIG_NF_NAT_AMANDA) += nf_nat_amanda.o obj-$(CONFIG_NF_NAT_FTP) += nf_nat_ftp.o @@ -42,12 +45,12 @@ obj-$(CONFIG_IP_NF_FILTER) += iptable_filter.o obj-$(CONFIG_IP_NF_MANGLE) += iptable_mangle.o obj-$(CONFIG_NF_NAT) += iptable_nat.o obj-$(CONFIG_IP_NF_RAW) += iptable_raw.o +obj-$(CONFIG_IP_NF_SECURITY) += iptable_security.o # matches obj-$(CONFIG_IP_NF_MATCH_ADDRTYPE) += ipt_addrtype.o obj-$(CONFIG_IP_NF_MATCH_AH) += ipt_ah.o obj-$(CONFIG_IP_NF_MATCH_ECN) += ipt_ecn.o -obj-$(CONFIG_IP_NF_MATCH_RECENT) += ipt_recent.o obj-$(CONFIG_IP_NF_MATCH_TTL) += ipt_ttl.o # targets diff --git a/net/ipv4/netfilter/arp_tables.c b/net/ipv4/netfilter/arp_tables.c index 03e83a65aec5..8d70d29f1ccf 100644 --- a/net/ipv4/netfilter/arp_tables.c +++ b/net/ipv4/netfilter/arp_tables.c @@ -200,15 +200,12 @@ static inline int arp_checkentry(const struct arpt_arp *arp) return 1; } -static unsigned int arpt_error(struct sk_buff *skb, - const struct net_device *in, - const struct net_device *out, - unsigned int hooknum, - const struct xt_target *target, - const void *targinfo) +static unsigned int +arpt_error(struct sk_buff *skb, const struct xt_target_param *par) { if (net_ratelimit()) - printk("arp_tables: error: '%s'\n", (char *)targinfo); + printk("arp_tables: error: '%s'\n", + (const char *)par->targinfo); return NF_DROP; } @@ -232,6 +229,7 @@ unsigned int arpt_do_table(struct sk_buff *skb, const char *indev, *outdev; void *table_base; const struct xt_table_info *private; + struct xt_target_param tgpar; if (!pskb_may_pull(skb, arp_hdr_len(skb->dev))) return NF_DROP; @@ -245,6 +243,11 @@ unsigned int arpt_do_table(struct sk_buff *skb, e = get_entry(table_base, private->hook_entry[hook]); back = get_entry(table_base, private->underflow[hook]); + tgpar.in = in; + tgpar.out = out; + tgpar.hooknum = hook; + tgpar.family = NFPROTO_ARP; + arp = arp_hdr(skb); do { if (arp_packet_match(arp, skb->dev, indev, outdev, &e->arp)) { @@ -290,11 +293,10 @@ unsigned int arpt_do_table(struct sk_buff *skb, /* Targets which reenter must return * abs. verdicts */ + tgpar.target = t->u.kernel.target; + tgpar.targinfo = t->data; verdict = t->u.kernel.target->target(skb, - in, out, - hook, - t->u.kernel.target, - t->data); + &tgpar); /* Target might have changed stuff. */ arp = arp_hdr(skb); @@ -456,23 +458,24 @@ static inline int check_entry(struct arpt_entry *e, const char *name) static inline int check_target(struct arpt_entry *e, const char *name) { - struct arpt_entry_target *t; - struct xt_target *target; + struct arpt_entry_target *t = arpt_get_target(e); int ret; - - t = arpt_get_target(e); - target = t->u.kernel.target; - - ret = xt_check_target(target, NF_ARP, t->u.target_size - sizeof(*t), - name, e->comefrom, 0, 0); - if (!ret && t->u.kernel.target->checkentry - && !t->u.kernel.target->checkentry(name, e, target, t->data, - e->comefrom)) { + struct xt_tgchk_param par = { + .table = name, + .entryinfo = e, + .target = t->u.kernel.target, + .targinfo = t->data, + .hook_mask = e->comefrom, + .family = NFPROTO_ARP, + }; + + ret = xt_check_target(&par, t->u.target_size - sizeof(*t), 0, false); + if (ret < 0) { duprintf("arp_tables: check failed for `%s'.\n", t->u.kernel.target->name); - ret = -EINVAL; + return ret; } - return ret; + return 0; } static inline int @@ -488,7 +491,8 @@ find_check_entry(struct arpt_entry *e, const char *name, unsigned int size, return ret; t = arpt_get_target(e); - target = try_then_request_module(xt_find_target(NF_ARP, t->u.user.name, + target = try_then_request_module(xt_find_target(NFPROTO_ARP, + t->u.user.name, t->u.user.revision), "arpt_%s", t->u.user.name); if (IS_ERR(target) || !target) { @@ -554,15 +558,19 @@ static inline int check_entry_size_and_hooks(struct arpt_entry *e, static inline int cleanup_entry(struct arpt_entry *e, unsigned int *i) { + struct xt_tgdtor_param par; struct arpt_entry_target *t; if (i && (*i)-- == 0) return 1; t = arpt_get_target(e); - if (t->u.kernel.target->destroy) - t->u.kernel.target->destroy(t->u.kernel.target, t->data); - module_put(t->u.kernel.target->me); + par.target = t->u.kernel.target; + par.targinfo = t->data; + par.family = NFPROTO_ARP; + if (par.target->destroy != NULL) + par.target->destroy(&par); + module_put(par.target->me); return 0; } @@ -788,7 +796,7 @@ static void compat_standard_from_user(void *dst, void *src) int v = *(compat_int_t *)src; if (v > 0) - v += xt_compat_calc_jump(NF_ARP, v); + v += xt_compat_calc_jump(NFPROTO_ARP, v); memcpy(dst, &v, sizeof(v)); } @@ -797,7 +805,7 @@ static int compat_standard_to_user(void __user *dst, void *src) compat_int_t cv = *(int *)src; if (cv > 0) - cv -= xt_compat_calc_jump(NF_ARP, cv); + cv -= xt_compat_calc_jump(NFPROTO_ARP, cv); return copy_to_user(dst, &cv, sizeof(cv)) ? -EFAULT : 0; } @@ -815,7 +823,7 @@ static int compat_calc_entry(struct arpt_entry *e, t = arpt_get_target(e); off += xt_compat_target_offset(t->u.kernel.target); newinfo->size -= off; - ret = xt_compat_add_offset(NF_ARP, entry_offset, off); + ret = xt_compat_add_offset(NFPROTO_ARP, entry_offset, off); if (ret) return ret; @@ -866,9 +874,9 @@ static int get_info(struct net *net, void __user *user, int *len, int compat) name[ARPT_TABLE_MAXNAMELEN-1] = '\0'; #ifdef CONFIG_COMPAT if (compat) - xt_compat_lock(NF_ARP); + xt_compat_lock(NFPROTO_ARP); #endif - t = try_then_request_module(xt_find_table_lock(net, NF_ARP, name), + t = try_then_request_module(xt_find_table_lock(net, NFPROTO_ARP, name), "arptable_%s", name); if (t && !IS_ERR(t)) { struct arpt_getinfo info; @@ -878,7 +886,7 @@ static int get_info(struct net *net, void __user *user, int *len, int compat) if (compat) { struct xt_table_info tmp; ret = compat_table_info(private, &tmp); - xt_compat_flush_offsets(NF_ARP); + xt_compat_flush_offsets(NFPROTO_ARP); private = &tmp; } #endif @@ -901,7 +909,7 @@ static int get_info(struct net *net, void __user *user, int *len, int compat) ret = t ? PTR_ERR(t) : -ENOENT; #ifdef CONFIG_COMPAT if (compat) - xt_compat_unlock(NF_ARP); + xt_compat_unlock(NFPROTO_ARP); #endif return ret; } @@ -925,7 +933,7 @@ static int get_entries(struct net *net, struct arpt_get_entries __user *uptr, return -EINVAL; } - t = xt_find_table_lock(net, NF_ARP, get.name); + t = xt_find_table_lock(net, NFPROTO_ARP, get.name); if (t && !IS_ERR(t)) { const struct xt_table_info *private = t->private; @@ -967,7 +975,7 @@ static int __do_replace(struct net *net, const char *name, goto out; } - t = try_then_request_module(xt_find_table_lock(net, NF_ARP, name), + t = try_then_request_module(xt_find_table_lock(net, NFPROTO_ARP, name), "arptable_%s", name); if (!t || IS_ERR(t)) { ret = t ? PTR_ERR(t) : -ENOENT; @@ -1134,7 +1142,7 @@ static int do_add_counters(struct net *net, void __user *user, unsigned int len, goto free; } - t = xt_find_table_lock(net, NF_ARP, name); + t = xt_find_table_lock(net, NFPROTO_ARP, name); if (!t || IS_ERR(t)) { ret = t ? PTR_ERR(t) : -ENOENT; goto free; @@ -1218,7 +1226,7 @@ check_compat_entry_size_and_hooks(struct compat_arpt_entry *e, entry_offset = (void *)e - (void *)base; t = compat_arpt_get_target(e); - target = try_then_request_module(xt_find_target(NF_ARP, + target = try_then_request_module(xt_find_target(NFPROTO_ARP, t->u.user.name, t->u.user.revision), "arpt_%s", t->u.user.name); @@ -1232,7 +1240,7 @@ check_compat_entry_size_and_hooks(struct compat_arpt_entry *e, off += xt_compat_target_offset(target); *size += off; - ret = xt_compat_add_offset(NF_ARP, entry_offset, off); + ret = xt_compat_add_offset(NFPROTO_ARP, entry_offset, off); if (ret) goto release_target; @@ -1333,7 +1341,7 @@ static int translate_compat_table(const char *name, duprintf("translate_compat_table: size %u\n", info->size); j = 0; - xt_compat_lock(NF_ARP); + xt_compat_lock(NFPROTO_ARP); /* Walk through entries, checking offsets. */ ret = COMPAT_ARPT_ENTRY_ITERATE(entry0, total_size, check_compat_entry_size_and_hooks, @@ -1383,8 +1391,8 @@ static int translate_compat_table(const char *name, ret = COMPAT_ARPT_ENTRY_ITERATE(entry0, total_size, compat_copy_entry_from_user, &pos, &size, name, newinfo, entry1); - xt_compat_flush_offsets(NF_ARP); - xt_compat_unlock(NF_ARP); + xt_compat_flush_offsets(NFPROTO_ARP); + xt_compat_unlock(NFPROTO_ARP); if (ret) goto free_newinfo; @@ -1420,8 +1428,8 @@ out: COMPAT_ARPT_ENTRY_ITERATE(entry0, total_size, compat_release_entry, &j); return ret; out_unlock: - xt_compat_flush_offsets(NF_ARP); - xt_compat_unlock(NF_ARP); + xt_compat_flush_offsets(NFPROTO_ARP); + xt_compat_unlock(NFPROTO_ARP); goto out; } @@ -1607,8 +1615,8 @@ static int compat_get_entries(struct net *net, return -EINVAL; } - xt_compat_lock(NF_ARP); - t = xt_find_table_lock(net, NF_ARP, get.name); + xt_compat_lock(NFPROTO_ARP); + t = xt_find_table_lock(net, NFPROTO_ARP, get.name); if (t && !IS_ERR(t)) { const struct xt_table_info *private = t->private; struct xt_table_info info; @@ -1623,13 +1631,13 @@ static int compat_get_entries(struct net *net, private->size, get.size); ret = -EAGAIN; } - xt_compat_flush_offsets(NF_ARP); + xt_compat_flush_offsets(NFPROTO_ARP); module_put(t->me); xt_table_unlock(t); } else ret = t ? PTR_ERR(t) : -ENOENT; - xt_compat_unlock(NF_ARP); + xt_compat_unlock(NFPROTO_ARP); return ret; } @@ -1709,7 +1717,7 @@ static int do_arpt_get_ctl(struct sock *sk, int cmd, void __user *user, int *len break; } - try_then_request_module(xt_find_revision(NF_ARP, rev.name, + try_then_request_module(xt_find_revision(NFPROTO_ARP, rev.name, rev.revision, 1, &ret), "arpt_%s", rev.name); break; @@ -1787,7 +1795,7 @@ void arpt_unregister_table(struct xt_table *table) static struct xt_target arpt_standard_target __read_mostly = { .name = ARPT_STANDARD_TARGET, .targetsize = sizeof(int), - .family = NF_ARP, + .family = NFPROTO_ARP, #ifdef CONFIG_COMPAT .compatsize = sizeof(compat_int_t), .compat_from_user = compat_standard_from_user, @@ -1799,7 +1807,7 @@ static struct xt_target arpt_error_target __read_mostly = { .name = ARPT_ERROR_TARGET, .target = arpt_error, .targetsize = ARPT_FUNCTION_MAXNAMELEN, - .family = NF_ARP, + .family = NFPROTO_ARP, }; static struct nf_sockopt_ops arpt_sockopts = { @@ -1821,12 +1829,12 @@ static struct nf_sockopt_ops arpt_sockopts = { static int __net_init arp_tables_net_init(struct net *net) { - return xt_proto_init(net, NF_ARP); + return xt_proto_init(net, NFPROTO_ARP); } static void __net_exit arp_tables_net_exit(struct net *net) { - xt_proto_fini(net, NF_ARP); + xt_proto_fini(net, NFPROTO_ARP); } static struct pernet_operations arp_tables_net_ops = { diff --git a/net/ipv4/netfilter/arpt_mangle.c b/net/ipv4/netfilter/arpt_mangle.c index a385959d2655..b0d5b1d0a769 100644 --- a/net/ipv4/netfilter/arpt_mangle.c +++ b/net/ipv4/netfilter/arpt_mangle.c @@ -9,12 +9,9 @@ MODULE_AUTHOR("Bart De Schuymer <bdschuym@pandora.be>"); MODULE_DESCRIPTION("arptables arp payload mangle target"); static unsigned int -target(struct sk_buff *skb, - const struct net_device *in, const struct net_device *out, - unsigned int hooknum, const struct xt_target *target, - const void *targinfo) +target(struct sk_buff *skb, const struct xt_target_param *par) { - const struct arpt_mangle *mangle = targinfo; + const struct arpt_mangle *mangle = par->targinfo; const struct arphdr *arp; unsigned char *arpptr; int pln, hln; @@ -57,11 +54,9 @@ target(struct sk_buff *skb, return mangle->target; } -static bool -checkentry(const char *tablename, const void *e, const struct xt_target *target, - void *targinfo, unsigned int hook_mask) +static bool checkentry(const struct xt_tgchk_param *par) { - const struct arpt_mangle *mangle = targinfo; + const struct arpt_mangle *mangle = par->targinfo; if (mangle->flags & ~ARPT_MANGLE_MASK || !(mangle->flags & ARPT_MANGLE_MASK)) @@ -75,7 +70,7 @@ checkentry(const char *tablename, const void *e, const struct xt_target *target, static struct xt_target arpt_mangle_reg __read_mostly = { .name = "mangle", - .family = NF_ARP, + .family = NFPROTO_ARP, .target = target, .targetsize = sizeof(struct arpt_mangle), .checkentry = checkentry, diff --git a/net/ipv4/netfilter/arptable_filter.c b/net/ipv4/netfilter/arptable_filter.c index 3be4d07e7ed9..bee3d117661a 100644 --- a/net/ipv4/netfilter/arptable_filter.c +++ b/net/ipv4/netfilter/arptable_filter.c @@ -51,38 +51,59 @@ static struct xt_table packet_filter = { .lock = __RW_LOCK_UNLOCKED(packet_filter.lock), .private = NULL, .me = THIS_MODULE, - .af = NF_ARP, + .af = NFPROTO_ARP, }; /* The work comes in here from netfilter.c */ -static unsigned int arpt_hook(unsigned int hook, - struct sk_buff *skb, - const struct net_device *in, - const struct net_device *out, - int (*okfn)(struct sk_buff *)) +static unsigned int arpt_in_hook(unsigned int hook, + struct sk_buff *skb, + const struct net_device *in, + const struct net_device *out, + int (*okfn)(struct sk_buff *)) { - return arpt_do_table(skb, hook, in, out, init_net.ipv4.arptable_filter); + return arpt_do_table(skb, hook, in, out, + dev_net(in)->ipv4.arptable_filter); +} + +static unsigned int arpt_out_hook(unsigned int hook, + struct sk_buff *skb, + const struct net_device *in, + const struct net_device *out, + int (*okfn)(struct sk_buff *)) +{ + return arpt_do_table(skb, hook, in, out, + dev_net(out)->ipv4.arptable_filter); +} + +static unsigned int arpt_forward_hook(unsigned int hook, + struct sk_buff *skb, + const struct net_device *in, + const struct net_device *out, + int (*okfn)(struct sk_buff *)) +{ + return arpt_do_table(skb, hook, in, out, + dev_net(in)->ipv4.arptable_filter); } static struct nf_hook_ops arpt_ops[] __read_mostly = { { - .hook = arpt_hook, + .hook = arpt_in_hook, .owner = THIS_MODULE, - .pf = NF_ARP, + .pf = NFPROTO_ARP, .hooknum = NF_ARP_IN, .priority = NF_IP_PRI_FILTER, }, { - .hook = arpt_hook, + .hook = arpt_out_hook, .owner = THIS_MODULE, - .pf = NF_ARP, + .pf = NFPROTO_ARP, .hooknum = NF_ARP_OUT, .priority = NF_IP_PRI_FILTER, }, { - .hook = arpt_hook, + .hook = arpt_forward_hook, .owner = THIS_MODULE, - .pf = NF_ARP, + .pf = NFPROTO_ARP, .hooknum = NF_ARP_FORWARD, .priority = NF_IP_PRI_FILTER, }, diff --git a/net/ipv4/netfilter/ip_queue.c b/net/ipv4/netfilter/ip_queue.c index 26a37cedcf2e..432ce9d1c11c 100644 --- a/net/ipv4/netfilter/ip_queue.c +++ b/net/ipv4/netfilter/ip_queue.c @@ -156,7 +156,6 @@ ipq_build_packet_message(struct nf_queue_entry *entry, int *errp) case IPQ_COPY_META: case IPQ_COPY_NONE: size = NLMSG_SPACE(sizeof(*pmsg)); - data_len = 0; break; case IPQ_COPY_PACKET: @@ -224,8 +223,6 @@ ipq_build_packet_message(struct nf_queue_entry *entry, int *errp) return skb; nlmsg_failure: - if (skb) - kfree_skb(skb); *errp = -EINVAL; printk(KERN_ERR "ip_queue: error creating packet message\n"); return NULL; @@ -480,7 +477,7 @@ ipq_rcv_dev_event(struct notifier_block *this, { struct net_device *dev = ptr; - if (dev_net(dev) != &init_net) + if (!net_eq(dev_net(dev), &init_net)) return NOTIFY_DONE; /* Drop any packets associated with the downed device */ diff --git a/net/ipv4/netfilter/ip_tables.c b/net/ipv4/netfilter/ip_tables.c index 4e7c719445c2..213fb27debc1 100644 --- a/net/ipv4/netfilter/ip_tables.c +++ b/net/ipv4/netfilter/ip_tables.c @@ -171,31 +171,25 @@ ip_checkentry(const struct ipt_ip *ip) } static unsigned int -ipt_error(struct sk_buff *skb, - const struct net_device *in, - const struct net_device *out, - unsigned int hooknum, - const struct xt_target *target, - const void *targinfo) +ipt_error(struct sk_buff *skb, const struct xt_target_param *par) { if (net_ratelimit()) - printk("ip_tables: error: `%s'\n", (char *)targinfo); + printk("ip_tables: error: `%s'\n", + (const char *)par->targinfo); return NF_DROP; } /* Performance critical - called for every packet */ static inline bool -do_match(struct ipt_entry_match *m, - const struct sk_buff *skb, - const struct net_device *in, - const struct net_device *out, - int offset, - bool *hotdrop) +do_match(struct ipt_entry_match *m, const struct sk_buff *skb, + struct xt_match_param *par) { + par->match = m->u.kernel.match; + par->matchinfo = m->data; + /* Stop iteration if it doesn't match */ - if (!m->u.kernel.match->match(skb, in, out, m->u.kernel.match, m->data, - offset, ip_hdrlen(skb), hotdrop)) + if (!m->u.kernel.match->match(skb, par)) return true; else return false; @@ -326,7 +320,6 @@ ipt_do_table(struct sk_buff *skb, struct xt_table *table) { static const char nulldevname[IFNAMSIZ] __attribute__((aligned(sizeof(long)))); - u_int16_t offset; const struct iphdr *ip; u_int16_t datalen; bool hotdrop = false; @@ -336,6 +329,8 @@ ipt_do_table(struct sk_buff *skb, void *table_base; struct ipt_entry *e, *back; struct xt_table_info *private; + struct xt_match_param mtpar; + struct xt_target_param tgpar; /* Initialization */ ip = ip_hdr(skb); @@ -348,7 +343,13 @@ ipt_do_table(struct sk_buff *skb, * things we don't know, ie. tcp syn flag or ports). If the * rule is also a fragment-specific rule, non-fragments won't * match it. */ - offset = ntohs(ip->frag_off) & IP_OFFSET; + mtpar.fragoff = ntohs(ip->frag_off) & IP_OFFSET; + mtpar.thoff = ip_hdrlen(skb); + mtpar.hotdrop = &hotdrop; + mtpar.in = tgpar.in = in; + mtpar.out = tgpar.out = out; + mtpar.family = tgpar.family = NFPROTO_IPV4; + tgpar.hooknum = hook; read_lock_bh(&table->lock); IP_NF_ASSERT(table->valid_hooks & (1 << hook)); @@ -362,12 +363,11 @@ ipt_do_table(struct sk_buff *skb, do { IP_NF_ASSERT(e); IP_NF_ASSERT(back); - if (ip_packet_match(ip, indev, outdev, &e->ip, offset)) { + if (ip_packet_match(ip, indev, outdev, + &e->ip, mtpar.fragoff)) { struct ipt_entry_target *t; - if (IPT_MATCH_ITERATE(e, do_match, - skb, in, out, - offset, &hotdrop) != 0) + if (IPT_MATCH_ITERATE(e, do_match, skb, &mtpar) != 0) goto no_match; ADD_COUNTER(e->counters, ntohs(ip->tot_len), 1); @@ -413,16 +413,14 @@ ipt_do_table(struct sk_buff *skb, } else { /* Targets which reenter must return abs. verdicts */ + tgpar.target = t->u.kernel.target; + tgpar.targinfo = t->data; #ifdef CONFIG_NETFILTER_DEBUG ((struct ipt_entry *)table_base)->comefrom = 0xeeeeeeec; #endif verdict = t->u.kernel.target->target(skb, - in, out, - hook, - t->u.kernel.target, - t->data); - + &tgpar); #ifdef CONFIG_NETFILTER_DEBUG if (((struct ipt_entry *)table_base)->comefrom != 0xeeeeeeec @@ -575,12 +573,17 @@ mark_source_chains(struct xt_table_info *newinfo, static int cleanup_match(struct ipt_entry_match *m, unsigned int *i) { + struct xt_mtdtor_param par; + if (i && (*i)-- == 0) return 1; - if (m->u.kernel.match->destroy) - m->u.kernel.match->destroy(m->u.kernel.match, m->data); - module_put(m->u.kernel.match->me); + par.match = m->u.kernel.match; + par.matchinfo = m->data; + par.family = NFPROTO_IPV4; + if (par.match->destroy != NULL) + par.match->destroy(&par); + module_put(par.match->me); return 0; } @@ -606,34 +609,28 @@ check_entry(struct ipt_entry *e, const char *name) } static int -check_match(struct ipt_entry_match *m, const char *name, - const struct ipt_ip *ip, - unsigned int hookmask, unsigned int *i) +check_match(struct ipt_entry_match *m, struct xt_mtchk_param *par, + unsigned int *i) { - struct xt_match *match; + const struct ipt_ip *ip = par->entryinfo; int ret; - match = m->u.kernel.match; - ret = xt_check_match(match, AF_INET, m->u.match_size - sizeof(*m), - name, hookmask, ip->proto, - ip->invflags & IPT_INV_PROTO); - if (!ret && m->u.kernel.match->checkentry - && !m->u.kernel.match->checkentry(name, ip, match, m->data, - hookmask)) { + par->match = m->u.kernel.match; + par->matchinfo = m->data; + + ret = xt_check_match(par, m->u.match_size - sizeof(*m), + ip->proto, ip->invflags & IPT_INV_PROTO); + if (ret < 0) { duprintf("ip_tables: check failed for `%s'.\n", - m->u.kernel.match->name); - ret = -EINVAL; + par.match->name); + return ret; } - if (!ret) - (*i)++; - return ret; + ++*i; + return 0; } static int -find_check_match(struct ipt_entry_match *m, - const char *name, - const struct ipt_ip *ip, - unsigned int hookmask, +find_check_match(struct ipt_entry_match *m, struct xt_mtchk_param *par, unsigned int *i) { struct xt_match *match; @@ -648,7 +645,7 @@ find_check_match(struct ipt_entry_match *m, } m->u.kernel.match = match; - ret = check_match(m, name, ip, hookmask, i); + ret = check_match(m, par, i); if (ret) goto err; @@ -660,23 +657,25 @@ err: static int check_target(struct ipt_entry *e, const char *name) { - struct ipt_entry_target *t; - struct xt_target *target; + struct ipt_entry_target *t = ipt_get_target(e); + struct xt_tgchk_param par = { + .table = name, + .entryinfo = e, + .target = t->u.kernel.target, + .targinfo = t->data, + .hook_mask = e->comefrom, + .family = NFPROTO_IPV4, + }; int ret; - t = ipt_get_target(e); - target = t->u.kernel.target; - ret = xt_check_target(target, AF_INET, t->u.target_size - sizeof(*t), - name, e->comefrom, e->ip.proto, - e->ip.invflags & IPT_INV_PROTO); - if (!ret && t->u.kernel.target->checkentry - && !t->u.kernel.target->checkentry(name, e, target, t->data, - e->comefrom)) { + ret = xt_check_target(&par, t->u.target_size - sizeof(*t), + e->ip.proto, e->ip.invflags & IPT_INV_PROTO); + if (ret < 0) { duprintf("ip_tables: check failed for `%s'.\n", t->u.kernel.target->name); - ret = -EINVAL; + return ret; } - return ret; + return 0; } static int @@ -687,14 +686,18 @@ find_check_entry(struct ipt_entry *e, const char *name, unsigned int size, struct xt_target *target; int ret; unsigned int j; + struct xt_mtchk_param mtpar; ret = check_entry(e, name); if (ret) return ret; j = 0; - ret = IPT_MATCH_ITERATE(e, find_check_match, name, &e->ip, - e->comefrom, &j); + mtpar.table = name; + mtpar.entryinfo = &e->ip; + mtpar.hook_mask = e->comefrom; + mtpar.family = NFPROTO_IPV4; + ret = IPT_MATCH_ITERATE(e, find_check_match, &mtpar, &j); if (ret != 0) goto cleanup_matches; @@ -769,6 +772,7 @@ check_entry_size_and_hooks(struct ipt_entry *e, static int cleanup_entry(struct ipt_entry *e, unsigned int *i) { + struct xt_tgdtor_param par; struct ipt_entry_target *t; if (i && (*i)-- == 0) @@ -777,9 +781,13 @@ cleanup_entry(struct ipt_entry *e, unsigned int *i) /* Cleanup all matches */ IPT_MATCH_ITERATE(e, cleanup_match, NULL); t = ipt_get_target(e); - if (t->u.kernel.target->destroy) - t->u.kernel.target->destroy(t->u.kernel.target, t->data); - module_put(t->u.kernel.target->me); + + par.target = t->u.kernel.target; + par.targinfo = t->data; + par.family = NFPROTO_IPV4; + if (par.target->destroy != NULL) + par.target->destroy(&par); + module_put(par.target->me); return 0; } @@ -1648,12 +1656,16 @@ static int compat_check_entry(struct ipt_entry *e, const char *name, unsigned int *i) { + struct xt_mtchk_param mtpar; unsigned int j; int ret; j = 0; - ret = IPT_MATCH_ITERATE(e, check_match, name, &e->ip, - e->comefrom, &j); + mtpar.table = name; + mtpar.entryinfo = &e->ip; + mtpar.hook_mask = e->comefrom; + mtpar.family = NFPROTO_IPV4; + ret = IPT_MATCH_ITERATE(e, check_match, &mtpar, &j); if (ret) goto cleanup_matches; @@ -2121,30 +2133,23 @@ icmp_type_code_match(u_int8_t test_type, u_int8_t min_code, u_int8_t max_code, } static bool -icmp_match(const struct sk_buff *skb, - const struct net_device *in, - const struct net_device *out, - const struct xt_match *match, - const void *matchinfo, - int offset, - unsigned int protoff, - bool *hotdrop) +icmp_match(const struct sk_buff *skb, const struct xt_match_param *par) { const struct icmphdr *ic; struct icmphdr _icmph; - const struct ipt_icmp *icmpinfo = matchinfo; + const struct ipt_icmp *icmpinfo = par->matchinfo; /* Must not be a fragment. */ - if (offset) + if (par->fragoff != 0) return false; - ic = skb_header_pointer(skb, protoff, sizeof(_icmph), &_icmph); + ic = skb_header_pointer(skb, par->thoff, sizeof(_icmph), &_icmph); if (ic == NULL) { /* We've been asked to examine this packet, and we * can't. Hence, no choice but to drop. */ duprintf("Dropping evil ICMP tinygram.\n"); - *hotdrop = true; + *par->hotdrop = true; return false; } @@ -2155,15 +2160,9 @@ icmp_match(const struct sk_buff *skb, !!(icmpinfo->invflags&IPT_ICMP_INV)); } -/* Called when user tries to insert an entry of this type. */ -static bool -icmp_checkentry(const char *tablename, - const void *entry, - const struct xt_match *match, - void *matchinfo, - unsigned int hook_mask) +static bool icmp_checkentry(const struct xt_mtchk_param *par) { - const struct ipt_icmp *icmpinfo = matchinfo; + const struct ipt_icmp *icmpinfo = par->matchinfo; /* Must specify no unknown invflags */ return !(icmpinfo->invflags & ~IPT_ICMP_INV); diff --git a/net/ipv4/netfilter/ipt_CLUSTERIP.c b/net/ipv4/netfilter/ipt_CLUSTERIP.c index 1819ad7ab910..7ac1677419a9 100644 --- a/net/ipv4/netfilter/ipt_CLUSTERIP.c +++ b/net/ipv4/netfilter/ipt_CLUSTERIP.c @@ -281,11 +281,9 @@ clusterip_responsible(const struct clusterip_config *config, u_int32_t hash) ***********************************************************************/ static unsigned int -clusterip_tg(struct sk_buff *skb, const struct net_device *in, - const struct net_device *out, unsigned int hooknum, - const struct xt_target *target, const void *targinfo) +clusterip_tg(struct sk_buff *skb, const struct xt_target_param *par) { - const struct ipt_clusterip_tgt_info *cipinfo = targinfo; + const struct ipt_clusterip_tgt_info *cipinfo = par->targinfo; struct nf_conn *ct; enum ip_conntrack_info ctinfo; u_int32_t hash; @@ -349,13 +347,10 @@ clusterip_tg(struct sk_buff *skb, const struct net_device *in, return XT_CONTINUE; } -static bool -clusterip_tg_check(const char *tablename, const void *e_void, - const struct xt_target *target, void *targinfo, - unsigned int hook_mask) +static bool clusterip_tg_check(const struct xt_tgchk_param *par) { - struct ipt_clusterip_tgt_info *cipinfo = targinfo; - const struct ipt_entry *e = e_void; + struct ipt_clusterip_tgt_info *cipinfo = par->targinfo; + const struct ipt_entry *e = par->entryinfo; struct clusterip_config *config; @@ -406,9 +401,9 @@ clusterip_tg_check(const char *tablename, const void *e_void, } cipinfo->config = config; - if (nf_ct_l3proto_try_module_get(target->family) < 0) { + if (nf_ct_l3proto_try_module_get(par->target->family) < 0) { printk(KERN_WARNING "can't load conntrack support for " - "proto=%u\n", target->family); + "proto=%u\n", par->target->family); return false; } @@ -416,9 +411,9 @@ clusterip_tg_check(const char *tablename, const void *e_void, } /* drop reference count of cluster config when rule is deleted */ -static void clusterip_tg_destroy(const struct xt_target *target, void *targinfo) +static void clusterip_tg_destroy(const struct xt_tgdtor_param *par) { - const struct ipt_clusterip_tgt_info *cipinfo = targinfo; + const struct ipt_clusterip_tgt_info *cipinfo = par->targinfo; /* if no more entries are referencing the config, remove it * from the list and destroy the proc entry */ @@ -426,7 +421,7 @@ static void clusterip_tg_destroy(const struct xt_target *target, void *targinfo) clusterip_config_put(cipinfo->config); - nf_ct_l3proto_module_put(target->family); + nf_ct_l3proto_module_put(par->target->family); } #ifdef CONFIG_COMPAT @@ -445,7 +440,7 @@ struct compat_ipt_clusterip_tgt_info static struct xt_target clusterip_tg_reg __read_mostly = { .name = "CLUSTERIP", - .family = AF_INET, + .family = NFPROTO_IPV4, .target = clusterip_tg, .checkentry = clusterip_tg_check, .destroy = clusterip_tg_destroy, @@ -475,11 +470,10 @@ static void arp_print(struct arp_payload *payload) #define HBUFFERLEN 30 char hbuffer[HBUFFERLEN]; int j,k; - const char hexbuf[]= "0123456789abcdef"; for (k=0, j=0; k < HBUFFERLEN-3 && j < ETH_ALEN; j++) { - hbuffer[k++]=hexbuf[(payload->src_hw[j]>>4)&15]; - hbuffer[k++]=hexbuf[payload->src_hw[j]&15]; + hbuffer[k++] = hex_asc_hi(payload->src_hw[j]); + hbuffer[k++] = hex_asc_lo(payload->src_hw[j]); hbuffer[k++]=':'; } hbuffer[--k]='\0'; @@ -547,7 +541,7 @@ arp_mangle(unsigned int hook, static struct nf_hook_ops cip_arp_ops __read_mostly = { .hook = arp_mangle, - .pf = NF_ARP, + .pf = NFPROTO_ARP, .hooknum = NF_ARP_OUT, .priority = -1 }; diff --git a/net/ipv4/netfilter/ipt_ECN.c b/net/ipv4/netfilter/ipt_ECN.c index d60139c134ca..f7e2fa0974dc 100644 --- a/net/ipv4/netfilter/ipt_ECN.c +++ b/net/ipv4/netfilter/ipt_ECN.c @@ -77,11 +77,9 @@ set_ect_tcp(struct sk_buff *skb, const struct ipt_ECN_info *einfo) } static unsigned int -ecn_tg(struct sk_buff *skb, const struct net_device *in, - const struct net_device *out, unsigned int hooknum, - const struct xt_target *target, const void *targinfo) +ecn_tg(struct sk_buff *skb, const struct xt_target_param *par) { - const struct ipt_ECN_info *einfo = targinfo; + const struct ipt_ECN_info *einfo = par->targinfo; if (einfo->operation & IPT_ECN_OP_SET_IP) if (!set_ect_ip(skb, einfo)) @@ -95,13 +93,10 @@ ecn_tg(struct sk_buff *skb, const struct net_device *in, return XT_CONTINUE; } -static bool -ecn_tg_check(const char *tablename, const void *e_void, - const struct xt_target *target, void *targinfo, - unsigned int hook_mask) +static bool ecn_tg_check(const struct xt_tgchk_param *par) { - const struct ipt_ECN_info *einfo = targinfo; - const struct ipt_entry *e = e_void; + const struct ipt_ECN_info *einfo = par->targinfo; + const struct ipt_entry *e = par->entryinfo; if (einfo->operation & IPT_ECN_OP_MASK) { printk(KERN_WARNING "ECN: unsupported ECN operation %x\n", @@ -124,7 +119,7 @@ ecn_tg_check(const char *tablename, const void *e_void, static struct xt_target ecn_tg_reg __read_mostly = { .name = "ECN", - .family = AF_INET, + .family = NFPROTO_IPV4, .target = ecn_tg, .targetsize = sizeof(struct ipt_ECN_info), .table = "mangle", diff --git a/net/ipv4/netfilter/ipt_LOG.c b/net/ipv4/netfilter/ipt_LOG.c index 0af14137137b..fc6ce04a3e35 100644 --- a/net/ipv4/netfilter/ipt_LOG.c +++ b/net/ipv4/netfilter/ipt_LOG.c @@ -375,7 +375,7 @@ static struct nf_loginfo default_loginfo = { }; static void -ipt_log_packet(unsigned int pf, +ipt_log_packet(u_int8_t pf, unsigned int hooknum, const struct sk_buff *skb, const struct net_device *in, @@ -426,28 +426,23 @@ ipt_log_packet(unsigned int pf, } static unsigned int -log_tg(struct sk_buff *skb, const struct net_device *in, - const struct net_device *out, unsigned int hooknum, - const struct xt_target *target, const void *targinfo) +log_tg(struct sk_buff *skb, const struct xt_target_param *par) { - const struct ipt_log_info *loginfo = targinfo; + const struct ipt_log_info *loginfo = par->targinfo; struct nf_loginfo li; li.type = NF_LOG_TYPE_LOG; li.u.log.level = loginfo->level; li.u.log.logflags = loginfo->logflags; - ipt_log_packet(PF_INET, hooknum, skb, in, out, &li, + ipt_log_packet(NFPROTO_IPV4, par->hooknum, skb, par->in, par->out, &li, loginfo->prefix); return XT_CONTINUE; } -static bool -log_tg_check(const char *tablename, const void *e, - const struct xt_target *target, void *targinfo, - unsigned int hook_mask) +static bool log_tg_check(const struct xt_tgchk_param *par) { - const struct ipt_log_info *loginfo = targinfo; + const struct ipt_log_info *loginfo = par->targinfo; if (loginfo->level >= 8) { pr_debug("LOG: level %u >= 8\n", loginfo->level); @@ -463,7 +458,7 @@ log_tg_check(const char *tablename, const void *e, static struct xt_target log_tg_reg __read_mostly = { .name = "LOG", - .family = AF_INET, + .family = NFPROTO_IPV4, .target = log_tg, .targetsize = sizeof(struct ipt_log_info), .checkentry = log_tg_check, @@ -483,7 +478,7 @@ static int __init log_tg_init(void) ret = xt_register_target(&log_tg_reg); if (ret < 0) return ret; - nf_log_register(PF_INET, &ipt_log_logger); + nf_log_register(NFPROTO_IPV4, &ipt_log_logger); return 0; } diff --git a/net/ipv4/netfilter/ipt_MASQUERADE.c b/net/ipv4/netfilter/ipt_MASQUERADE.c index 84c26dd27d81..f389f60cb105 100644 --- a/net/ipv4/netfilter/ipt_MASQUERADE.c +++ b/net/ipv4/netfilter/ipt_MASQUERADE.c @@ -31,12 +31,9 @@ MODULE_DESCRIPTION("Xtables: automatic-address SNAT"); static DEFINE_RWLOCK(masq_lock); /* FIXME: Multiple targets. --RR */ -static bool -masquerade_tg_check(const char *tablename, const void *e, - const struct xt_target *target, void *targinfo, - unsigned int hook_mask) +static bool masquerade_tg_check(const struct xt_tgchk_param *par) { - const struct nf_nat_multi_range_compat *mr = targinfo; + const struct nf_nat_multi_range_compat *mr = par->targinfo; if (mr->range[0].flags & IP_NAT_RANGE_MAP_IPS) { pr_debug("masquerade_check: bad MAP_IPS.\n"); @@ -50,9 +47,7 @@ masquerade_tg_check(const char *tablename, const void *e, } static unsigned int -masquerade_tg(struct sk_buff *skb, const struct net_device *in, - const struct net_device *out, unsigned int hooknum, - const struct xt_target *target, const void *targinfo) +masquerade_tg(struct sk_buff *skb, const struct xt_target_param *par) { struct nf_conn *ct; struct nf_conn_nat *nat; @@ -62,7 +57,7 @@ masquerade_tg(struct sk_buff *skb, const struct net_device *in, const struct rtable *rt; __be32 newsrc; - NF_CT_ASSERT(hooknum == NF_INET_POST_ROUTING); + NF_CT_ASSERT(par->hooknum == NF_INET_POST_ROUTING); ct = nf_ct_get(skb, &ctinfo); nat = nfct_nat(ct); @@ -76,16 +71,16 @@ masquerade_tg(struct sk_buff *skb, const struct net_device *in, if (ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.src.u3.ip == 0) return NF_ACCEPT; - mr = targinfo; + mr = par->targinfo; rt = skb->rtable; - newsrc = inet_select_addr(out, rt->rt_gateway, RT_SCOPE_UNIVERSE); + newsrc = inet_select_addr(par->out, rt->rt_gateway, RT_SCOPE_UNIVERSE); if (!newsrc) { - printk("MASQUERADE: %s ate my IP address\n", out->name); + printk("MASQUERADE: %s ate my IP address\n", par->out->name); return NF_DROP; } write_lock_bh(&masq_lock); - nat->masq_index = out->ifindex; + nat->masq_index = par->out->ifindex; write_unlock_bh(&masq_lock); /* Transfer from original range. */ @@ -119,9 +114,7 @@ static int masq_device_event(struct notifier_block *this, void *ptr) { const struct net_device *dev = ptr; - - if (dev_net(dev) != &init_net) - return NOTIFY_DONE; + struct net *net = dev_net(dev); if (event == NETDEV_DOWN) { /* Device was downed. Search entire table for @@ -129,7 +122,8 @@ static int masq_device_event(struct notifier_block *this, and forget them. */ NF_CT_ASSERT(dev->ifindex != 0); - nf_ct_iterate_cleanup(device_cmp, (void *)(long)dev->ifindex); + nf_ct_iterate_cleanup(net, device_cmp, + (void *)(long)dev->ifindex); } return NOTIFY_DONE; @@ -153,7 +147,7 @@ static struct notifier_block masq_inet_notifier = { static struct xt_target masquerade_tg_reg __read_mostly = { .name = "MASQUERADE", - .family = AF_INET, + .family = NFPROTO_IPV4, .target = masquerade_tg, .targetsize = sizeof(struct nf_nat_multi_range_compat), .table = "nat", diff --git a/net/ipv4/netfilter/ipt_NETMAP.c b/net/ipv4/netfilter/ipt_NETMAP.c index 6739abfd1521..7c29582d4ec8 100644 --- a/net/ipv4/netfilter/ipt_NETMAP.c +++ b/net/ipv4/netfilter/ipt_NETMAP.c @@ -22,12 +22,9 @@ MODULE_LICENSE("GPL"); MODULE_AUTHOR("Svenning Soerensen <svenning@post5.tele.dk>"); MODULE_DESCRIPTION("Xtables: 1:1 NAT mapping of IPv4 subnets"); -static bool -netmap_tg_check(const char *tablename, const void *e, - const struct xt_target *target, void *targinfo, - unsigned int hook_mask) +static bool netmap_tg_check(const struct xt_tgchk_param *par) { - const struct nf_nat_multi_range_compat *mr = targinfo; + const struct nf_nat_multi_range_compat *mr = par->targinfo; if (!(mr->range[0].flags & IP_NAT_RANGE_MAP_IPS)) { pr_debug("NETMAP:check: bad MAP_IPS.\n"); @@ -41,24 +38,23 @@ netmap_tg_check(const char *tablename, const void *e, } static unsigned int -netmap_tg(struct sk_buff *skb, const struct net_device *in, - const struct net_device *out, unsigned int hooknum, - const struct xt_target *target, const void *targinfo) +netmap_tg(struct sk_buff *skb, const struct xt_target_param *par) { struct nf_conn *ct; enum ip_conntrack_info ctinfo; __be32 new_ip, netmask; - const struct nf_nat_multi_range_compat *mr = targinfo; + const struct nf_nat_multi_range_compat *mr = par->targinfo; struct nf_nat_range newrange; - NF_CT_ASSERT(hooknum == NF_INET_PRE_ROUTING - || hooknum == NF_INET_POST_ROUTING - || hooknum == NF_INET_LOCAL_OUT); + NF_CT_ASSERT(par->hooknum == NF_INET_PRE_ROUTING || + par->hooknum == NF_INET_POST_ROUTING || + par->hooknum == NF_INET_LOCAL_OUT); ct = nf_ct_get(skb, &ctinfo); netmask = ~(mr->range[0].min_ip ^ mr->range[0].max_ip); - if (hooknum == NF_INET_PRE_ROUTING || hooknum == NF_INET_LOCAL_OUT) + if (par->hooknum == NF_INET_PRE_ROUTING || + par->hooknum == NF_INET_LOCAL_OUT) new_ip = ip_hdr(skb)->daddr & ~netmask; else new_ip = ip_hdr(skb)->saddr & ~netmask; @@ -70,12 +66,12 @@ netmap_tg(struct sk_buff *skb, const struct net_device *in, mr->range[0].min, mr->range[0].max }); /* Hand modified range to generic setup. */ - return nf_nat_setup_info(ct, &newrange, HOOK2MANIP(hooknum)); + return nf_nat_setup_info(ct, &newrange, HOOK2MANIP(par->hooknum)); } static struct xt_target netmap_tg_reg __read_mostly = { .name = "NETMAP", - .family = AF_INET, + .family = NFPROTO_IPV4, .target = netmap_tg, .targetsize = sizeof(struct nf_nat_multi_range_compat), .table = "nat", diff --git a/net/ipv4/netfilter/ipt_REDIRECT.c b/net/ipv4/netfilter/ipt_REDIRECT.c index 5c6292449d13..698e5e78685b 100644 --- a/net/ipv4/netfilter/ipt_REDIRECT.c +++ b/net/ipv4/netfilter/ipt_REDIRECT.c @@ -26,12 +26,9 @@ MODULE_AUTHOR("Netfilter Core Team <coreteam@netfilter.org>"); MODULE_DESCRIPTION("Xtables: Connection redirection to localhost"); /* FIXME: Take multiple ranges --RR */ -static bool -redirect_tg_check(const char *tablename, const void *e, - const struct xt_target *target, void *targinfo, - unsigned int hook_mask) +static bool redirect_tg_check(const struct xt_tgchk_param *par) { - const struct nf_nat_multi_range_compat *mr = targinfo; + const struct nf_nat_multi_range_compat *mr = par->targinfo; if (mr->range[0].flags & IP_NAT_RANGE_MAP_IPS) { pr_debug("redirect_check: bad MAP_IPS.\n"); @@ -45,24 +42,22 @@ redirect_tg_check(const char *tablename, const void *e, } static unsigned int -redirect_tg(struct sk_buff *skb, const struct net_device *in, - const struct net_device *out, unsigned int hooknum, - const struct xt_target *target, const void *targinfo) +redirect_tg(struct sk_buff *skb, const struct xt_target_param *par) { struct nf_conn *ct; enum ip_conntrack_info ctinfo; __be32 newdst; - const struct nf_nat_multi_range_compat *mr = targinfo; + const struct nf_nat_multi_range_compat *mr = par->targinfo; struct nf_nat_range newrange; - NF_CT_ASSERT(hooknum == NF_INET_PRE_ROUTING - || hooknum == NF_INET_LOCAL_OUT); + NF_CT_ASSERT(par->hooknum == NF_INET_PRE_ROUTING || + par->hooknum == NF_INET_LOCAL_OUT); ct = nf_ct_get(skb, &ctinfo); NF_CT_ASSERT(ct && (ctinfo == IP_CT_NEW || ctinfo == IP_CT_RELATED)); /* Local packets: make them go to loopback */ - if (hooknum == NF_INET_LOCAL_OUT) + if (par->hooknum == NF_INET_LOCAL_OUT) newdst = htonl(0x7F000001); else { struct in_device *indev; @@ -92,7 +87,7 @@ redirect_tg(struct sk_buff *skb, const struct net_device *in, static struct xt_target redirect_tg_reg __read_mostly = { .name = "REDIRECT", - .family = AF_INET, + .family = NFPROTO_IPV4, .target = redirect_tg, .targetsize = sizeof(struct nf_nat_multi_range_compat), .table = "nat", diff --git a/net/ipv4/netfilter/ipt_REJECT.c b/net/ipv4/netfilter/ipt_REJECT.c index 2639872849da..0b4b6e0ff2b9 100644 --- a/net/ipv4/netfilter/ipt_REJECT.c +++ b/net/ipv4/netfilter/ipt_REJECT.c @@ -136,11 +136,9 @@ static inline void send_unreach(struct sk_buff *skb_in, int code) } static unsigned int -reject_tg(struct sk_buff *skb, const struct net_device *in, - const struct net_device *out, unsigned int hooknum, - const struct xt_target *target, const void *targinfo) +reject_tg(struct sk_buff *skb, const struct xt_target_param *par) { - const struct ipt_reject_info *reject = targinfo; + const struct ipt_reject_info *reject = par->targinfo; /* WARNING: This code causes reentry within iptables. This means that the iptables jump stack is now crap. We @@ -168,7 +166,7 @@ reject_tg(struct sk_buff *skb, const struct net_device *in, send_unreach(skb, ICMP_PKT_FILTERED); break; case IPT_TCP_RESET: - send_reset(skb, hooknum); + send_reset(skb, par->hooknum); case IPT_ICMP_ECHOREPLY: /* Doesn't happen. */ break; @@ -177,13 +175,10 @@ reject_tg(struct sk_buff *skb, const struct net_device *in, return NF_DROP; } -static bool -reject_tg_check(const char *tablename, const void *e_void, - const struct xt_target *target, void *targinfo, - unsigned int hook_mask) +static bool reject_tg_check(const struct xt_tgchk_param *par) { - const struct ipt_reject_info *rejinfo = targinfo; - const struct ipt_entry *e = e_void; + const struct ipt_reject_info *rejinfo = par->targinfo; + const struct ipt_entry *e = par->entryinfo; if (rejinfo->with == IPT_ICMP_ECHOREPLY) { printk("ipt_REJECT: ECHOREPLY no longer supported.\n"); @@ -201,7 +196,7 @@ reject_tg_check(const char *tablename, const void *e_void, static struct xt_target reject_tg_reg __read_mostly = { .name = "REJECT", - .family = AF_INET, + .family = NFPROTO_IPV4, .target = reject_tg, .targetsize = sizeof(struct ipt_reject_info), .table = "filter", diff --git a/net/ipv4/netfilter/ipt_TTL.c b/net/ipv4/netfilter/ipt_TTL.c index 30eed65e7338..6d76aae90cc0 100644 --- a/net/ipv4/netfilter/ipt_TTL.c +++ b/net/ipv4/netfilter/ipt_TTL.c @@ -20,12 +20,10 @@ MODULE_DESCRIPTION("Xtables: IPv4 TTL field modification target"); MODULE_LICENSE("GPL"); static unsigned int -ttl_tg(struct sk_buff *skb, const struct net_device *in, - const struct net_device *out, unsigned int hooknum, - const struct xt_target *target, const void *targinfo) +ttl_tg(struct sk_buff *skb, const struct xt_target_param *par) { struct iphdr *iph; - const struct ipt_TTL_info *info = targinfo; + const struct ipt_TTL_info *info = par->targinfo; int new_ttl; if (!skb_make_writable(skb, skb->len)) @@ -61,12 +59,9 @@ ttl_tg(struct sk_buff *skb, const struct net_device *in, return XT_CONTINUE; } -static bool -ttl_tg_check(const char *tablename, const void *e, - const struct xt_target *target, void *targinfo, - unsigned int hook_mask) +static bool ttl_tg_check(const struct xt_tgchk_param *par) { - const struct ipt_TTL_info *info = targinfo; + const struct ipt_TTL_info *info = par->targinfo; if (info->mode > IPT_TTL_MAXMODE) { printk(KERN_WARNING "ipt_TTL: invalid or unknown Mode %u\n", @@ -80,7 +75,7 @@ ttl_tg_check(const char *tablename, const void *e, static struct xt_target ttl_tg_reg __read_mostly = { .name = "TTL", - .family = AF_INET, + .family = NFPROTO_IPV4, .target = ttl_tg, .targetsize = sizeof(struct ipt_TTL_info), .table = "mangle", diff --git a/net/ipv4/netfilter/ipt_ULOG.c b/net/ipv4/netfilter/ipt_ULOG.c index b192756c6d0d..18a2826b57c6 100644 --- a/net/ipv4/netfilter/ipt_ULOG.c +++ b/net/ipv4/netfilter/ipt_ULOG.c @@ -281,18 +281,14 @@ alloc_failure: } static unsigned int -ulog_tg(struct sk_buff *skb, const struct net_device *in, - const struct net_device *out, unsigned int hooknum, - const struct xt_target *target, const void *targinfo) +ulog_tg(struct sk_buff *skb, const struct xt_target_param *par) { - struct ipt_ulog_info *loginfo = (struct ipt_ulog_info *) targinfo; - - ipt_ulog_packet(hooknum, skb, in, out, loginfo, NULL); - + ipt_ulog_packet(par->hooknum, skb, par->in, par->out, + par->targinfo, NULL); return XT_CONTINUE; } -static void ipt_logfn(unsigned int pf, +static void ipt_logfn(u_int8_t pf, unsigned int hooknum, const struct sk_buff *skb, const struct net_device *in, @@ -317,12 +313,9 @@ static void ipt_logfn(unsigned int pf, ipt_ulog_packet(hooknum, skb, in, out, &loginfo, prefix); } -static bool -ulog_tg_check(const char *tablename, const void *e, - const struct xt_target *target, void *targinfo, - unsigned int hookmask) +static bool ulog_tg_check(const struct xt_tgchk_param *par) { - const struct ipt_ulog_info *loginfo = targinfo; + const struct ipt_ulog_info *loginfo = par->targinfo; if (loginfo->prefix[sizeof(loginfo->prefix) - 1] != '\0') { pr_debug("ipt_ULOG: prefix term %i\n", @@ -374,7 +367,7 @@ static int ulog_tg_compat_to_user(void __user *dst, void *src) static struct xt_target ulog_tg_reg __read_mostly = { .name = "ULOG", - .family = AF_INET, + .family = NFPROTO_IPV4, .target = ulog_tg, .targetsize = sizeof(struct ipt_ulog_info), .checkentry = ulog_tg_check, @@ -419,7 +412,7 @@ static int __init ulog_tg_init(void) return ret; } if (nflog) - nf_log_register(PF_INET, &ipt_ulog_logger); + nf_log_register(NFPROTO_IPV4, &ipt_ulog_logger); return 0; } diff --git a/net/ipv4/netfilter/ipt_addrtype.c b/net/ipv4/netfilter/ipt_addrtype.c index 49587a497229..88762f02779d 100644 --- a/net/ipv4/netfilter/ipt_addrtype.c +++ b/net/ipv4/netfilter/ipt_addrtype.c @@ -30,12 +30,9 @@ static inline bool match_type(const struct net_device *dev, __be32 addr, } static bool -addrtype_mt_v0(const struct sk_buff *skb, const struct net_device *in, - const struct net_device *out, const struct xt_match *match, - const void *matchinfo, int offset, unsigned int protoff, - bool *hotdrop) +addrtype_mt_v0(const struct sk_buff *skb, const struct xt_match_param *par) { - const struct ipt_addrtype_info *info = matchinfo; + const struct ipt_addrtype_info *info = par->matchinfo; const struct iphdr *iph = ip_hdr(skb); bool ret = true; @@ -50,36 +47,30 @@ addrtype_mt_v0(const struct sk_buff *skb, const struct net_device *in, } static bool -addrtype_mt_v1(const struct sk_buff *skb, const struct net_device *in, - const struct net_device *out, const struct xt_match *match, - const void *matchinfo, int offset, unsigned int protoff, - bool *hotdrop) +addrtype_mt_v1(const struct sk_buff *skb, const struct xt_match_param *par) { - const struct ipt_addrtype_info_v1 *info = matchinfo; + const struct ipt_addrtype_info_v1 *info = par->matchinfo; const struct iphdr *iph = ip_hdr(skb); const struct net_device *dev = NULL; bool ret = true; if (info->flags & IPT_ADDRTYPE_LIMIT_IFACE_IN) - dev = in; + dev = par->in; else if (info->flags & IPT_ADDRTYPE_LIMIT_IFACE_OUT) - dev = out; + dev = par->out; if (info->source) ret &= match_type(dev, iph->saddr, info->source) ^ (info->flags & IPT_ADDRTYPE_INVERT_SOURCE); if (ret && info->dest) ret &= match_type(dev, iph->daddr, info->dest) ^ - (info->flags & IPT_ADDRTYPE_INVERT_DEST); + !!(info->flags & IPT_ADDRTYPE_INVERT_DEST); return ret; } -static bool -addrtype_mt_checkentry_v1(const char *tablename, const void *ip_void, - const struct xt_match *match, void *matchinfo, - unsigned int hook_mask) +static bool addrtype_mt_checkentry_v1(const struct xt_mtchk_param *par) { - struct ipt_addrtype_info_v1 *info = matchinfo; + struct ipt_addrtype_info_v1 *info = par->matchinfo; if (info->flags & IPT_ADDRTYPE_LIMIT_IFACE_IN && info->flags & IPT_ADDRTYPE_LIMIT_IFACE_OUT) { @@ -88,14 +79,16 @@ addrtype_mt_checkentry_v1(const char *tablename, const void *ip_void, return false; } - if (hook_mask & (1 << NF_INET_PRE_ROUTING | 1 << NF_INET_LOCAL_IN) && + if (par->hook_mask & ((1 << NF_INET_PRE_ROUTING) | + (1 << NF_INET_LOCAL_IN)) && info->flags & IPT_ADDRTYPE_LIMIT_IFACE_OUT) { printk(KERN_ERR "ipt_addrtype: output interface limitation " "not valid in PRE_ROUTING and INPUT\n"); return false; } - if (hook_mask & (1 << NF_INET_POST_ROUTING | 1 << NF_INET_LOCAL_OUT) && + if (par->hook_mask & ((1 << NF_INET_POST_ROUTING) | + (1 << NF_INET_LOCAL_OUT)) && info->flags & IPT_ADDRTYPE_LIMIT_IFACE_IN) { printk(KERN_ERR "ipt_addrtype: input interface limitation " "not valid in POST_ROUTING and OUTPUT\n"); @@ -108,14 +101,14 @@ addrtype_mt_checkentry_v1(const char *tablename, const void *ip_void, static struct xt_match addrtype_mt_reg[] __read_mostly = { { .name = "addrtype", - .family = AF_INET, + .family = NFPROTO_IPV4, .match = addrtype_mt_v0, .matchsize = sizeof(struct ipt_addrtype_info), .me = THIS_MODULE }, { .name = "addrtype", - .family = AF_INET, + .family = NFPROTO_IPV4, .revision = 1, .match = addrtype_mt_v1, .checkentry = addrtype_mt_checkentry_v1, diff --git a/net/ipv4/netfilter/ipt_ah.c b/net/ipv4/netfilter/ipt_ah.c index e977989629c7..0104c0b399de 100644 --- a/net/ipv4/netfilter/ipt_ah.c +++ b/net/ipv4/netfilter/ipt_ah.c @@ -36,27 +36,23 @@ spi_match(u_int32_t min, u_int32_t max, u_int32_t spi, bool invert) return r; } -static bool -ah_mt(const struct sk_buff *skb, const struct net_device *in, - const struct net_device *out, const struct xt_match *match, - const void *matchinfo, int offset, unsigned int protoff, bool *hotdrop) +static bool ah_mt(const struct sk_buff *skb, const struct xt_match_param *par) { struct ip_auth_hdr _ahdr; const struct ip_auth_hdr *ah; - const struct ipt_ah *ahinfo = matchinfo; + const struct ipt_ah *ahinfo = par->matchinfo; /* Must not be a fragment. */ - if (offset) + if (par->fragoff != 0) return false; - ah = skb_header_pointer(skb, protoff, - sizeof(_ahdr), &_ahdr); + ah = skb_header_pointer(skb, par->thoff, sizeof(_ahdr), &_ahdr); if (ah == NULL) { /* We've been asked to examine this packet, and we * can't. Hence, no choice but to drop. */ duprintf("Dropping evil AH tinygram.\n"); - *hotdrop = true; + *par->hotdrop = true; return 0; } @@ -65,13 +61,9 @@ ah_mt(const struct sk_buff *skb, const struct net_device *in, !!(ahinfo->invflags & IPT_AH_INV_SPI)); } -/* Called when user tries to insert an entry of this type. */ -static bool -ah_mt_check(const char *tablename, const void *ip_void, - const struct xt_match *match, void *matchinfo, - unsigned int hook_mask) +static bool ah_mt_check(const struct xt_mtchk_param *par) { - const struct ipt_ah *ahinfo = matchinfo; + const struct ipt_ah *ahinfo = par->matchinfo; /* Must specify no unknown invflags */ if (ahinfo->invflags & ~IPT_AH_INV_MASK) { @@ -83,7 +75,7 @@ ah_mt_check(const char *tablename, const void *ip_void, static struct xt_match ah_mt_reg __read_mostly = { .name = "ah", - .family = AF_INET, + .family = NFPROTO_IPV4, .match = ah_mt, .matchsize = sizeof(struct ipt_ah), .proto = IPPROTO_AH, diff --git a/net/ipv4/netfilter/ipt_ecn.c b/net/ipv4/netfilter/ipt_ecn.c index 749de8284ce5..6289b64144c6 100644 --- a/net/ipv4/netfilter/ipt_ecn.c +++ b/net/ipv4/netfilter/ipt_ecn.c @@ -67,12 +67,9 @@ static inline bool match_tcp(const struct sk_buff *skb, return true; } -static bool -ecn_mt(const struct sk_buff *skb, const struct net_device *in, - const struct net_device *out, const struct xt_match *match, - const void *matchinfo, int offset, unsigned int protoff, bool *hotdrop) +static bool ecn_mt(const struct sk_buff *skb, const struct xt_match_param *par) { - const struct ipt_ecn_info *info = matchinfo; + const struct ipt_ecn_info *info = par->matchinfo; if (info->operation & IPT_ECN_OP_MATCH_IP) if (!match_ip(skb, info)) @@ -81,20 +78,17 @@ ecn_mt(const struct sk_buff *skb, const struct net_device *in, if (info->operation & (IPT_ECN_OP_MATCH_ECE|IPT_ECN_OP_MATCH_CWR)) { if (ip_hdr(skb)->protocol != IPPROTO_TCP) return false; - if (!match_tcp(skb, info, hotdrop)) + if (!match_tcp(skb, info, par->hotdrop)) return false; } return true; } -static bool -ecn_mt_check(const char *tablename, const void *ip_void, - const struct xt_match *match, void *matchinfo, - unsigned int hook_mask) +static bool ecn_mt_check(const struct xt_mtchk_param *par) { - const struct ipt_ecn_info *info = matchinfo; - const struct ipt_ip *ip = ip_void; + const struct ipt_ecn_info *info = par->matchinfo; + const struct ipt_ip *ip = par->entryinfo; if (info->operation & IPT_ECN_OP_MATCH_MASK) return false; @@ -114,7 +108,7 @@ ecn_mt_check(const char *tablename, const void *ip_void, static struct xt_match ecn_mt_reg __read_mostly = { .name = "ecn", - .family = AF_INET, + .family = NFPROTO_IPV4, .match = ecn_mt, .matchsize = sizeof(struct ipt_ecn_info), .checkentry = ecn_mt_check, diff --git a/net/ipv4/netfilter/ipt_ttl.c b/net/ipv4/netfilter/ipt_ttl.c index e0b8caeb710c..297f1cbf4ff5 100644 --- a/net/ipv4/netfilter/ipt_ttl.c +++ b/net/ipv4/netfilter/ipt_ttl.c @@ -18,12 +18,9 @@ MODULE_AUTHOR("Harald Welte <laforge@netfilter.org>"); MODULE_DESCRIPTION("Xtables: IPv4 TTL field match"); MODULE_LICENSE("GPL"); -static bool -ttl_mt(const struct sk_buff *skb, const struct net_device *in, - const struct net_device *out, const struct xt_match *match, - const void *matchinfo, int offset, unsigned int protoff, bool *hotdrop) +static bool ttl_mt(const struct sk_buff *skb, const struct xt_match_param *par) { - const struct ipt_ttl_info *info = matchinfo; + const struct ipt_ttl_info *info = par->matchinfo; const u8 ttl = ip_hdr(skb)->ttl; switch (info->mode) { @@ -46,7 +43,7 @@ ttl_mt(const struct sk_buff *skb, const struct net_device *in, static struct xt_match ttl_mt_reg __read_mostly = { .name = "ttl", - .family = AF_INET, + .family = NFPROTO_IPV4, .match = ttl_mt, .matchsize = sizeof(struct ipt_ttl_info), .me = THIS_MODULE, diff --git a/net/ipv4/netfilter/iptable_filter.c b/net/ipv4/netfilter/iptable_filter.c index 1ea677dcf845..c9224310ebae 100644 --- a/net/ipv4/netfilter/iptable_filter.c +++ b/net/ipv4/netfilter/iptable_filter.c @@ -70,7 +70,7 @@ ipt_local_in_hook(unsigned int hook, int (*okfn)(struct sk_buff *)) { return ipt_do_table(skb, hook, in, out, - nf_local_in_net(in, out)->ipv4.iptable_filter); + dev_net(in)->ipv4.iptable_filter); } static unsigned int @@ -81,7 +81,7 @@ ipt_hook(unsigned int hook, int (*okfn)(struct sk_buff *)) { return ipt_do_table(skb, hook, in, out, - nf_forward_net(in, out)->ipv4.iptable_filter); + dev_net(in)->ipv4.iptable_filter); } static unsigned int @@ -101,7 +101,7 @@ ipt_local_out_hook(unsigned int hook, } return ipt_do_table(skb, hook, in, out, - nf_local_out_net(in, out)->ipv4.iptable_filter); + dev_net(out)->ipv4.iptable_filter); } static struct nf_hook_ops ipt_ops[] __read_mostly = { diff --git a/net/ipv4/netfilter/iptable_mangle.c b/net/ipv4/netfilter/iptable_mangle.c index da59182f2226..69f2c4287146 100644 --- a/net/ipv4/netfilter/iptable_mangle.c +++ b/net/ipv4/netfilter/iptable_mangle.c @@ -81,7 +81,7 @@ ipt_pre_routing_hook(unsigned int hook, int (*okfn)(struct sk_buff *)) { return ipt_do_table(skb, hook, in, out, - nf_pre_routing_net(in, out)->ipv4.iptable_mangle); + dev_net(in)->ipv4.iptable_mangle); } static unsigned int @@ -92,7 +92,7 @@ ipt_post_routing_hook(unsigned int hook, int (*okfn)(struct sk_buff *)) { return ipt_do_table(skb, hook, in, out, - nf_post_routing_net(in, out)->ipv4.iptable_mangle); + dev_net(out)->ipv4.iptable_mangle); } static unsigned int @@ -103,7 +103,7 @@ ipt_local_in_hook(unsigned int hook, int (*okfn)(struct sk_buff *)) { return ipt_do_table(skb, hook, in, out, - nf_local_in_net(in, out)->ipv4.iptable_mangle); + dev_net(in)->ipv4.iptable_mangle); } static unsigned int @@ -114,7 +114,7 @@ ipt_forward_hook(unsigned int hook, int (*okfn)(struct sk_buff *)) { return ipt_do_table(skb, hook, in, out, - nf_forward_net(in, out)->ipv4.iptable_mangle); + dev_net(in)->ipv4.iptable_mangle); } static unsigned int @@ -147,7 +147,7 @@ ipt_local_hook(unsigned int hook, tos = iph->tos; ret = ipt_do_table(skb, hook, in, out, - nf_local_out_net(in, out)->ipv4.iptable_mangle); + dev_net(out)->ipv4.iptable_mangle); /* Reroute for ANY change. */ if (ret != NF_DROP && ret != NF_STOLEN && ret != NF_QUEUE) { iph = ip_hdr(skb); diff --git a/net/ipv4/netfilter/iptable_raw.c b/net/ipv4/netfilter/iptable_raw.c index fddce7754b72..8faebfe638f1 100644 --- a/net/ipv4/netfilter/iptable_raw.c +++ b/net/ipv4/netfilter/iptable_raw.c @@ -53,7 +53,7 @@ ipt_hook(unsigned int hook, int (*okfn)(struct sk_buff *)) { return ipt_do_table(skb, hook, in, out, - nf_pre_routing_net(in, out)->ipv4.iptable_raw); + dev_net(in)->ipv4.iptable_raw); } static unsigned int @@ -72,7 +72,7 @@ ipt_local_hook(unsigned int hook, return NF_ACCEPT; } return ipt_do_table(skb, hook, in, out, - nf_local_out_net(in, out)->ipv4.iptable_raw); + dev_net(out)->ipv4.iptable_raw); } /* 'raw' is the very first table. */ diff --git a/net/ipv4/netfilter/iptable_security.c b/net/ipv4/netfilter/iptable_security.c new file mode 100644 index 000000000000..36f3be3cc428 --- /dev/null +++ b/net/ipv4/netfilter/iptable_security.c @@ -0,0 +1,180 @@ +/* + * "security" table + * + * This is for use by Mandatory Access Control (MAC) security models, + * which need to be able to manage security policy in separate context + * to DAC. + * + * Based on iptable_mangle.c + * + * Copyright (C) 1999 Paul `Rusty' Russell & Michael J. Neuling + * Copyright (C) 2000-2004 Netfilter Core Team <coreteam <at> netfilter.org> + * Copyright (C) 2008 Red Hat, Inc., James Morris <jmorris <at> redhat.com> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ +#include <linux/module.h> +#include <linux/netfilter_ipv4/ip_tables.h> +#include <net/ip.h> + +MODULE_LICENSE("GPL"); +MODULE_AUTHOR("James Morris <jmorris <at> redhat.com>"); +MODULE_DESCRIPTION("iptables security table, for MAC rules"); + +#define SECURITY_VALID_HOOKS (1 << NF_INET_LOCAL_IN) | \ + (1 << NF_INET_FORWARD) | \ + (1 << NF_INET_LOCAL_OUT) + +static struct +{ + struct ipt_replace repl; + struct ipt_standard entries[3]; + struct ipt_error term; +} initial_table __net_initdata = { + .repl = { + .name = "security", + .valid_hooks = SECURITY_VALID_HOOKS, + .num_entries = 4, + .size = sizeof(struct ipt_standard) * 3 + sizeof(struct ipt_error), + .hook_entry = { + [NF_INET_LOCAL_IN] = 0, + [NF_INET_FORWARD] = sizeof(struct ipt_standard), + [NF_INET_LOCAL_OUT] = sizeof(struct ipt_standard) * 2, + }, + .underflow = { + [NF_INET_LOCAL_IN] = 0, + [NF_INET_FORWARD] = sizeof(struct ipt_standard), + [NF_INET_LOCAL_OUT] = sizeof(struct ipt_standard) * 2, + }, + }, + .entries = { + IPT_STANDARD_INIT(NF_ACCEPT), /* LOCAL_IN */ + IPT_STANDARD_INIT(NF_ACCEPT), /* FORWARD */ + IPT_STANDARD_INIT(NF_ACCEPT), /* LOCAL_OUT */ + }, + .term = IPT_ERROR_INIT, /* ERROR */ +}; + +static struct xt_table security_table = { + .name = "security", + .valid_hooks = SECURITY_VALID_HOOKS, + .lock = __RW_LOCK_UNLOCKED(security_table.lock), + .me = THIS_MODULE, + .af = AF_INET, +}; + +static unsigned int +ipt_local_in_hook(unsigned int hook, + struct sk_buff *skb, + const struct net_device *in, + const struct net_device *out, + int (*okfn)(struct sk_buff *)) +{ + return ipt_do_table(skb, hook, in, out, + dev_net(in)->ipv4.iptable_security); +} + +static unsigned int +ipt_forward_hook(unsigned int hook, + struct sk_buff *skb, + const struct net_device *in, + const struct net_device *out, + int (*okfn)(struct sk_buff *)) +{ + return ipt_do_table(skb, hook, in, out, + dev_net(in)->ipv4.iptable_security); +} + +static unsigned int +ipt_local_out_hook(unsigned int hook, + struct sk_buff *skb, + const struct net_device *in, + const struct net_device *out, + int (*okfn)(struct sk_buff *)) +{ + /* Somebody is playing with raw sockets. */ + if (skb->len < sizeof(struct iphdr) + || ip_hdrlen(skb) < sizeof(struct iphdr)) { + if (net_ratelimit()) + printk(KERN_INFO "iptable_security: ignoring short " + "SOCK_RAW packet.\n"); + return NF_ACCEPT; + } + return ipt_do_table(skb, hook, in, out, + dev_net(out)->ipv4.iptable_security); +} + +static struct nf_hook_ops ipt_ops[] __read_mostly = { + { + .hook = ipt_local_in_hook, + .owner = THIS_MODULE, + .pf = PF_INET, + .hooknum = NF_INET_LOCAL_IN, + .priority = NF_IP_PRI_SECURITY, + }, + { + .hook = ipt_forward_hook, + .owner = THIS_MODULE, + .pf = PF_INET, + .hooknum = NF_INET_FORWARD, + .priority = NF_IP_PRI_SECURITY, + }, + { + .hook = ipt_local_out_hook, + .owner = THIS_MODULE, + .pf = PF_INET, + .hooknum = NF_INET_LOCAL_OUT, + .priority = NF_IP_PRI_SECURITY, + }, +}; + +static int __net_init iptable_security_net_init(struct net *net) +{ + net->ipv4.iptable_security = + ipt_register_table(net, &security_table, &initial_table.repl); + + if (IS_ERR(net->ipv4.iptable_security)) + return PTR_ERR(net->ipv4.iptable_security); + + return 0; +} + +static void __net_exit iptable_security_net_exit(struct net *net) +{ + ipt_unregister_table(net->ipv4.iptable_security); +} + +static struct pernet_operations iptable_security_net_ops = { + .init = iptable_security_net_init, + .exit = iptable_security_net_exit, +}; + +static int __init iptable_security_init(void) +{ + int ret; + + ret = register_pernet_subsys(&iptable_security_net_ops); + if (ret < 0) + return ret; + + ret = nf_register_hooks(ipt_ops, ARRAY_SIZE(ipt_ops)); + if (ret < 0) + goto cleanup_table; + + return ret; + +cleanup_table: + unregister_pernet_subsys(&iptable_security_net_ops); + return ret; +} + +static void __exit iptable_security_fini(void) +{ + nf_unregister_hooks(ipt_ops, ARRAY_SIZE(ipt_ops)); + unregister_pernet_subsys(&iptable_security_net_ops); +} + +module_init(iptable_security_init); +module_exit(iptable_security_fini); diff --git a/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c b/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c index 5a955c440364..4a7c35275396 100644 --- a/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c +++ b/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c @@ -1,3 +1,4 @@ + /* (C) 1999-2001 Paul `Rusty' Russell * (C) 2002-2004 Netfilter Core Team <coreteam@netfilter.org> * @@ -24,6 +25,7 @@ #include <net/netfilter/nf_conntrack_core.h> #include <net/netfilter/ipv4/nf_conntrack_ipv4.h> #include <net/netfilter/nf_nat_helper.h> +#include <net/netfilter/ipv4/nf_defrag_ipv4.h> int (*nf_nat_seq_adjust_hook)(struct sk_buff *skb, struct nf_conn *ct, @@ -63,23 +65,6 @@ static int ipv4_print_tuple(struct seq_file *s, NIPQUAD(tuple->dst.u3.ip)); } -/* Returns new sk_buff, or NULL */ -static int nf_ct_ipv4_gather_frags(struct sk_buff *skb, u_int32_t user) -{ - int err; - - skb_orphan(skb); - - local_bh_disable(); - err = ip_defrag(skb, user); - local_bh_enable(); - - if (!err) - ip_send_check(ip_hdr(skb)); - - return err; -} - static int ipv4_get_l4proto(const struct sk_buff *skb, unsigned int nhoff, unsigned int *dataoff, u_int8_t *protonum) { @@ -144,35 +129,13 @@ out: return nf_conntrack_confirm(skb); } -static unsigned int ipv4_conntrack_defrag(unsigned int hooknum, - struct sk_buff *skb, - const struct net_device *in, - const struct net_device *out, - int (*okfn)(struct sk_buff *)) -{ - /* Previously seen (loopback)? Ignore. Do this before - fragment check. */ - if (skb->nfct) - return NF_ACCEPT; - - /* Gather fragments. */ - if (ip_hdr(skb)->frag_off & htons(IP_MF | IP_OFFSET)) { - if (nf_ct_ipv4_gather_frags(skb, - hooknum == NF_INET_PRE_ROUTING ? - IP_DEFRAG_CONNTRACK_IN : - IP_DEFRAG_CONNTRACK_OUT)) - return NF_STOLEN; - } - return NF_ACCEPT; -} - static unsigned int ipv4_conntrack_in(unsigned int hooknum, struct sk_buff *skb, const struct net_device *in, const struct net_device *out, int (*okfn)(struct sk_buff *)) { - return nf_conntrack_in(PF_INET, hooknum, skb); + return nf_conntrack_in(dev_net(in), PF_INET, hooknum, skb); } static unsigned int ipv4_conntrack_local(unsigned int hooknum, @@ -188,20 +151,13 @@ static unsigned int ipv4_conntrack_local(unsigned int hooknum, printk("ipt_hook: happy cracking.\n"); return NF_ACCEPT; } - return nf_conntrack_in(PF_INET, hooknum, skb); + return nf_conntrack_in(dev_net(out), PF_INET, hooknum, skb); } /* Connection tracking may drop packets, but never alters them, so make it the first hook. */ static struct nf_hook_ops ipv4_conntrack_ops[] __read_mostly = { { - .hook = ipv4_conntrack_defrag, - .owner = THIS_MODULE, - .pf = PF_INET, - .hooknum = NF_INET_PRE_ROUTING, - .priority = NF_IP_PRI_CONNTRACK_DEFRAG, - }, - { .hook = ipv4_conntrack_in, .owner = THIS_MODULE, .pf = PF_INET, @@ -209,13 +165,6 @@ static struct nf_hook_ops ipv4_conntrack_ops[] __read_mostly = { .priority = NF_IP_PRI_CONNTRACK, }, { - .hook = ipv4_conntrack_defrag, - .owner = THIS_MODULE, - .pf = PF_INET, - .hooknum = NF_INET_LOCAL_OUT, - .priority = NF_IP_PRI_CONNTRACK_DEFRAG, - }, - { .hook = ipv4_conntrack_local, .owner = THIS_MODULE, .pf = PF_INET, @@ -254,7 +203,7 @@ static ctl_table ip_ct_sysctl_table[] = { { .ctl_name = NET_IPV4_NF_CONNTRACK_COUNT, .procname = "ip_conntrack_count", - .data = &nf_conntrack_count, + .data = &init_net.ct.count, .maxlen = sizeof(int), .mode = 0444, .proc_handler = &proc_dointvec, @@ -270,7 +219,7 @@ static ctl_table ip_ct_sysctl_table[] = { { .ctl_name = NET_IPV4_NF_CONNTRACK_CHECKSUM, .procname = "ip_conntrack_checksum", - .data = &nf_conntrack_checksum, + .data = &init_net.ct.sysctl_checksum, .maxlen = sizeof(int), .mode = 0644, .proc_handler = &proc_dointvec, @@ -278,7 +227,7 @@ static ctl_table ip_ct_sysctl_table[] = { { .ctl_name = NET_IPV4_NF_CONNTRACK_LOG_INVALID, .procname = "ip_conntrack_log_invalid", - .data = &nf_ct_log_invalid, + .data = &init_net.ct.sysctl_log_invalid, .maxlen = sizeof(unsigned int), .mode = 0644, .proc_handler = &proc_dointvec_minmax, @@ -323,7 +272,7 @@ getorigdst(struct sock *sk, int optval, void __user *user, int *len) return -EINVAL; } - h = nf_conntrack_find_get(&tuple); + h = nf_conntrack_find_get(sock_net(sk), &tuple); if (h) { struct sockaddr_in sin; struct nf_conn *ct = nf_ct_tuplehash_to_ctrack(h); @@ -422,6 +371,7 @@ static int __init nf_conntrack_l3proto_ipv4_init(void) int ret = 0; need_conntrack(); + nf_defrag_ipv4_enable(); ret = nf_register_sockopt(&so_getorigdst); if (ret < 0) { diff --git a/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4_compat.c b/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4_compat.c index 40a46d482490..313ebf00ee36 100644 --- a/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4_compat.c +++ b/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4_compat.c @@ -18,33 +18,23 @@ #include <net/netfilter/nf_conntrack_l3proto.h> #include <net/netfilter/nf_conntrack_l4proto.h> #include <net/netfilter/nf_conntrack_expect.h> - -#ifdef CONFIG_NF_CT_ACCT -static unsigned int -seq_print_counters(struct seq_file *s, - const struct ip_conntrack_counter *counter) -{ - return seq_printf(s, "packets=%llu bytes=%llu ", - (unsigned long long)counter->packets, - (unsigned long long)counter->bytes); -} -#else -#define seq_print_counters(x, y) 0 -#endif +#include <net/netfilter/nf_conntrack_acct.h> struct ct_iter_state { + struct seq_net_private p; unsigned int bucket; }; static struct hlist_node *ct_get_first(struct seq_file *seq) { + struct net *net = seq_file_net(seq); struct ct_iter_state *st = seq->private; struct hlist_node *n; for (st->bucket = 0; st->bucket < nf_conntrack_htable_size; st->bucket++) { - n = rcu_dereference(nf_conntrack_hash[st->bucket].first); + n = rcu_dereference(net->ct.hash[st->bucket].first); if (n) return n; } @@ -54,13 +44,14 @@ static struct hlist_node *ct_get_first(struct seq_file *seq) static struct hlist_node *ct_get_next(struct seq_file *seq, struct hlist_node *head) { + struct net *net = seq_file_net(seq); struct ct_iter_state *st = seq->private; head = rcu_dereference(head->next); while (head == NULL) { if (++st->bucket >= nf_conntrack_htable_size) return NULL; - head = rcu_dereference(nf_conntrack_hash[st->bucket].first); + head = rcu_dereference(net->ct.hash[st->bucket].first); } return head; } @@ -127,7 +118,7 @@ static int ct_seq_show(struct seq_file *s, void *v) l3proto, l4proto)) return -ENOSPC; - if (seq_print_counters(s, &ct->counters[IP_CT_DIR_ORIGINAL])) + if (seq_print_acct(s, ct, IP_CT_DIR_ORIGINAL)) return -ENOSPC; if (!(test_bit(IPS_SEEN_REPLY_BIT, &ct->status))) @@ -138,7 +129,7 @@ static int ct_seq_show(struct seq_file *s, void *v) l3proto, l4proto)) return -ENOSPC; - if (seq_print_counters(s, &ct->counters[IP_CT_DIR_REPLY])) + if (seq_print_acct(s, ct, IP_CT_DIR_REPLY)) return -ENOSPC; if (test_bit(IPS_ASSURED_BIT, &ct->status)) @@ -170,8 +161,8 @@ static const struct seq_operations ct_seq_ops = { static int ct_open(struct inode *inode, struct file *file) { - return seq_open_private(file, &ct_seq_ops, - sizeof(struct ct_iter_state)); + return seq_open_net(inode, file, &ct_seq_ops, + sizeof(struct ct_iter_state)); } static const struct file_operations ct_file_ops = { @@ -179,21 +170,23 @@ static const struct file_operations ct_file_ops = { .open = ct_open, .read = seq_read, .llseek = seq_lseek, - .release = seq_release_private, + .release = seq_release_net, }; /* expects */ struct ct_expect_iter_state { + struct seq_net_private p; unsigned int bucket; }; static struct hlist_node *ct_expect_get_first(struct seq_file *seq) { + struct net *net = seq_file_net(seq); struct ct_expect_iter_state *st = seq->private; struct hlist_node *n; for (st->bucket = 0; st->bucket < nf_ct_expect_hsize; st->bucket++) { - n = rcu_dereference(nf_ct_expect_hash[st->bucket].first); + n = rcu_dereference(net->ct.expect_hash[st->bucket].first); if (n) return n; } @@ -203,13 +196,14 @@ static struct hlist_node *ct_expect_get_first(struct seq_file *seq) static struct hlist_node *ct_expect_get_next(struct seq_file *seq, struct hlist_node *head) { + struct net *net = seq_file_net(seq); struct ct_expect_iter_state *st = seq->private; head = rcu_dereference(head->next); while (head == NULL) { if (++st->bucket >= nf_ct_expect_hsize) return NULL; - head = rcu_dereference(nf_ct_expect_hash[st->bucket].first); + head = rcu_dereference(net->ct.expect_hash[st->bucket].first); } return head; } @@ -277,8 +271,8 @@ static const struct seq_operations exp_seq_ops = { static int exp_open(struct inode *inode, struct file *file) { - return seq_open_private(file, &exp_seq_ops, - sizeof(struct ct_expect_iter_state)); + return seq_open_net(inode, file, &exp_seq_ops, + sizeof(struct ct_expect_iter_state)); } static const struct file_operations ip_exp_file_ops = { @@ -286,11 +280,12 @@ static const struct file_operations ip_exp_file_ops = { .open = exp_open, .read = seq_read, .llseek = seq_lseek, - .release = seq_release_private, + .release = seq_release_net, }; static void *ct_cpu_seq_start(struct seq_file *seq, loff_t *pos) { + struct net *net = seq_file_net(seq); int cpu; if (*pos == 0) @@ -300,7 +295,7 @@ static void *ct_cpu_seq_start(struct seq_file *seq, loff_t *pos) if (!cpu_possible(cpu)) continue; *pos = cpu+1; - return &per_cpu(nf_conntrack_stat, cpu); + return per_cpu_ptr(net->ct.stat, cpu); } return NULL; @@ -308,13 +303,14 @@ static void *ct_cpu_seq_start(struct seq_file *seq, loff_t *pos) static void *ct_cpu_seq_next(struct seq_file *seq, void *v, loff_t *pos) { + struct net *net = seq_file_net(seq); int cpu; for (cpu = *pos; cpu < NR_CPUS; ++cpu) { if (!cpu_possible(cpu)) continue; *pos = cpu+1; - return &per_cpu(nf_conntrack_stat, cpu); + return per_cpu_ptr(net->ct.stat, cpu); } return NULL; @@ -326,7 +322,8 @@ static void ct_cpu_seq_stop(struct seq_file *seq, void *v) static int ct_cpu_seq_show(struct seq_file *seq, void *v) { - unsigned int nr_conntracks = atomic_read(&nf_conntrack_count); + struct net *net = seq_file_net(seq); + unsigned int nr_conntracks = atomic_read(&net->ct.count); const struct ip_conntrack_stat *st = v; if (v == SEQ_START_TOKEN) { @@ -366,7 +363,8 @@ static const struct seq_operations ct_cpu_seq_ops = { static int ct_cpu_seq_open(struct inode *inode, struct file *file) { - return seq_open(file, &ct_cpu_seq_ops); + return seq_open_net(inode, file, &ct_cpu_seq_ops, + sizeof(struct seq_net_private)); } static const struct file_operations ct_cpu_seq_fops = { @@ -374,39 +372,54 @@ static const struct file_operations ct_cpu_seq_fops = { .open = ct_cpu_seq_open, .read = seq_read, .llseek = seq_lseek, - .release = seq_release, + .release = seq_release_net, }; -int __init nf_conntrack_ipv4_compat_init(void) +static int __net_init ip_conntrack_net_init(struct net *net) { struct proc_dir_entry *proc, *proc_exp, *proc_stat; - proc = proc_net_fops_create(&init_net, "ip_conntrack", 0440, &ct_file_ops); + proc = proc_net_fops_create(net, "ip_conntrack", 0440, &ct_file_ops); if (!proc) goto err1; - proc_exp = proc_net_fops_create(&init_net, "ip_conntrack_expect", 0440, + proc_exp = proc_net_fops_create(net, "ip_conntrack_expect", 0440, &ip_exp_file_ops); if (!proc_exp) goto err2; proc_stat = proc_create("ip_conntrack", S_IRUGO, - init_net.proc_net_stat, &ct_cpu_seq_fops); + net->proc_net_stat, &ct_cpu_seq_fops); if (!proc_stat) goto err3; return 0; err3: - proc_net_remove(&init_net, "ip_conntrack_expect"); + proc_net_remove(net, "ip_conntrack_expect"); err2: - proc_net_remove(&init_net, "ip_conntrack"); + proc_net_remove(net, "ip_conntrack"); err1: return -ENOMEM; } +static void __net_exit ip_conntrack_net_exit(struct net *net) +{ + remove_proc_entry("ip_conntrack", net->proc_net_stat); + proc_net_remove(net, "ip_conntrack_expect"); + proc_net_remove(net, "ip_conntrack"); +} + +static struct pernet_operations ip_conntrack_net_ops = { + .init = ip_conntrack_net_init, + .exit = ip_conntrack_net_exit, +}; + +int __init nf_conntrack_ipv4_compat_init(void) +{ + return register_pernet_subsys(&ip_conntrack_net_ops); +} + void __exit nf_conntrack_ipv4_compat_fini(void) { - remove_proc_entry("ip_conntrack", init_net.proc_net_stat); - proc_net_remove(&init_net, "ip_conntrack_expect"); - proc_net_remove(&init_net, "ip_conntrack"); + unregister_pernet_subsys(&ip_conntrack_net_ops); } diff --git a/net/ipv4/netfilter/nf_conntrack_proto_icmp.c b/net/ipv4/netfilter/nf_conntrack_proto_icmp.c index 78ab19accace..4e8879220222 100644 --- a/net/ipv4/netfilter/nf_conntrack_proto_icmp.c +++ b/net/ipv4/netfilter/nf_conntrack_proto_icmp.c @@ -79,7 +79,7 @@ static int icmp_packet(struct nf_conn *ct, const struct sk_buff *skb, unsigned int dataoff, enum ip_conntrack_info ctinfo, - int pf, + u_int8_t pf, unsigned int hooknum) { /* Try to delete connection immediately after all replies: @@ -87,12 +87,11 @@ static int icmp_packet(struct nf_conn *ct, means this will only run once even if count hits zero twice (theoretically possible with SMP) */ if (CTINFO2DIR(ctinfo) == IP_CT_DIR_REPLY) { - if (atomic_dec_and_test(&ct->proto.icmp.count) - && del_timer(&ct->timeout)) - ct->timeout.function((unsigned long)ct); + if (atomic_dec_and_test(&ct->proto.icmp.count)) + nf_ct_kill_acct(ct, ctinfo, skb); } else { atomic_inc(&ct->proto.icmp.count); - nf_conntrack_event_cache(IPCT_PROTOINFO_VOLATILE, skb); + nf_conntrack_event_cache(IPCT_PROTOINFO_VOLATILE, ct); nf_ct_refresh_acct(ct, ctinfo, skb, nf_ct_icmp_timeout); } @@ -124,7 +123,7 @@ static bool icmp_new(struct nf_conn *ct, const struct sk_buff *skb, /* Returns conntrack if it dealt with ICMP, and filled in skb fields */ static int -icmp_error_message(struct sk_buff *skb, +icmp_error_message(struct net *net, struct sk_buff *skb, enum ip_conntrack_info *ctinfo, unsigned int hooknum) { @@ -156,7 +155,7 @@ icmp_error_message(struct sk_buff *skb, *ctinfo = IP_CT_RELATED; - h = nf_conntrack_find_get(&innertuple); + h = nf_conntrack_find_get(net, &innertuple); if (!h) { pr_debug("icmp_error_message: no match\n"); return -NF_ACCEPT; @@ -173,8 +172,8 @@ icmp_error_message(struct sk_buff *skb, /* Small and modified version of icmp_rcv */ static int -icmp_error(struct sk_buff *skb, unsigned int dataoff, - enum ip_conntrack_info *ctinfo, int pf, unsigned int hooknum) +icmp_error(struct net *net, struct sk_buff *skb, unsigned int dataoff, + enum ip_conntrack_info *ctinfo, u_int8_t pf, unsigned int hooknum) { const struct icmphdr *icmph; struct icmphdr _ih; @@ -182,16 +181,16 @@ icmp_error(struct sk_buff *skb, unsigned int dataoff, /* Not enough header? */ icmph = skb_header_pointer(skb, ip_hdrlen(skb), sizeof(_ih), &_ih); if (icmph == NULL) { - if (LOG_INVALID(IPPROTO_ICMP)) + if (LOG_INVALID(net, IPPROTO_ICMP)) nf_log_packet(PF_INET, 0, skb, NULL, NULL, NULL, "nf_ct_icmp: short packet "); return -NF_ACCEPT; } /* See ip_conntrack_proto_tcp.c */ - if (nf_conntrack_checksum && hooknum == NF_INET_PRE_ROUTING && + if (net->ct.sysctl_checksum && hooknum == NF_INET_PRE_ROUTING && nf_ip_checksum(skb, hooknum, dataoff, 0)) { - if (LOG_INVALID(IPPROTO_ICMP)) + if (LOG_INVALID(net, IPPROTO_ICMP)) nf_log_packet(PF_INET, 0, skb, NULL, NULL, NULL, "nf_ct_icmp: bad HW ICMP checksum "); return -NF_ACCEPT; @@ -204,7 +203,7 @@ icmp_error(struct sk_buff *skb, unsigned int dataoff, * discarded. */ if (icmph->type > NR_ICMP_TYPES) { - if (LOG_INVALID(IPPROTO_ICMP)) + if (LOG_INVALID(net, IPPROTO_ICMP)) nf_log_packet(PF_INET, 0, skb, NULL, NULL, NULL, "nf_ct_icmp: invalid ICMP type "); return -NF_ACCEPT; @@ -218,7 +217,7 @@ icmp_error(struct sk_buff *skb, unsigned int dataoff, && icmph->type != ICMP_REDIRECT) return NF_ACCEPT; - return icmp_error_message(skb, ctinfo, hooknum); + return icmp_error_message(net, skb, ctinfo, hooknum); } #if defined(CONFIG_NF_CT_NETLINK) || defined(CONFIG_NF_CT_NETLINK_MODULE) diff --git a/net/ipv4/netfilter/nf_defrag_ipv4.c b/net/ipv4/netfilter/nf_defrag_ipv4.c new file mode 100644 index 000000000000..aa2c50a180f7 --- /dev/null +++ b/net/ipv4/netfilter/nf_defrag_ipv4.c @@ -0,0 +1,96 @@ +/* (C) 1999-2001 Paul `Rusty' Russell + * (C) 2002-2004 Netfilter Core Team <coreteam@netfilter.org> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +#include <linux/types.h> +#include <linux/ip.h> +#include <linux/netfilter.h> +#include <linux/module.h> +#include <linux/skbuff.h> +#include <net/route.h> +#include <net/ip.h> + +#include <linux/netfilter_ipv4.h> +#include <net/netfilter/ipv4/nf_defrag_ipv4.h> + +/* Returns new sk_buff, or NULL */ +static int nf_ct_ipv4_gather_frags(struct sk_buff *skb, u_int32_t user) +{ + int err; + + skb_orphan(skb); + + local_bh_disable(); + err = ip_defrag(skb, user); + local_bh_enable(); + + if (!err) + ip_send_check(ip_hdr(skb)); + + return err; +} + +static unsigned int ipv4_conntrack_defrag(unsigned int hooknum, + struct sk_buff *skb, + const struct net_device *in, + const struct net_device *out, + int (*okfn)(struct sk_buff *)) +{ +#if defined(CONFIG_NF_CONNTRACK) || defined(CONFIG_NF_CONNTRACK_MODULE) + /* Previously seen (loopback)? Ignore. Do this before + fragment check. */ + if (skb->nfct) + return NF_ACCEPT; +#endif + + /* Gather fragments. */ + if (ip_hdr(skb)->frag_off & htons(IP_MF | IP_OFFSET)) { + if (nf_ct_ipv4_gather_frags(skb, + hooknum == NF_INET_PRE_ROUTING ? + IP_DEFRAG_CONNTRACK_IN : + IP_DEFRAG_CONNTRACK_OUT)) + return NF_STOLEN; + } + return NF_ACCEPT; +} + +static struct nf_hook_ops ipv4_defrag_ops[] = { + { + .hook = ipv4_conntrack_defrag, + .owner = THIS_MODULE, + .pf = PF_INET, + .hooknum = NF_INET_PRE_ROUTING, + .priority = NF_IP_PRI_CONNTRACK_DEFRAG, + }, + { + .hook = ipv4_conntrack_defrag, + .owner = THIS_MODULE, + .pf = PF_INET, + .hooknum = NF_INET_LOCAL_OUT, + .priority = NF_IP_PRI_CONNTRACK_DEFRAG, + }, +}; + +static int __init nf_defrag_init(void) +{ + return nf_register_hooks(ipv4_defrag_ops, ARRAY_SIZE(ipv4_defrag_ops)); +} + +static void __exit nf_defrag_fini(void) +{ + nf_unregister_hooks(ipv4_defrag_ops, ARRAY_SIZE(ipv4_defrag_ops)); +} + +void nf_defrag_ipv4_enable(void) +{ +} +EXPORT_SYMBOL_GPL(nf_defrag_ipv4_enable); + +module_init(nf_defrag_init); +module_exit(nf_defrag_fini); + +MODULE_LICENSE("GPL"); diff --git a/net/ipv4/netfilter/nf_nat_core.c b/net/ipv4/netfilter/nf_nat_core.c index d2a887fc8d9b..2ac9eaf1a8c9 100644 --- a/net/ipv4/netfilter/nf_nat_core.c +++ b/net/ipv4/netfilter/nf_nat_core.c @@ -37,9 +37,6 @@ static struct nf_conntrack_l3proto *l3proto __read_mostly; /* Calculated at init based on memory size */ static unsigned int nf_nat_htable_size __read_mostly; -static int nf_nat_vmalloced; - -static struct hlist_head *bysource __read_mostly; #define MAX_IP_NAT_PROTO 256 static const struct nf_nat_protocol *nf_nat_protos[MAX_IP_NAT_PROTO] @@ -145,7 +142,8 @@ same_src(const struct nf_conn *ct, /* Only called for SRC manip */ static int -find_appropriate_src(const struct nf_conntrack_tuple *tuple, +find_appropriate_src(struct net *net, + const struct nf_conntrack_tuple *tuple, struct nf_conntrack_tuple *result, const struct nf_nat_range *range) { @@ -155,7 +153,7 @@ find_appropriate_src(const struct nf_conntrack_tuple *tuple, const struct hlist_node *n; rcu_read_lock(); - hlist_for_each_entry_rcu(nat, n, &bysource[h], bysource) { + hlist_for_each_entry_rcu(nat, n, &net->ipv4.nat_bysource[h], bysource) { ct = nat->ct; if (same_src(ct, tuple)) { /* Copy source part from reply tuple. */ @@ -231,6 +229,7 @@ get_unique_tuple(struct nf_conntrack_tuple *tuple, struct nf_conn *ct, enum nf_nat_manip_type maniptype) { + struct net *net = nf_ct_net(ct); const struct nf_nat_protocol *proto; /* 1) If this srcip/proto/src-proto-part is currently mapped, @@ -240,12 +239,12 @@ get_unique_tuple(struct nf_conntrack_tuple *tuple, This is only required for source (ie. NAT/masq) mappings. So far, we don't do local source mappings, so multiple manips not an issue. */ - if (maniptype == IP_NAT_MANIP_SRC) { - if (find_appropriate_src(orig_tuple, tuple, range)) { + if (maniptype == IP_NAT_MANIP_SRC && + !(range->flags & IP_NAT_RANGE_PROTO_RANDOM)) { + if (find_appropriate_src(net, orig_tuple, tuple, range)) { pr_debug("get_unique_tuple: Found current src map\n"); - if (!(range->flags & IP_NAT_RANGE_PROTO_RANDOM)) - if (!nf_nat_used_tuple(tuple, ct)) - return; + if (!nf_nat_used_tuple(tuple, ct)) + return; } } @@ -283,6 +282,7 @@ nf_nat_setup_info(struct nf_conn *ct, const struct nf_nat_range *range, enum nf_nat_manip_type maniptype) { + struct net *net = nf_ct_net(ct); struct nf_conntrack_tuple curr_tuple, new_tuple; struct nf_conn_nat *nat; int have_to_hash = !(ct->status & IPS_NAT_DONE_MASK); @@ -334,7 +334,8 @@ nf_nat_setup_info(struct nf_conn *ct, /* nf_conntrack_alter_reply might re-allocate exntension aera */ nat = nfct_nat(ct); nat->ct = ct; - hlist_add_head_rcu(&nat->bysource, &bysource[srchash]); + hlist_add_head_rcu(&nat->bysource, + &net->ipv4.nat_bysource[srchash]); spin_unlock_bh(&nf_nat_lock); } @@ -583,6 +584,40 @@ static struct nf_ct_ext_type nat_extend __read_mostly = { .flags = NF_CT_EXT_F_PREALLOC, }; +static int __net_init nf_nat_net_init(struct net *net) +{ + net->ipv4.nat_bysource = nf_ct_alloc_hashtable(&nf_nat_htable_size, + &net->ipv4.nat_vmalloced); + if (!net->ipv4.nat_bysource) + return -ENOMEM; + return 0; +} + +/* Clear NAT section of all conntracks, in case we're loaded again. */ +static int clean_nat(struct nf_conn *i, void *data) +{ + struct nf_conn_nat *nat = nfct_nat(i); + + if (!nat) + return 0; + memset(nat, 0, sizeof(*nat)); + i->status &= ~(IPS_NAT_MASK | IPS_NAT_DONE_MASK | IPS_SEQ_ADJUST); + return 0; +} + +static void __net_exit nf_nat_net_exit(struct net *net) +{ + nf_ct_iterate_cleanup(net, &clean_nat, NULL); + synchronize_rcu(); + nf_ct_free_hashtable(net->ipv4.nat_bysource, net->ipv4.nat_vmalloced, + nf_nat_htable_size); +} + +static struct pernet_operations nf_nat_net_ops = { + .init = nf_nat_net_init, + .exit = nf_nat_net_exit, +}; + static int __init nf_nat_init(void) { size_t i; @@ -599,12 +634,9 @@ static int __init nf_nat_init(void) /* Leave them the same for the moment. */ nf_nat_htable_size = nf_conntrack_htable_size; - bysource = nf_ct_alloc_hashtable(&nf_nat_htable_size, - &nf_nat_vmalloced); - if (!bysource) { - ret = -ENOMEM; + ret = register_pernet_subsys(&nf_nat_net_ops); + if (ret < 0) goto cleanup_extend; - } /* Sew in builtin protocols. */ spin_lock_bh(&nf_nat_lock); @@ -629,23 +661,9 @@ static int __init nf_nat_init(void) return ret; } -/* Clear NAT section of all conntracks, in case we're loaded again. */ -static int clean_nat(struct nf_conn *i, void *data) -{ - struct nf_conn_nat *nat = nfct_nat(i); - - if (!nat) - return 0; - memset(nat, 0, sizeof(*nat)); - i->status &= ~(IPS_NAT_MASK | IPS_NAT_DONE_MASK | IPS_SEQ_ADJUST); - return 0; -} - static void __exit nf_nat_cleanup(void) { - nf_ct_iterate_cleanup(&clean_nat, NULL); - synchronize_rcu(); - nf_ct_free_hashtable(bysource, nf_nat_vmalloced, nf_nat_htable_size); + unregister_pernet_subsys(&nf_nat_net_ops); nf_ct_l3proto_put(l3proto); nf_ct_extend_unregister(&nat_extend); rcu_assign_pointer(nf_nat_seq_adjust_hook, NULL); diff --git a/net/ipv4/netfilter/nf_nat_helper.c b/net/ipv4/netfilter/nf_nat_helper.c index 11976ea29884..cf7a42bf9820 100644 --- a/net/ipv4/netfilter/nf_nat_helper.c +++ b/net/ipv4/netfilter/nf_nat_helper.c @@ -16,6 +16,7 @@ #include <linux/udp.h> #include <net/checksum.h> #include <net/tcp.h> +#include <net/route.h> #include <linux/netfilter_ipv4.h> #include <net/netfilter/nf_conntrack.h> @@ -192,7 +193,7 @@ nf_nat_mangle_tcp_packet(struct sk_buff *skb, nf_conntrack_tcp_update(skb, ip_hdrlen(skb), ct, CTINFO2DIR(ctinfo)); - nf_conntrack_event_cache(IPCT_NATSEQADJ, skb); + nf_conntrack_event_cache(IPCT_NATSEQADJ, ct); } return 1; } diff --git a/net/ipv4/netfilter/nf_nat_pptp.c b/net/ipv4/netfilter/nf_nat_pptp.c index da3d91a5ef5c..9eb171056c63 100644 --- a/net/ipv4/netfilter/nf_nat_pptp.c +++ b/net/ipv4/netfilter/nf_nat_pptp.c @@ -40,6 +40,7 @@ MODULE_ALIAS("ip_nat_pptp"); static void pptp_nat_expected(struct nf_conn *ct, struct nf_conntrack_expect *exp) { + struct net *net = nf_ct_net(ct); const struct nf_conn *master = ct->master; struct nf_conntrack_expect *other_exp; struct nf_conntrack_tuple t; @@ -73,7 +74,7 @@ static void pptp_nat_expected(struct nf_conn *ct, pr_debug("trying to unexpect other dir: "); nf_ct_dump_tuple_ip(&t); - other_exp = nf_ct_expect_find_get(&t); + other_exp = nf_ct_expect_find_get(net, &t); if (other_exp) { nf_ct_unexpect_related(other_exp); nf_ct_expect_put(other_exp); diff --git a/net/ipv4/netfilter/nf_nat_proto_common.c b/net/ipv4/netfilter/nf_nat_proto_common.c index 91537f11273f..6c4f11f51446 100644 --- a/net/ipv4/netfilter/nf_nat_proto_common.c +++ b/net/ipv4/netfilter/nf_nat_proto_common.c @@ -73,9 +73,13 @@ bool nf_nat_proto_unique_tuple(struct nf_conntrack_tuple *tuple, range_size = ntohs(range->max.all) - min + 1; } - off = *rover; if (range->flags & IP_NAT_RANGE_PROTO_RANDOM) - off = net_random(); + off = secure_ipv4_port_ephemeral(tuple->src.u3.ip, tuple->dst.u3.ip, + maniptype == IP_NAT_MANIP_SRC + ? tuple->dst.u.all + : tuple->src.u.all); + else + off = *rover; for (i = 0; i < range_size; i++, off++) { *portptr = htons(min + off % range_size); diff --git a/net/ipv4/netfilter/nf_nat_proto_sctp.c b/net/ipv4/netfilter/nf_nat_proto_sctp.c index 82e4c0e286b8..65e470bc6123 100644 --- a/net/ipv4/netfilter/nf_nat_proto_sctp.c +++ b/net/ipv4/netfilter/nf_nat_proto_sctp.c @@ -36,7 +36,7 @@ sctp_manip_pkt(struct sk_buff *skb, sctp_sctphdr_t *hdr; unsigned int hdroff = iphdroff + iph->ihl*4; __be32 oldip, newip; - u32 crc32; + __be32 crc32; if (!skb_make_writable(skb, hdroff + sizeof(*hdr))) return false; @@ -61,7 +61,7 @@ sctp_manip_pkt(struct sk_buff *skb, crc32 = sctp_update_cksum((u8 *)skb->data, skb_headlen(skb), crc32); crc32 = sctp_end_cksum(crc32); - hdr->checksum = htonl(crc32); + hdr->checksum = crc32; return true; } diff --git a/net/ipv4/netfilter/nf_nat_rule.c b/net/ipv4/netfilter/nf_nat_rule.c index e8b4d0d4439e..bea54a685109 100644 --- a/net/ipv4/netfilter/nf_nat_rule.c +++ b/net/ipv4/netfilter/nf_nat_rule.c @@ -33,7 +33,7 @@ static struct struct ipt_replace repl; struct ipt_standard entries[3]; struct ipt_error term; -} nat_initial_table __initdata = { +} nat_initial_table __net_initdata = { .repl = { .name = "nat", .valid_hooks = NAT_VALID_HOOKS, @@ -58,47 +58,42 @@ static struct .term = IPT_ERROR_INIT, /* ERROR */ }; -static struct xt_table __nat_table = { +static struct xt_table nat_table = { .name = "nat", .valid_hooks = NAT_VALID_HOOKS, .lock = __RW_LOCK_UNLOCKED(__nat_table.lock), .me = THIS_MODULE, .af = AF_INET, }; -static struct xt_table *nat_table; /* Source NAT */ -static unsigned int ipt_snat_target(struct sk_buff *skb, - const struct net_device *in, - const struct net_device *out, - unsigned int hooknum, - const struct xt_target *target, - const void *targinfo) +static unsigned int +ipt_snat_target(struct sk_buff *skb, const struct xt_target_param *par) { struct nf_conn *ct; enum ip_conntrack_info ctinfo; - const struct nf_nat_multi_range_compat *mr = targinfo; + const struct nf_nat_multi_range_compat *mr = par->targinfo; - NF_CT_ASSERT(hooknum == NF_INET_POST_ROUTING); + NF_CT_ASSERT(par->hooknum == NF_INET_POST_ROUTING); ct = nf_ct_get(skb, &ctinfo); /* Connection must be valid and new. */ NF_CT_ASSERT(ct && (ctinfo == IP_CT_NEW || ctinfo == IP_CT_RELATED || ctinfo == IP_CT_RELATED + IP_CT_IS_REPLY)); - NF_CT_ASSERT(out); + NF_CT_ASSERT(par->out != NULL); return nf_nat_setup_info(ct, &mr->range[0], IP_NAT_MANIP_SRC); } /* Before 2.6.11 we did implicit source NAT if required. Warn about change. */ -static void warn_if_extra_mangle(__be32 dstip, __be32 srcip) +static void warn_if_extra_mangle(struct net *net, __be32 dstip, __be32 srcip) { static int warned = 0; struct flowi fl = { .nl_u = { .ip4_u = { .daddr = dstip } } }; struct rtable *rt; - if (ip_route_output_key(&init_net, &rt, &fl) != 0) + if (ip_route_output_key(net, &rt, &fl) != 0) return; if (rt->rt_src != srcip && !warned) { @@ -110,40 +105,32 @@ static void warn_if_extra_mangle(__be32 dstip, __be32 srcip) ip_rt_put(rt); } -static unsigned int ipt_dnat_target(struct sk_buff *skb, - const struct net_device *in, - const struct net_device *out, - unsigned int hooknum, - const struct xt_target *target, - const void *targinfo) +static unsigned int +ipt_dnat_target(struct sk_buff *skb, const struct xt_target_param *par) { struct nf_conn *ct; enum ip_conntrack_info ctinfo; - const struct nf_nat_multi_range_compat *mr = targinfo; + const struct nf_nat_multi_range_compat *mr = par->targinfo; - NF_CT_ASSERT(hooknum == NF_INET_PRE_ROUTING || - hooknum == NF_INET_LOCAL_OUT); + NF_CT_ASSERT(par->hooknum == NF_INET_PRE_ROUTING || + par->hooknum == NF_INET_LOCAL_OUT); ct = nf_ct_get(skb, &ctinfo); /* Connection must be valid and new. */ NF_CT_ASSERT(ct && (ctinfo == IP_CT_NEW || ctinfo == IP_CT_RELATED)); - if (hooknum == NF_INET_LOCAL_OUT && + if (par->hooknum == NF_INET_LOCAL_OUT && mr->range[0].flags & IP_NAT_RANGE_MAP_IPS) - warn_if_extra_mangle(ip_hdr(skb)->daddr, + warn_if_extra_mangle(dev_net(par->out), ip_hdr(skb)->daddr, mr->range[0].min_ip); return nf_nat_setup_info(ct, &mr->range[0], IP_NAT_MANIP_DST); } -static bool ipt_snat_checkentry(const char *tablename, - const void *entry, - const struct xt_target *target, - void *targinfo, - unsigned int hook_mask) +static bool ipt_snat_checkentry(const struct xt_tgchk_param *par) { - const struct nf_nat_multi_range_compat *mr = targinfo; + const struct nf_nat_multi_range_compat *mr = par->targinfo; /* Must be a valid range */ if (mr->rangesize != 1) { @@ -153,13 +140,9 @@ static bool ipt_snat_checkentry(const char *tablename, return true; } -static bool ipt_dnat_checkentry(const char *tablename, - const void *entry, - const struct xt_target *target, - void *targinfo, - unsigned int hook_mask) +static bool ipt_dnat_checkentry(const struct xt_tgchk_param *par) { - const struct nf_nat_multi_range_compat *mr = targinfo; + const struct nf_nat_multi_range_compat *mr = par->targinfo; /* Must be a valid range */ if (mr->rangesize != 1) { @@ -194,9 +177,10 @@ int nf_nat_rule_find(struct sk_buff *skb, const struct net_device *out, struct nf_conn *ct) { + struct net *net = nf_ct_net(ct); int ret; - ret = ipt_do_table(skb, hooknum, in, out, nat_table); + ret = ipt_do_table(skb, hooknum, in, out, net->ipv4.nat_table); if (ret == NF_ACCEPT) { if (!nf_nat_initialized(ct, HOOK2MANIP(hooknum))) @@ -226,14 +210,32 @@ static struct xt_target ipt_dnat_reg __read_mostly = { .family = AF_INET, }; +static int __net_init nf_nat_rule_net_init(struct net *net) +{ + net->ipv4.nat_table = ipt_register_table(net, &nat_table, + &nat_initial_table.repl); + if (IS_ERR(net->ipv4.nat_table)) + return PTR_ERR(net->ipv4.nat_table); + return 0; +} + +static void __net_exit nf_nat_rule_net_exit(struct net *net) +{ + ipt_unregister_table(net->ipv4.nat_table); +} + +static struct pernet_operations nf_nat_rule_net_ops = { + .init = nf_nat_rule_net_init, + .exit = nf_nat_rule_net_exit, +}; + int __init nf_nat_rule_init(void) { int ret; - nat_table = ipt_register_table(&init_net, &__nat_table, - &nat_initial_table.repl); - if (IS_ERR(nat_table)) - return PTR_ERR(nat_table); + ret = register_pernet_subsys(&nf_nat_rule_net_ops); + if (ret != 0) + goto out; ret = xt_register_target(&ipt_snat_reg); if (ret != 0) goto unregister_table; @@ -247,8 +249,8 @@ int __init nf_nat_rule_init(void) unregister_snat: xt_unregister_target(&ipt_snat_reg); unregister_table: - ipt_unregister_table(nat_table); - + unregister_pernet_subsys(&nf_nat_rule_net_ops); + out: return ret; } @@ -256,5 +258,5 @@ void nf_nat_rule_cleanup(void) { xt_unregister_target(&ipt_dnat_reg); xt_unregister_target(&ipt_snat_reg); - ipt_unregister_table(nat_table); + unregister_pernet_subsys(&nf_nat_rule_net_ops); } diff --git a/net/ipv4/netfilter/nf_nat_sip.c b/net/ipv4/netfilter/nf_nat_sip.c index 4334d5cabc5b..14544320c545 100644 --- a/net/ipv4/netfilter/nf_nat_sip.c +++ b/net/ipv4/netfilter/nf_nat_sip.c @@ -318,11 +318,11 @@ static int mangle_content_len(struct sk_buff *skb, buffer, buflen); } -static unsigned mangle_sdp_packet(struct sk_buff *skb, const char **dptr, - unsigned int dataoff, unsigned int *datalen, - enum sdp_header_types type, - enum sdp_header_types term, - char *buffer, int buflen) +static int mangle_sdp_packet(struct sk_buff *skb, const char **dptr, + unsigned int dataoff, unsigned int *datalen, + enum sdp_header_types type, + enum sdp_header_types term, + char *buffer, int buflen) { enum ip_conntrack_info ctinfo; struct nf_conn *ct = nf_ct_get(skb, &ctinfo); @@ -330,9 +330,9 @@ static unsigned mangle_sdp_packet(struct sk_buff *skb, const char **dptr, if (ct_sip_get_sdp_header(ct, *dptr, dataoff, *datalen, type, term, &matchoff, &matchlen) <= 0) - return 0; + return -ENOENT; return mangle_packet(skb, dptr, datalen, matchoff, matchlen, - buffer, buflen); + buffer, buflen) ? 0 : -EINVAL; } static unsigned int ip_nat_sdp_addr(struct sk_buff *skb, const char **dptr, @@ -346,8 +346,8 @@ static unsigned int ip_nat_sdp_addr(struct sk_buff *skb, const char **dptr, unsigned int buflen; buflen = sprintf(buffer, NIPQUAD_FMT, NIPQUAD(addr->ip)); - if (!mangle_sdp_packet(skb, dptr, dataoff, datalen, type, term, - buffer, buflen)) + if (mangle_sdp_packet(skb, dptr, dataoff, datalen, type, term, + buffer, buflen)) return 0; return mangle_content_len(skb, dptr, datalen); @@ -381,15 +381,27 @@ static unsigned int ip_nat_sdp_session(struct sk_buff *skb, const char **dptr, /* Mangle session description owner and contact addresses */ buflen = sprintf(buffer, "%u.%u.%u.%u", NIPQUAD(addr->ip)); - if (!mangle_sdp_packet(skb, dptr, dataoff, datalen, + if (mangle_sdp_packet(skb, dptr, dataoff, datalen, SDP_HDR_OWNER_IP4, SDP_HDR_MEDIA, buffer, buflen)) return 0; - if (!mangle_sdp_packet(skb, dptr, dataoff, datalen, - SDP_HDR_CONNECTION_IP4, SDP_HDR_MEDIA, - buffer, buflen)) + switch (mangle_sdp_packet(skb, dptr, dataoff, datalen, + SDP_HDR_CONNECTION_IP4, SDP_HDR_MEDIA, + buffer, buflen)) { + case 0: + /* + * RFC 2327: + * + * Session description + * + * c=* (connection information - not required if included in all media) + */ + case -ENOENT: + break; + default: return 0; + } return mangle_content_len(skb, dptr, datalen); } diff --git a/net/ipv4/netfilter/nf_nat_snmp_basic.c b/net/ipv4/netfilter/nf_nat_snmp_basic.c index 7750c97fde7b..ffeaffc3fffe 100644 --- a/net/ipv4/netfilter/nf_nat_snmp_basic.c +++ b/net/ipv4/netfilter/nf_nat_snmp_basic.c @@ -439,8 +439,8 @@ static unsigned char asn1_oid_decode(struct asn1_ctx *ctx, unsigned int *len) { unsigned long subid; - unsigned int size; unsigned long *optr; + size_t size; size = eoc - ctx->pointer + 1; diff --git a/net/ipv4/proc.c b/net/ipv4/proc.c index 552169b41b16..8f5a403f6f6b 100644 --- a/net/ipv4/proc.c +++ b/net/ipv4/proc.c @@ -7,8 +7,6 @@ * PROC file system. It is mainly used for debugging and * statistics. * - * Version: $Id: proc.c,v 1.45 2001/05/16 16:45:35 davem Exp $ - * * Authors: Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG> * Gerald J. Heim, <heim@peanuts.informatik.uni-tuebingen.de> * Fred Baumgarten, <dc6iq@insu1.etec.uni-karlsruhe.de> @@ -73,32 +71,7 @@ static int sockstat_seq_show(struct seq_file *seq, void *v) static int sockstat_seq_open(struct inode *inode, struct file *file) { - int err; - struct net *net; - - err = -ENXIO; - net = get_proc_net(inode); - if (net == NULL) - goto err_net; - - err = single_open(file, sockstat_seq_show, net); - if (err < 0) - goto err_open; - - return 0; - -err_open: - put_net(net); -err_net: - return err; -} - -static int sockstat_seq_release(struct inode *inode, struct file *file) -{ - struct net *net = ((struct seq_file *)file->private_data)->private; - - put_net(net); - return single_release(inode, file); + return single_open_net(inode, file, sockstat_seq_show); } static const struct file_operations sockstat_seq_fops = { @@ -106,7 +79,7 @@ static const struct file_operations sockstat_seq_fops = { .open = sockstat_seq_open, .read = seq_read, .llseek = seq_lseek, - .release = sockstat_seq_release, + .release = single_release_net, }; /* snmp items */ @@ -259,6 +232,8 @@ static const struct snmp_mib snmp4_net_list[] = { SNMP_MIB_ITEM("TCPDSACKIgnoredOld", LINUX_MIB_TCPDSACKIGNOREDOLD), SNMP_MIB_ITEM("TCPDSACKIgnoredNoUndo", LINUX_MIB_TCPDSACKIGNOREDNOUNDO), SNMP_MIB_ITEM("TCPSpuriousRTOs", LINUX_MIB_TCPSPURIOUSRTOS), + SNMP_MIB_ITEM("TCPMD5NotFound", LINUX_MIB_TCPMD5NOTFOUND), + SNMP_MIB_ITEM("TCPMD5Unexpected", LINUX_MIB_TCPMD5UNEXPECTED), SNMP_MIB_SENTINEL }; @@ -268,11 +243,12 @@ static void icmpmsg_put(struct seq_file *seq) int j, i, count; static int out[PERLINE]; + struct net *net = seq->private; count = 0; for (i = 0; i < ICMPMSG_MIB_MAX; i++) { - if (snmp_fold_field((void **) icmpmsg_statistics, i)) + if (snmp_fold_field((void **) net->mib.icmpmsg_statistics, i)) out[count++] = i; if (count < PERLINE) continue; @@ -284,7 +260,7 @@ static void icmpmsg_put(struct seq_file *seq) seq_printf(seq, "\nIcmpMsg: "); for (j = 0; j < PERLINE; ++j) seq_printf(seq, " %lu", - snmp_fold_field((void **) icmpmsg_statistics, + snmp_fold_field((void **) net->mib.icmpmsg_statistics, out[j])); seq_putc(seq, '\n'); } @@ -296,7 +272,7 @@ static void icmpmsg_put(struct seq_file *seq) seq_printf(seq, "\nIcmpMsg:"); for (j = 0; j < count; ++j) seq_printf(seq, " %lu", snmp_fold_field((void **) - icmpmsg_statistics, out[j])); + net->mib.icmpmsg_statistics, out[j])); } #undef PERLINE @@ -305,6 +281,7 @@ static void icmpmsg_put(struct seq_file *seq) static void icmp_put(struct seq_file *seq) { int i; + struct net *net = seq->private; seq_puts(seq, "\nIcmp: InMsgs InErrors"); for (i=0; icmpmibmap[i].name != NULL; i++) @@ -313,18 +290,18 @@ static void icmp_put(struct seq_file *seq) for (i=0; icmpmibmap[i].name != NULL; i++) seq_printf(seq, " Out%s", icmpmibmap[i].name); seq_printf(seq, "\nIcmp: %lu %lu", - snmp_fold_field((void **) icmp_statistics, ICMP_MIB_INMSGS), - snmp_fold_field((void **) icmp_statistics, ICMP_MIB_INERRORS)); + snmp_fold_field((void **) net->mib.icmp_statistics, ICMP_MIB_INMSGS), + snmp_fold_field((void **) net->mib.icmp_statistics, ICMP_MIB_INERRORS)); for (i=0; icmpmibmap[i].name != NULL; i++) seq_printf(seq, " %lu", - snmp_fold_field((void **) icmpmsg_statistics, + snmp_fold_field((void **) net->mib.icmpmsg_statistics, icmpmibmap[i].index)); seq_printf(seq, " %lu %lu", - snmp_fold_field((void **) icmp_statistics, ICMP_MIB_OUTMSGS), - snmp_fold_field((void **) icmp_statistics, ICMP_MIB_OUTERRORS)); + snmp_fold_field((void **) net->mib.icmp_statistics, ICMP_MIB_OUTMSGS), + snmp_fold_field((void **) net->mib.icmp_statistics, ICMP_MIB_OUTERRORS)); for (i=0; icmpmibmap[i].name != NULL; i++) seq_printf(seq, " %lu", - snmp_fold_field((void **) icmpmsg_statistics, + snmp_fold_field((void **) net->mib.icmpmsg_statistics, icmpmibmap[i].index | 0x100)); } @@ -334,6 +311,7 @@ static void icmp_put(struct seq_file *seq) static int snmp_seq_show(struct seq_file *seq, void *v) { int i; + struct net *net = seq->private; seq_puts(seq, "Ip: Forwarding DefaultTTL"); @@ -341,12 +319,12 @@ static int snmp_seq_show(struct seq_file *seq, void *v) seq_printf(seq, " %s", snmp4_ipstats_list[i].name); seq_printf(seq, "\nIp: %d %d", - IPV4_DEVCONF_ALL(&init_net, FORWARDING) ? 1 : 2, + IPV4_DEVCONF_ALL(net, FORWARDING) ? 1 : 2, sysctl_ip_default_ttl); for (i = 0; snmp4_ipstats_list[i].name != NULL; i++) seq_printf(seq, " %lu", - snmp_fold_field((void **)ip_statistics, + snmp_fold_field((void **)net->mib.ip_statistics, snmp4_ipstats_list[i].entry)); icmp_put(seq); /* RFC 2011 compatibility */ @@ -361,11 +339,11 @@ static int snmp_seq_show(struct seq_file *seq, void *v) /* MaxConn field is signed, RFC 2012 */ if (snmp4_tcp_list[i].entry == TCP_MIB_MAXCONN) seq_printf(seq, " %ld", - snmp_fold_field((void **)tcp_statistics, + snmp_fold_field((void **)net->mib.tcp_statistics, snmp4_tcp_list[i].entry)); else seq_printf(seq, " %lu", - snmp_fold_field((void **)tcp_statistics, + snmp_fold_field((void **)net->mib.tcp_statistics, snmp4_tcp_list[i].entry)); } @@ -376,7 +354,7 @@ static int snmp_seq_show(struct seq_file *seq, void *v) seq_puts(seq, "\nUdp:"); for (i = 0; snmp4_udp_list[i].name != NULL; i++) seq_printf(seq, " %lu", - snmp_fold_field((void **)udp_statistics, + snmp_fold_field((void **)net->mib.udp_statistics, snmp4_udp_list[i].entry)); /* the UDP and UDP-Lite MIBs are the same */ @@ -387,7 +365,7 @@ static int snmp_seq_show(struct seq_file *seq, void *v) seq_puts(seq, "\nUdpLite:"); for (i = 0; snmp4_udp_list[i].name != NULL; i++) seq_printf(seq, " %lu", - snmp_fold_field((void **)udplite_statistics, + snmp_fold_field((void **)net->mib.udplite_statistics, snmp4_udp_list[i].entry)); seq_putc(seq, '\n'); @@ -396,7 +374,7 @@ static int snmp_seq_show(struct seq_file *seq, void *v) static int snmp_seq_open(struct inode *inode, struct file *file) { - return single_open(file, snmp_seq_show, NULL); + return single_open_net(inode, file, snmp_seq_show); } static const struct file_operations snmp_seq_fops = { @@ -404,7 +382,7 @@ static const struct file_operations snmp_seq_fops = { .open = snmp_seq_open, .read = seq_read, .llseek = seq_lseek, - .release = single_release, + .release = single_release_net, }; @@ -415,6 +393,7 @@ static const struct file_operations snmp_seq_fops = { static int netstat_seq_show(struct seq_file *seq, void *v) { int i; + struct net *net = seq->private; seq_puts(seq, "TcpExt:"); for (i = 0; snmp4_net_list[i].name != NULL; i++) @@ -423,7 +402,7 @@ static int netstat_seq_show(struct seq_file *seq, void *v) seq_puts(seq, "\nTcpExt:"); for (i = 0; snmp4_net_list[i].name != NULL; i++) seq_printf(seq, " %lu", - snmp_fold_field((void **)net_statistics, + snmp_fold_field((void **)net->mib.net_statistics, snmp4_net_list[i].entry)); seq_puts(seq, "\nIpExt:"); @@ -433,7 +412,7 @@ static int netstat_seq_show(struct seq_file *seq, void *v) seq_puts(seq, "\nIpExt:"); for (i = 0; snmp4_ipextstats_list[i].name != NULL; i++) seq_printf(seq, " %lu", - snmp_fold_field((void **)ip_statistics, + snmp_fold_field((void **)net->mib.ip_statistics, snmp4_ipextstats_list[i].entry)); seq_putc(seq, '\n'); @@ -442,7 +421,7 @@ static int netstat_seq_show(struct seq_file *seq, void *v) static int netstat_seq_open(struct inode *inode, struct file *file) { - return single_open(file, netstat_seq_show, NULL); + return single_open_net(inode, file, netstat_seq_show); } static const struct file_operations netstat_seq_fops = { @@ -450,18 +429,32 @@ static const struct file_operations netstat_seq_fops = { .open = netstat_seq_open, .read = seq_read, .llseek = seq_lseek, - .release = single_release, + .release = single_release_net, }; static __net_init int ip_proc_init_net(struct net *net) { if (!proc_net_fops_create(net, "sockstat", S_IRUGO, &sockstat_seq_fops)) - return -ENOMEM; + goto out_sockstat; + if (!proc_net_fops_create(net, "netstat", S_IRUGO, &netstat_seq_fops)) + goto out_netstat; + if (!proc_net_fops_create(net, "snmp", S_IRUGO, &snmp_seq_fops)) + goto out_snmp; + return 0; + +out_snmp: + proc_net_remove(net, "netstat"); +out_netstat: + proc_net_remove(net, "sockstat"); +out_sockstat: + return -ENOMEM; } static __net_exit void ip_proc_exit_net(struct net *net) { + proc_net_remove(net, "snmp"); + proc_net_remove(net, "netstat"); proc_net_remove(net, "sockstat"); } @@ -472,24 +465,6 @@ static __net_initdata struct pernet_operations ip_proc_ops = { int __init ip_misc_proc_init(void) { - int rc = 0; - - if (register_pernet_subsys(&ip_proc_ops)) - goto out_pernet; - - if (!proc_net_fops_create(&init_net, "netstat", S_IRUGO, &netstat_seq_fops)) - goto out_netstat; - - if (!proc_net_fops_create(&init_net, "snmp", S_IRUGO, &snmp_seq_fops)) - goto out_snmp; -out: - return rc; -out_snmp: - proc_net_remove(&init_net, "netstat"); -out_netstat: - unregister_pernet_subsys(&ip_proc_ops); -out_pernet: - rc = -ENOMEM; - goto out; + return register_pernet_subsys(&ip_proc_ops); } diff --git a/net/ipv4/protocol.c b/net/ipv4/protocol.c index 971ab9356e51..ea50da0649fd 100644 --- a/net/ipv4/protocol.c +++ b/net/ipv4/protocol.c @@ -5,8 +5,6 @@ * * INET protocol dispatch tables. * - * Version: $Id: protocol.c,v 1.14 2001/05/18 02:25:49 davem Exp $ - * * Authors: Ross Biro * Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG> * diff --git a/net/ipv4/raw.c b/net/ipv4/raw.c index 37a1ecd9d600..cd975743bcd2 100644 --- a/net/ipv4/raw.c +++ b/net/ipv4/raw.c @@ -5,8 +5,6 @@ * * RAW - implementation of IP "raw" sockets. * - * Version: $Id: raw.c,v 1.64 2002/02/01 22:01:04 davem Exp $ - * * Authors: Ross Biro * Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG> * @@ -322,6 +320,7 @@ static int raw_send_hdrinc(struct sock *sk, void *from, size_t length, unsigned int flags) { struct inet_sock *inet = inet_sk(sk); + struct net *net = sock_net(sk); struct iphdr *iph; struct sk_buff *skb; unsigned int iphlen; @@ -370,7 +369,7 @@ static int raw_send_hdrinc(struct sock *sk, void *from, size_t length, iph->check = ip_fast_csum((unsigned char *)iph, iph->ihl); } if (iph->protocol == IPPROTO_ICMP) - icmp_out_count(((struct icmphdr *) + icmp_out_count(net, ((struct icmphdr *) skb_transport_header(skb))->type); err = NF_HOOK(PF_INET, NF_INET_LOCAL_OUT, skb, NULL, rt->u.dst.dev, @@ -386,7 +385,7 @@ error_fault: err = -EFAULT; kfree_skb(skb); error: - IP_INC_STATS(IPSTATS_MIB_OUTDISCARDS); + IP_INC_STATS(net, IPSTATS_MIB_OUTDISCARDS); return err; } @@ -608,12 +607,11 @@ static void raw_close(struct sock *sk, long timeout) sk_common_release(sk); } -static int raw_destroy(struct sock *sk) +static void raw_destroy(struct sock *sk) { lock_sock(sk); ip_flush_pending_frames(sk); release_sock(sk); - return 0; } /* This gets rid of all the nasties in af_inet. -DaveM */ @@ -947,7 +945,7 @@ static int raw_seq_show(struct seq_file *seq, void *v) if (v == SEQ_START_TOKEN) seq_printf(seq, " sl local_address rem_address st tx_queue " "rx_queue tr tm->when retrnsmt uid timeout " - "inode drops\n"); + "inode ref pointer drops\n"); else raw_sock_seq_show(seq, v, raw_seq_private(seq)->bucket); return 0; diff --git a/net/ipv4/route.c b/net/ipv4/route.c index 96be336064fb..a6d7c584f53b 100644 --- a/net/ipv4/route.c +++ b/net/ipv4/route.c @@ -5,8 +5,6 @@ * * ROUTE - implementation of the IP router. * - * Version: $Id: route.c,v 1.103 2002/01/12 07:44:09 davem Exp $ - * * Authors: Ross Biro * Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG> * Alan Cox, <gw4pts@gw4pts.ampr.org> @@ -134,7 +132,6 @@ static int ip_rt_secret_interval __read_mostly = 10 * 60 * HZ; static void rt_worker_func(struct work_struct *work); static DECLARE_DELAYED_WORK(expires_work, rt_worker_func); -static struct timer_list rt_secret_timer; /* * Interface to generic destination cache. @@ -253,20 +250,25 @@ static inline void rt_hash_lock_init(void) static struct rt_hash_bucket *rt_hash_table __read_mostly; static unsigned rt_hash_mask __read_mostly; static unsigned int rt_hash_log __read_mostly; -static atomic_t rt_genid __read_mostly; static DEFINE_PER_CPU(struct rt_cache_stat, rt_cache_stat); #define RT_CACHE_STAT_INC(field) \ (__raw_get_cpu_var(rt_cache_stat).field++) -static inline unsigned int rt_hash(__be32 daddr, __be32 saddr, int idx) +static inline unsigned int rt_hash(__be32 daddr, __be32 saddr, int idx, + int genid) { return jhash_3words((__force u32)(__be32)(daddr), (__force u32)(__be32)(saddr), - idx, atomic_read(&rt_genid)) + idx, genid) & rt_hash_mask; } +static inline int rt_genid(struct net *net) +{ + return atomic_read(&net->ipv4.rt_genid); +} + #ifdef CONFIG_PROC_FS struct rt_cache_iter_state { struct seq_net_private p; @@ -280,6 +282,8 @@ static struct rtable *rt_cache_get_first(struct seq_file *seq) struct rtable *r = NULL; for (st->bucket = rt_hash_mask; st->bucket >= 0; --st->bucket) { + if (!rt_hash_table[st->bucket].chain) + continue; rcu_read_lock_bh(); r = rcu_dereference(rt_hash_table[st->bucket].chain); while (r) { @@ -297,11 +301,14 @@ static struct rtable *__rt_cache_get_next(struct seq_file *seq, struct rtable *r) { struct rt_cache_iter_state *st = seq->private; + r = r->u.dst.rt_next; while (!r) { rcu_read_unlock_bh(); - if (--st->bucket < 0) - break; + do { + if (--st->bucket < 0) + return NULL; + } while (!rt_hash_table[st->bucket].chain); rcu_read_lock_bh(); r = rt_hash_table[st->bucket].chain; } @@ -336,7 +343,7 @@ static void *rt_cache_seq_start(struct seq_file *seq, loff_t *pos) struct rt_cache_iter_state *st = seq->private; if (*pos) return rt_cache_get_idx(seq, *pos - 1); - st->genid = atomic_read(&rt_genid); + st->genid = rt_genid(seq_file_net(seq)); return SEQ_START_TOKEN; } @@ -683,6 +690,11 @@ static inline int compare_netns(struct rtable *rt1, struct rtable *rt2) return dev_net(rt1->u.dst.dev) == dev_net(rt2->u.dst.dev); } +static inline int rt_is_expired(struct rtable *rth) +{ + return rth->rt_genid != rt_genid(dev_net(rth->u.dst.dev)); +} + /* * Perform a full scan of hash table and free all entries. * Can be called by a softirq or a process. @@ -692,6 +704,7 @@ static void rt_do_flush(int process_context) { unsigned int i; struct rtable *rth, *next; + struct rtable * tail; for (i = 0; i <= rt_hash_mask; i++) { if (process_context && need_resched()) @@ -701,11 +714,39 @@ static void rt_do_flush(int process_context) continue; spin_lock_bh(rt_hash_lock_addr(i)); +#ifdef CONFIG_NET_NS + { + struct rtable ** prev, * p; + + rth = rt_hash_table[i].chain; + + /* defer releasing the head of the list after spin_unlock */ + for (tail = rth; tail; tail = tail->u.dst.rt_next) + if (!rt_is_expired(tail)) + break; + if (rth != tail) + rt_hash_table[i].chain = tail; + + /* call rt_free on entries after the tail requiring flush */ + prev = &rt_hash_table[i].chain; + for (p = *prev; p; p = next) { + next = p->u.dst.rt_next; + if (!rt_is_expired(p)) { + prev = &p->u.dst.rt_next; + } else { + *prev = next; + rt_free(p); + } + } + } +#else rth = rt_hash_table[i].chain; rt_hash_table[i].chain = NULL; + tail = NULL; +#endif spin_unlock_bh(rt_hash_lock_addr(i)); - for (; rth; rth = next) { + for (; rth != tail; rth = next) { next = rth->u.dst.rt_next; rt_free(rth); } @@ -738,7 +779,7 @@ static void rt_check_expire(void) continue; spin_lock_bh(rt_hash_lock_addr(i)); while ((rth = *rthp) != NULL) { - if (rth->rt_genid != atomic_read(&rt_genid)) { + if (rt_is_expired(rth)) { *rthp = rth->u.dst.rt_next; rt_free(rth); continue; @@ -781,21 +822,21 @@ static void rt_worker_func(struct work_struct *work) * many times (2^24) without giving recent rt_genid. * Jenkins hash is strong enough that litle changes of rt_genid are OK. */ -static void rt_cache_invalidate(void) +static void rt_cache_invalidate(struct net *net) { unsigned char shuffle; get_random_bytes(&shuffle, sizeof(shuffle)); - atomic_add(shuffle + 1U, &rt_genid); + atomic_add(shuffle + 1U, &net->ipv4.rt_genid); } /* * delay < 0 : invalidate cache (fast : entries will be deleted later) * delay >= 0 : invalidate & flush cache (can be long) */ -void rt_cache_flush(int delay) +void rt_cache_flush(struct net *net, int delay) { - rt_cache_invalidate(); + rt_cache_invalidate(net); if (delay >= 0) rt_do_flush(!in_softirq()); } @@ -803,10 +844,11 @@ void rt_cache_flush(int delay) /* * We change rt_genid and let gc do the cleanup */ -static void rt_secret_rebuild(unsigned long dummy) +static void rt_secret_rebuild(unsigned long __net) { - rt_cache_invalidate(); - mod_timer(&rt_secret_timer, jiffies + ip_rt_secret_interval); + struct net *net = (struct net *)__net; + rt_cache_invalidate(net); + mod_timer(&net->ipv4.rt_secret_timer, jiffies + ip_rt_secret_interval); } /* @@ -882,7 +924,7 @@ static int rt_garbage_collect(struct dst_ops *ops) rthp = &rt_hash_table[k].chain; spin_lock_bh(rt_hash_lock_addr(k)); while ((rth = *rthp) != NULL) { - if (rth->rt_genid == atomic_read(&rt_genid) && + if (!rt_is_expired(rth) && !rt_may_expire(rth, tmo, expire)) { tmo >>= 1; rthp = &rth->u.dst.rt_next; @@ -964,7 +1006,7 @@ restart: spin_lock_bh(rt_hash_lock_addr(hash)); while ((rth = *rthp) != NULL) { - if (rth->rt_genid != atomic_read(&rt_genid)) { + if (rt_is_expired(rth)) { *rthp = rth->u.dst.rt_next; rt_free(rth); continue; @@ -1140,7 +1182,7 @@ static void rt_del(unsigned hash, struct rtable *rt) spin_lock_bh(rt_hash_lock_addr(hash)); ip_rt_put(rt); while ((aux = *rthp) != NULL) { - if (aux == rt || (aux->rt_genid != atomic_read(&rt_genid))) { + if (aux == rt || rt_is_expired(aux)) { *rthp = aux->u.dst.rt_next; rt_free(aux); continue; @@ -1182,7 +1224,8 @@ void ip_rt_redirect(__be32 old_gw, __be32 daddr, __be32 new_gw, for (i = 0; i < 2; i++) { for (k = 0; k < 2; k++) { - unsigned hash = rt_hash(daddr, skeys[i], ikeys[k]); + unsigned hash = rt_hash(daddr, skeys[i], ikeys[k], + rt_genid(net)); rthp=&rt_hash_table[hash].chain; @@ -1194,7 +1237,7 @@ void ip_rt_redirect(__be32 old_gw, __be32 daddr, __be32 new_gw, rth->fl.fl4_src != skeys[i] || rth->fl.oif != ikeys[k] || rth->fl.iif != 0 || - rth->rt_genid != atomic_read(&rt_genid) || + rt_is_expired(rth) || !net_eq(dev_net(rth->u.dst.dev), net)) { rthp = &rth->u.dst.rt_next; continue; @@ -1233,7 +1276,7 @@ void ip_rt_redirect(__be32 old_gw, __be32 daddr, __be32 new_gw, rt->u.dst.neighbour = NULL; rt->u.dst.hh = NULL; rt->u.dst.xfrm = NULL; - rt->rt_genid = atomic_read(&rt_genid); + rt->rt_genid = rt_genid(net); rt->rt_flags |= RTCF_REDIRECTED; /* Gateway is different ... */ @@ -1297,7 +1340,8 @@ static struct dst_entry *ipv4_negative_advice(struct dst_entry *dst) } else if ((rt->rt_flags & RTCF_REDIRECTED) || rt->u.dst.expires) { unsigned hash = rt_hash(rt->fl.fl4_dst, rt->fl.fl4_src, - rt->fl.oif); + rt->fl.oif, + rt_genid(dev_net(dst->dev))); #if RT_CACHE_DEBUG >= 1 printk(KERN_DEBUG "ipv4_negative_advice: redirect to " NIPQUAD_FMT "/%02x dropped\n", @@ -1390,7 +1434,8 @@ static int ip_error(struct sk_buff *skb) break; case ENETUNREACH: code = ICMP_NET_UNREACH; - IP_INC_STATS_BH(IPSTATS_MIB_INNOROUTES); + IP_INC_STATS_BH(dev_net(rt->u.dst.dev), + IPSTATS_MIB_INNOROUTES); break; case EACCES: code = ICMP_PKT_FILTERED; @@ -1446,7 +1491,8 @@ unsigned short ip_rt_frag_needed(struct net *net, struct iphdr *iph, for (k = 0; k < 2; k++) { for (i = 0; i < 2; i++) { - unsigned hash = rt_hash(daddr, skeys[i], ikeys[k]); + unsigned hash = rt_hash(daddr, skeys[i], ikeys[k], + rt_genid(net)); rcu_read_lock(); for (rth = rcu_dereference(rt_hash_table[hash].chain); rth; @@ -1461,21 +1507,21 @@ unsigned short ip_rt_frag_needed(struct net *net, struct iphdr *iph, rth->fl.iif != 0 || dst_metric_locked(&rth->u.dst, RTAX_MTU) || !net_eq(dev_net(rth->u.dst.dev), net) || - rth->rt_genid != atomic_read(&rt_genid)) + rt_is_expired(rth)) continue; if (new_mtu < 68 || new_mtu >= old_mtu) { /* BSD 4.2 compatibility hack :-( */ if (mtu == 0 && - old_mtu >= dst_metric(&rth->u.dst, RTAX_MTU) && + old_mtu >= dst_mtu(&rth->u.dst) && old_mtu >= 68 + (iph->ihl << 2)) old_mtu -= iph->ihl << 2; mtu = guess_mtu(old_mtu); } - if (mtu <= dst_metric(&rth->u.dst, RTAX_MTU)) { - if (mtu < dst_metric(&rth->u.dst, RTAX_MTU)) { + if (mtu <= dst_mtu(&rth->u.dst)) { + if (mtu < dst_mtu(&rth->u.dst)) { dst_confirm(&rth->u.dst); if (mtu < ip_rt_min_pmtu) { mtu = ip_rt_min_pmtu; @@ -1497,7 +1543,7 @@ unsigned short ip_rt_frag_needed(struct net *net, struct iphdr *iph, static void ip_rt_update_pmtu(struct dst_entry *dst, u32 mtu) { - if (dst_metric(dst, RTAX_MTU) > mtu && mtu >= 68 && + if (dst_mtu(dst) > mtu && mtu >= 68 && !(dst_metric_locked(dst, RTAX_MTU))) { if (mtu < ip_rt_min_pmtu) { mtu = ip_rt_min_pmtu; @@ -1626,7 +1672,7 @@ static void rt_set_nexthop(struct rtable *rt, struct fib_result *res, u32 itag) if (dst_metric(&rt->u.dst, RTAX_HOPLIMIT) == 0) rt->u.dst.metrics[RTAX_HOPLIMIT-1] = sysctl_ip_default_ttl; - if (dst_metric(&rt->u.dst, RTAX_MTU) > IP_MAX_MTU) + if (dst_mtu(&rt->u.dst) > IP_MAX_MTU) rt->u.dst.metrics[RTAX_MTU-1] = IP_MAX_MTU; if (dst_metric(&rt->u.dst, RTAX_ADVMSS) == 0) rt->u.dst.metrics[RTAX_ADVMSS-1] = max_t(unsigned int, rt->u.dst.dev->mtu - 40, @@ -1696,7 +1742,7 @@ static int ip_route_input_mc(struct sk_buff *skb, __be32 daddr, __be32 saddr, rth->fl.oif = 0; rth->rt_gateway = daddr; rth->rt_spec_dst= spec_dst; - rth->rt_genid = atomic_read(&rt_genid); + rth->rt_genid = rt_genid(dev_net(dev)); rth->rt_flags = RTCF_MULTICAST; rth->rt_type = RTN_MULTICAST; if (our) { @@ -1711,7 +1757,7 @@ static int ip_route_input_mc(struct sk_buff *skb, __be32 daddr, __be32 saddr, RT_CACHE_STAT_INC(in_slow_mc); in_dev_put(in_dev); - hash = rt_hash(daddr, saddr, dev->ifindex); + hash = rt_hash(daddr, saddr, dev->ifindex, rt_genid(dev_net(dev))); return rt_intern_hash(hash, rth, &skb->rtable); e_nobufs: @@ -1837,7 +1883,7 @@ static int __mkroute_input(struct sk_buff *skb, rth->u.dst.input = ip_forward; rth->u.dst.output = ip_output; - rth->rt_genid = atomic_read(&rt_genid); + rth->rt_genid = rt_genid(dev_net(rth->u.dst.dev)); rt_set_nexthop(rth, res, itag); @@ -1872,7 +1918,8 @@ static int ip_mkroute_input(struct sk_buff *skb, return err; /* put it into the cache */ - hash = rt_hash(daddr, saddr, fl->iif); + hash = rt_hash(daddr, saddr, fl->iif, + rt_genid(dev_net(rth->u.dst.dev))); return rt_intern_hash(hash, rth, &skb->rtable); } @@ -1998,7 +2045,7 @@ local_input: goto e_nobufs; rth->u.dst.output= ip_rt_bug; - rth->rt_genid = atomic_read(&rt_genid); + rth->rt_genid = rt_genid(net); atomic_set(&rth->u.dst.__refcnt, 1); rth->u.dst.flags= DST_HOST; @@ -2028,7 +2075,7 @@ local_input: rth->rt_flags &= ~RTCF_LOCAL; } rth->rt_type = res.type; - hash = rt_hash(daddr, saddr, fl.iif); + hash = rt_hash(daddr, saddr, fl.iif, rt_genid(net)); err = rt_intern_hash(hash, rth, &skb->rtable); goto done; @@ -2079,7 +2126,7 @@ int ip_route_input(struct sk_buff *skb, __be32 daddr, __be32 saddr, net = dev_net(dev); tos &= IPTOS_RT_MASK; - hash = rt_hash(daddr, saddr, iif); + hash = rt_hash(daddr, saddr, iif, rt_genid(net)); rcu_read_lock(); for (rth = rcu_dereference(rt_hash_table[hash].chain); rth; @@ -2091,7 +2138,7 @@ int ip_route_input(struct sk_buff *skb, __be32 daddr, __be32 saddr, (rth->fl.fl4_tos ^ tos)) == 0 && rth->fl.mark == skb->mark && net_eq(dev_net(rth->u.dst.dev), net) && - rth->rt_genid == atomic_read(&rt_genid)) { + !rt_is_expired(rth)) { dst_use(&rth->u.dst, jiffies); RT_CACHE_STAT_INC(in_hit); rcu_read_unlock(); @@ -2219,7 +2266,7 @@ static int __mkroute_output(struct rtable **result, rth->rt_spec_dst= fl->fl4_src; rth->u.dst.output=ip_output; - rth->rt_genid = atomic_read(&rt_genid); + rth->rt_genid = rt_genid(dev_net(dev_out)); RT_CACHE_STAT_INC(out_slow_tot); @@ -2268,7 +2315,8 @@ static int ip_mkroute_output(struct rtable **rp, int err = __mkroute_output(&rth, res, fl, oldflp, dev_out, flags); unsigned hash; if (err == 0) { - hash = rt_hash(oldflp->fl4_dst, oldflp->fl4_src, oldflp->oif); + hash = rt_hash(oldflp->fl4_dst, oldflp->fl4_src, oldflp->oif, + rt_genid(dev_net(dev_out))); err = rt_intern_hash(hash, rth, rp); } @@ -2313,11 +2361,6 @@ static int ip_route_output_slow(struct net *net, struct rtable **rp, ipv4_is_zeronet(oldflp->fl4_src)) goto out; - /* It is equivalent to inet_addr_type(saddr) == RTN_LOCAL */ - dev_out = ip_dev_find(net, oldflp->fl4_src); - if (dev_out == NULL) - goto out; - /* I removed check for oif == dev_out->oif here. It was wrong for two reasons: 1. ip_dev_find(net, saddr) can return wrong iface, if saddr @@ -2329,6 +2372,11 @@ static int ip_route_output_slow(struct net *net, struct rtable **rp, if (oldflp->oif == 0 && (ipv4_is_multicast(oldflp->fl4_dst) || oldflp->fl4_dst == htonl(0xFFFFFFFF))) { + /* It is equivalent to inet_addr_type(saddr) == RTN_LOCAL */ + dev_out = ip_dev_find(net, oldflp->fl4_src); + if (dev_out == NULL) + goto out; + /* Special hack: user can direct multicasts and limited broadcast via necessary interface without fiddling with IP_MULTICAST_IF or IP_PKTINFO. @@ -2347,9 +2395,15 @@ static int ip_route_output_slow(struct net *net, struct rtable **rp, fl.oif = dev_out->ifindex; goto make_route; } - if (dev_out) + + if (!(oldflp->flags & FLOWI_FLAG_ANYSRC)) { + /* It is equivalent to inet_addr_type(saddr) == RTN_LOCAL */ + dev_out = ip_dev_find(net, oldflp->fl4_src); + if (dev_out == NULL) + goto out; dev_put(dev_out); - dev_out = NULL; + dev_out = NULL; + } } @@ -2480,7 +2534,7 @@ int __ip_route_output_key(struct net *net, struct rtable **rp, unsigned hash; struct rtable *rth; - hash = rt_hash(flp->fl4_dst, flp->fl4_src, flp->oif); + hash = rt_hash(flp->fl4_dst, flp->fl4_src, flp->oif, rt_genid(net)); rcu_read_lock_bh(); for (rth = rcu_dereference(rt_hash_table[hash].chain); rth; @@ -2493,7 +2547,7 @@ int __ip_route_output_key(struct net *net, struct rtable **rp, !((rth->fl.fl4_tos ^ flp->fl4_tos) & (IPTOS_RT_MASK | RTO_ONLINK)) && net_eq(dev_net(rth->u.dst.dev), net) && - rth->rt_genid == atomic_read(&rt_genid)) { + !rt_is_expired(rth)) { dst_use(&rth->u.dst, jiffies); RT_CACHE_STAT_INC(out_hit); rcu_read_unlock_bh(); @@ -2524,7 +2578,7 @@ static struct dst_ops ipv4_dst_blackhole_ops = { }; -static int ipv4_dst_blackhole(struct rtable **rp, struct flowi *flp) +static int ipv4_dst_blackhole(struct net *net, struct rtable **rp, struct flowi *flp) { struct rtable *ort = *rp; struct rtable *rt = (struct rtable *) @@ -2548,7 +2602,7 @@ static int ipv4_dst_blackhole(struct rtable **rp, struct flowi *flp) rt->idev = ort->idev; if (rt->idev) in_dev_hold(rt->idev); - rt->rt_genid = atomic_read(&rt_genid); + rt->rt_genid = rt_genid(net); rt->rt_flags = ort->rt_flags; rt->rt_type = ort->rt_type; rt->rt_dst = ort->rt_dst; @@ -2584,7 +2638,7 @@ int ip_route_output_flow(struct net *net, struct rtable **rp, struct flowi *flp, err = __xfrm_lookup((struct dst_entry **)rp, flp, sk, flags ? XFRM_LOOKUP_WAIT : 0); if (err == -EREMOTE) - err = ipv4_dst_blackhole(rp, flp); + err = ipv4_dst_blackhole(net, rp, flp); return err; } @@ -2797,13 +2851,15 @@ int ip_rt_dump(struct sk_buff *skb, struct netlink_callback *cb) if (s_h < 0) s_h = 0; s_idx = idx = cb->args[1]; - for (h = s_h; h <= rt_hash_mask; h++) { + for (h = s_h; h <= rt_hash_mask; h++, s_idx = 0) { + if (!rt_hash_table[h].chain) + continue; rcu_read_lock_bh(); for (rt = rcu_dereference(rt_hash_table[h].chain), idx = 0; rt; rt = rcu_dereference(rt->u.dst.rt_next), idx++) { if (!net_eq(dev_net(rt->u.dst.dev), net) || idx < s_idx) continue; - if (rt->rt_genid != atomic_read(&rt_genid)) + if (rt_is_expired(rt)) continue; skb->dst = dst_clone(&rt->u.dst); if (rt_fill_info(skb, NETLINK_CB(cb->skb).pid, @@ -2816,7 +2872,6 @@ int ip_rt_dump(struct sk_buff *skb, struct netlink_callback *cb) dst_release(xchg(&skb->dst, NULL)); } rcu_read_unlock_bh(); - s_idx = 0; } done: @@ -2827,19 +2882,25 @@ done: void ip_rt_multicast_event(struct in_device *in_dev) { - rt_cache_flush(0); + rt_cache_flush(dev_net(in_dev->dev), 0); } #ifdef CONFIG_SYSCTL -static int flush_delay; - -static int ipv4_sysctl_rtcache_flush(ctl_table *ctl, int write, +static int ipv4_sysctl_rtcache_flush(ctl_table *__ctl, int write, struct file *filp, void __user *buffer, size_t *lenp, loff_t *ppos) { if (write) { - proc_dointvec(ctl, write, filp, buffer, lenp, ppos); - rt_cache_flush(flush_delay); + int flush_delay; + ctl_table ctl; + struct net *net; + + memcpy(&ctl, __ctl, sizeof(ctl)); + ctl.data = &flush_delay; + proc_dointvec(&ctl, write, filp, buffer, lenp, ppos); + + net = (struct net *)__ctl->extra1; + rt_cache_flush(net, flush_delay); return 0; } @@ -2855,24 +2916,79 @@ static int ipv4_sysctl_rtcache_flush_strategy(ctl_table *table, size_t newlen) { int delay; + struct net *net; if (newlen != sizeof(int)) return -EINVAL; if (get_user(delay, (int __user *)newval)) return -EFAULT; - rt_cache_flush(delay); + net = (struct net *)table->extra1; + rt_cache_flush(net, delay); return 0; } -ctl_table ipv4_route_table[] = { - { - .ctl_name = NET_IPV4_ROUTE_FLUSH, - .procname = "flush", - .data = &flush_delay, - .maxlen = sizeof(int), - .mode = 0200, - .proc_handler = &ipv4_sysctl_rtcache_flush, - .strategy = &ipv4_sysctl_rtcache_flush_strategy, - }, +static void rt_secret_reschedule(int old) +{ + struct net *net; + int new = ip_rt_secret_interval; + int diff = new - old; + + if (!diff) + return; + + rtnl_lock(); + for_each_net(net) { + int deleted = del_timer_sync(&net->ipv4.rt_secret_timer); + + if (!new) + continue; + + if (deleted) { + long time = net->ipv4.rt_secret_timer.expires - jiffies; + + if (time <= 0 || (time += diff) <= 0) + time = 0; + + net->ipv4.rt_secret_timer.expires = time; + } else + net->ipv4.rt_secret_timer.expires = new; + + net->ipv4.rt_secret_timer.expires += jiffies; + add_timer(&net->ipv4.rt_secret_timer); + } + rtnl_unlock(); +} + +static int ipv4_sysctl_rt_secret_interval(ctl_table *ctl, int write, + struct file *filp, + void __user *buffer, size_t *lenp, + loff_t *ppos) +{ + int old = ip_rt_secret_interval; + int ret = proc_dointvec_jiffies(ctl, write, filp, buffer, lenp, ppos); + + rt_secret_reschedule(old); + + return ret; +} + +static int ipv4_sysctl_rt_secret_interval_strategy(ctl_table *table, + int __user *name, + int nlen, + void __user *oldval, + size_t __user *oldlenp, + void __user *newval, + size_t newlen) +{ + int old = ip_rt_secret_interval; + int ret = sysctl_jiffies(table, name, nlen, oldval, oldlenp, newval, + newlen); + + rt_secret_reschedule(old); + + return ret; +} + +static ctl_table ipv4_route_table[] = { { .ctl_name = NET_IPV4_ROUTE_GC_THRESH, .procname = "gc_thresh", @@ -3006,13 +3122,120 @@ ctl_table ipv4_route_table[] = { .data = &ip_rt_secret_interval, .maxlen = sizeof(int), .mode = 0644, - .proc_handler = &proc_dointvec_jiffies, - .strategy = &sysctl_jiffies, + .proc_handler = &ipv4_sysctl_rt_secret_interval, + .strategy = &ipv4_sysctl_rt_secret_interval_strategy, }, { .ctl_name = 0 } }; + +static struct ctl_table empty[1]; + +static struct ctl_table ipv4_skeleton[] = +{ + { .procname = "route", .ctl_name = NET_IPV4_ROUTE, + .mode = 0555, .child = ipv4_route_table}, + { .procname = "neigh", .ctl_name = NET_IPV4_NEIGH, + .mode = 0555, .child = empty}, + { } +}; + +static __net_initdata struct ctl_path ipv4_path[] = { + { .procname = "net", .ctl_name = CTL_NET, }, + { .procname = "ipv4", .ctl_name = NET_IPV4, }, + { }, +}; + +static struct ctl_table ipv4_route_flush_table[] = { + { + .ctl_name = NET_IPV4_ROUTE_FLUSH, + .procname = "flush", + .maxlen = sizeof(int), + .mode = 0200, + .proc_handler = &ipv4_sysctl_rtcache_flush, + .strategy = &ipv4_sysctl_rtcache_flush_strategy, + }, + { .ctl_name = 0 }, +}; + +static __net_initdata struct ctl_path ipv4_route_path[] = { + { .procname = "net", .ctl_name = CTL_NET, }, + { .procname = "ipv4", .ctl_name = NET_IPV4, }, + { .procname = "route", .ctl_name = NET_IPV4_ROUTE, }, + { }, +}; + +static __net_init int sysctl_route_net_init(struct net *net) +{ + struct ctl_table *tbl; + + tbl = ipv4_route_flush_table; + if (net != &init_net) { + tbl = kmemdup(tbl, sizeof(ipv4_route_flush_table), GFP_KERNEL); + if (tbl == NULL) + goto err_dup; + } + tbl[0].extra1 = net; + + net->ipv4.route_hdr = + register_net_sysctl_table(net, ipv4_route_path, tbl); + if (net->ipv4.route_hdr == NULL) + goto err_reg; + return 0; + +err_reg: + if (tbl != ipv4_route_flush_table) + kfree(tbl); +err_dup: + return -ENOMEM; +} + +static __net_exit void sysctl_route_net_exit(struct net *net) +{ + struct ctl_table *tbl; + + tbl = net->ipv4.route_hdr->ctl_table_arg; + unregister_net_sysctl_table(net->ipv4.route_hdr); + BUG_ON(tbl == ipv4_route_flush_table); + kfree(tbl); +} + +static __net_initdata struct pernet_operations sysctl_route_ops = { + .init = sysctl_route_net_init, + .exit = sysctl_route_net_exit, +}; #endif + +static __net_init int rt_secret_timer_init(struct net *net) +{ + atomic_set(&net->ipv4.rt_genid, + (int) ((num_physpages ^ (num_physpages>>8)) ^ + (jiffies ^ (jiffies >> 7)))); + + net->ipv4.rt_secret_timer.function = rt_secret_rebuild; + net->ipv4.rt_secret_timer.data = (unsigned long)net; + init_timer_deferrable(&net->ipv4.rt_secret_timer); + + if (ip_rt_secret_interval) { + net->ipv4.rt_secret_timer.expires = + jiffies + net_random() % ip_rt_secret_interval + + ip_rt_secret_interval; + add_timer(&net->ipv4.rt_secret_timer); + } + return 0; +} + +static __net_exit void rt_secret_timer_exit(struct net *net) +{ + del_timer_sync(&net->ipv4.rt_secret_timer); +} + +static __net_initdata struct pernet_operations rt_secret_timer_ops = { + .init = rt_secret_timer_init, + .exit = rt_secret_timer_exit, +}; + + #ifdef CONFIG_NET_CLS_ROUTE struct ip_rt_acct *ip_rt_acct __read_mostly; #endif /* CONFIG_NET_CLS_ROUTE */ @@ -3031,9 +3254,6 @@ int __init ip_rt_init(void) { int rc = 0; - atomic_set(&rt_genid, (int) ((num_physpages ^ (num_physpages>>8)) ^ - (jiffies ^ (jiffies >> 7)))); - #ifdef CONFIG_NET_CLS_ROUTE ip_rt_acct = __alloc_percpu(256 * sizeof(struct ip_rt_acct)); if (!ip_rt_acct) @@ -3065,19 +3285,14 @@ int __init ip_rt_init(void) devinet_init(); ip_fib_init(); - rt_secret_timer.function = rt_secret_rebuild; - rt_secret_timer.data = 0; - init_timer_deferrable(&rt_secret_timer); - /* All the timers, started at system startup tend to synchronize. Perturb it a bit. */ schedule_delayed_work(&expires_work, net_random() % ip_rt_gc_interval + ip_rt_gc_interval); - rt_secret_timer.expires = jiffies + net_random() % ip_rt_secret_interval + - ip_rt_secret_interval; - add_timer(&rt_secret_timer); + if (register_pernet_subsys(&rt_secret_timer_ops)) + printk(KERN_ERR "Unable to setup rt_secret_timer\n"); if (ip_rt_proc_init()) printk(KERN_ERR "Unable to create route proc files\n"); @@ -3087,9 +3302,23 @@ int __init ip_rt_init(void) #endif rtnl_register(PF_INET, RTM_GETROUTE, inet_rtm_getroute, NULL); +#ifdef CONFIG_SYSCTL + register_pernet_subsys(&sysctl_route_ops); +#endif return rc; } +#ifdef CONFIG_SYSCTL +/* + * We really need to sanitize the damn ipv4 init order, then all + * this nonsense will go away. + */ +void __init ip_static_sysctl_init(void) +{ + register_sysctl_paths(ipv4_path, ipv4_skeleton); +} +#endif + EXPORT_SYMBOL(__ip_select_ident); EXPORT_SYMBOL(ip_route_input); EXPORT_SYMBOL(ip_route_output_key); diff --git a/net/ipv4/syncookies.c b/net/ipv4/syncookies.c index d182a2a26291..d346c22aa6ae 100644 --- a/net/ipv4/syncookies.c +++ b/net/ipv4/syncookies.c @@ -8,8 +8,6 @@ * modify it under the terms of the GNU General Public License * as published by the Free Software Foundation; either version * 2 of the License, or (at your option) any later version. - * - * $Id: syncookies.c,v 1.18 2002/02/01 22:01:04 davem Exp $ */ #include <linux/tcp.h> @@ -18,6 +16,7 @@ #include <linux/cryptohash.h> #include <linux/kernel.h> #include <net/tcp.h> +#include <net/route.h> /* Timestamps: lowest 9 bits store TCP options */ #define TSBITS 9 @@ -175,7 +174,7 @@ __u32 cookie_v4_init_sequence(struct sock *sk, struct sk_buff *skb, __u16 *mssp) ; *mssp = msstab[mssind] + 1; - NET_INC_STATS_BH(LINUX_MIB_SYNCOOKIESSENT); + NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_SYNCOOKIESSENT); return secure_tcp_syn_cookie(iph->saddr, iph->daddr, th->source, th->dest, ntohl(th->seq), @@ -271,11 +270,11 @@ struct sock *cookie_v4_check(struct sock *sk, struct sk_buff *skb, if (time_after(jiffies, tp->last_synq_overflow + TCP_TIMEOUT_INIT) || (mss = cookie_check(skb, cookie)) == 0) { - NET_INC_STATS_BH(LINUX_MIB_SYNCOOKIESFAILED); + NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_SYNCOOKIESFAILED); goto out; } - NET_INC_STATS_BH(LINUX_MIB_SYNCOOKIESRECV); + NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_SYNCOOKIESRECV); /* check for timestamp cookie support */ memset(&tcp_opt, 0, sizeof(tcp_opt)); @@ -298,9 +297,11 @@ struct sock *cookie_v4_check(struct sock *sk, struct sk_buff *skb, treq->rcv_isn = ntohl(th->seq) - 1; treq->snt_isn = cookie; req->mss = mss; + ireq->loc_port = th->dest; ireq->rmt_port = th->source; ireq->loc_addr = ip_hdr(skb)->daddr; ireq->rmt_addr = ip_hdr(skb)->saddr; + ireq->ecn_ok = 0; ireq->snd_wscale = tcp_opt.snd_wscale; ireq->rcv_wscale = tcp_opt.rcv_wscale; ireq->sack_ok = tcp_opt.sack_ok; @@ -338,6 +339,7 @@ struct sock *cookie_v4_check(struct sock *sk, struct sk_buff *skb, .saddr = ireq->loc_addr, .tos = RT_CONN_FLAGS(sk) } }, .proto = IPPROTO_TCP, + .flags = inet_sk_flowi_flags(sk), .uli_u = { .ports = { .sport = th->dest, .dport = th->source } } }; diff --git a/net/ipv4/sysctl_net_ipv4.c b/net/ipv4/sysctl_net_ipv4.c index c437f804ee38..276d047fb85a 100644 --- a/net/ipv4/sysctl_net_ipv4.c +++ b/net/ipv4/sysctl_net_ipv4.c @@ -1,8 +1,6 @@ /* * sysctl_net_ipv4.c: sysctl interface to net IPV4 subsystem. * - * $Id: sysctl_net_ipv4.c,v 1.50 2001/10/20 00:00:11 davem Exp $ - * * Begun April 1, 1996, Mike Shaver. * Added /proc/sys/net/ipv4 directory entry (empty =) ). [MS] */ @@ -28,16 +26,13 @@ static int tcp_retr1_max = 255; static int ip_local_port_range_min[] = { 1, 1 }; static int ip_local_port_range_max[] = { 65535, 65535 }; -extern seqlock_t sysctl_port_range_lock; -extern int sysctl_local_port_range[2]; - /* Update system visible IP port range */ static void set_local_port_range(int range[2]) { - write_seqlock(&sysctl_port_range_lock); - sysctl_local_port_range[0] = range[0]; - sysctl_local_port_range[1] = range[1]; - write_sequnlock(&sysctl_port_range_lock); + write_seqlock(&sysctl_local_ports.lock); + sysctl_local_ports.range[0] = range[0]; + sysctl_local_ports.range[1] = range[1]; + write_sequnlock(&sysctl_local_ports.lock); } /* Validate changes from /proc interface. */ @@ -46,8 +41,7 @@ static int ipv4_local_port_range(ctl_table *table, int write, struct file *filp, size_t *lenp, loff_t *ppos) { int ret; - int range[2] = { sysctl_local_port_range[0], - sysctl_local_port_range[1] }; + int range[2]; ctl_table tmp = { .data = &range, .maxlen = sizeof(range), @@ -56,6 +50,7 @@ static int ipv4_local_port_range(ctl_table *table, int write, struct file *filp, .extra2 = &ip_local_port_range_max, }; + inet_get_local_port_range(range, range + 1); ret = proc_dointvec_minmax(&tmp, write, filp, buffer, lenp, ppos); if (write && ret == 0) { @@ -75,8 +70,7 @@ static int ipv4_sysctl_local_port_range(ctl_table *table, int __user *name, void __user *newval, size_t newlen) { int ret; - int range[2] = { sysctl_local_port_range[0], - sysctl_local_port_range[1] }; + int range[2]; ctl_table tmp = { .data = &range, .maxlen = sizeof(range), @@ -85,6 +79,7 @@ static int ipv4_sysctl_local_port_range(ctl_table *table, int __user *name, .extra2 = &ip_local_port_range_max, }; + inet_get_local_port_range(range, range + 1); ret = sysctl_intvec(&tmp, name, nlen, oldval, oldlenp, newval, newlen); if (ret == 0 && newval && newlen) { if (range[1] < range[0]) @@ -234,6 +229,7 @@ static struct ctl_table ipv4_table[] = { .mode = 0644, .proc_handler = &ipv4_doint_and_flush, .strategy = &ipv4_doint_and_flush_strategy, + .extra2 = &init_net, }, { .ctl_name = NET_IPV4_NO_PMTU_DISC, @@ -397,19 +393,12 @@ static struct ctl_table ipv4_table[] = { { .ctl_name = NET_IPV4_LOCAL_PORT_RANGE, .procname = "ip_local_port_range", - .data = &sysctl_local_port_range, - .maxlen = sizeof(sysctl_local_port_range), + .data = &sysctl_local_ports.range, + .maxlen = sizeof(sysctl_local_ports.range), .mode = 0644, .proc_handler = &ipv4_local_port_range, .strategy = &ipv4_sysctl_local_port_range, }, - { - .ctl_name = NET_IPV4_ROUTE, - .procname = "route", - .maxlen = 0, - .mode = 0555, - .child = ipv4_route_table - }, #ifdef CONFIG_IP_MULTICAST { .ctl_name = NET_IPV4_IGMP_MAX_MEMBERSHIPS, @@ -795,7 +784,8 @@ static struct ctl_table ipv4_net_table[] = { .data = &init_net.ipv4.sysctl_icmp_ratelimit, .maxlen = sizeof(int), .mode = 0644, - .proc_handler = &proc_dointvec + .proc_handler = &proc_dointvec_ms_jiffies, + .strategy = &sysctl_ms_jiffies }, { .ctl_name = NET_IPV4_ICMP_RATEMASK, diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index fc54a48fde1e..eccb7165a80c 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -5,8 +5,6 @@ * * Implementation of the Transmission Control Protocol(TCP). * - * Version: $Id: tcp.c,v 1.216 2002/02/01 22:01:04 davem Exp $ - * * Authors: Ross Biro * Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG> * Mark Evans, <evansmp@uhura.aston.ac.uk> @@ -255,11 +253,14 @@ #include <linux/init.h> #include <linux/fs.h> #include <linux/skbuff.h> +#include <linux/scatterlist.h> #include <linux/splice.h> #include <linux/net.h> #include <linux/socket.h> #include <linux/random.h> #include <linux/bootmem.h> +#include <linux/highmem.h> +#include <linux/swap.h> #include <linux/cache.h> #include <linux/err.h> #include <linux/crypto.h> @@ -276,8 +277,6 @@ int sysctl_tcp_fin_timeout __read_mostly = TCP_FIN_TIMEOUT; -DEFINE_SNMP_STAT(struct tcp_mib, tcp_statistics) __read_mostly; - atomic_t tcp_orphan_count = ATOMIC_INIT(0); EXPORT_SYMBOL_GPL(tcp_orphan_count); @@ -315,10 +314,10 @@ int tcp_memory_pressure __read_mostly; EXPORT_SYMBOL(tcp_memory_pressure); -void tcp_enter_memory_pressure(void) +void tcp_enter_memory_pressure(struct sock *sk) { if (!tcp_memory_pressure) { - NET_INC_STATS(LINUX_MIB_TCPMEMORYPRESSURES); + NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPMEMORYPRESSURES); tcp_memory_pressure = 1; } } @@ -343,8 +342,8 @@ unsigned int tcp_poll(struct file *file, struct socket *sock, poll_table *wait) return inet_csk_listen_poll(sk); /* Socket is not locked. We are protected from async events - by poll logic and correct handling of state changes - made by another threads is impossible in any case. + * by poll logic and correct handling of state changes + * made by other threads is impossible in any case. */ mask = 0; @@ -370,10 +369,10 @@ unsigned int tcp_poll(struct file *file, struct socket *sock, poll_table *wait) * in state CLOSE_WAIT. One solution is evident --- to set POLLHUP * if and only if shutdown has been made in both directions. * Actually, it is interesting to look how Solaris and DUX - * solve this dilemma. I would prefer, if PULLHUP were maskable, + * solve this dilemma. I would prefer, if POLLHUP were maskable, * then we could set it on SND_SHUTDOWN. BTW examples given * in Stevens' books assume exactly this behaviour, it explains - * why PULLHUP is incompatible with POLLOUT. --ANK + * why POLLHUP is incompatible with POLLOUT. --ANK * * NOTE. Check for TCP_CLOSE is added. The goal is to prevent * blocking on fresh not-connected or disconnected socket. --ANK @@ -385,13 +384,17 @@ unsigned int tcp_poll(struct file *file, struct socket *sock, poll_table *wait) /* Connected? */ if ((1 << sk->sk_state) & ~(TCPF_SYN_SENT | TCPF_SYN_RECV)) { + int target = sock_rcvlowat(sk, 0, INT_MAX); + + if (tp->urg_seq == tp->copied_seq && + !sock_flag(sk, SOCK_URGINLINE) && + tp->urg_data) + target--; + /* Potential race condition. If read of tp below will * escape above sk->sk_state, we can be illegally awaken * in SYN_* states. */ - if ((tp->rcv_nxt != tp->copied_seq) && - (tp->urg_seq != tp->copied_seq || - tp->rcv_nxt != tp->copied_seq + 1 || - sock_flag(sk, SOCK_URGINLINE) || !tp->urg_data)) + if (tp->rcv_nxt - tp->copied_seq >= target) mask |= POLLIN | POLLRDNORM; if (!(sk->sk_shutdown & SEND_SHUTDOWN)) { @@ -494,10 +497,8 @@ static inline void skb_entail(struct sock *sk, struct sk_buff *skb) static inline void tcp_mark_urg(struct tcp_sock *tp, int flags, struct sk_buff *skb) { - if (flags & MSG_OOB) { - tp->urg_mode = 1; + if (flags & MSG_OOB) tp->snd_up = tp->write_seq; - } } static inline void tcp_push(struct sock *sk, int flags, int mss_now, @@ -648,7 +649,7 @@ struct sk_buff *sk_stream_alloc_skb(struct sock *sk, int size, gfp_t gfp) } __kfree_skb(skb); } else { - sk->sk_prot->enter_memory_pressure(); + sk->sk_prot->enter_memory_pressure(sk); sk_stream_moderate_sndbuf(sk); } return NULL; @@ -1097,7 +1098,7 @@ void tcp_cleanup_rbuf(struct sock *sk, int copied) #if TCP_DEBUG struct sk_buff *skb = skb_peek(&sk->sk_receive_queue); - BUG_TRAP(!skb || before(tp->copied_seq, TCP_SKB_CB(skb)->end_seq)); + WARN_ON(skb && !before(tp->copied_seq, TCP_SKB_CB(skb)->end_seq)); #endif if (inet_csk_ack_scheduled(sk)) { @@ -1152,13 +1153,13 @@ static void tcp_prequeue_process(struct sock *sk) struct sk_buff *skb; struct tcp_sock *tp = tcp_sk(sk); - NET_INC_STATS_USER(LINUX_MIB_TCPPREQUEUED); + NET_INC_STATS_USER(sock_net(sk), LINUX_MIB_TCPPREQUEUED); /* RX process wants to run with disabled BHs, though it is not * necessary */ local_bh_disable(); while ((skb = __skb_dequeue(&tp->ucopy.prequeue)) != NULL) - sk->sk_backlog_rcv(sk, skb); + sk_backlog_rcv(sk, skb); local_bh_enable(); /* Clear memory counter. */ @@ -1206,7 +1207,8 @@ int tcp_read_sock(struct sock *sk, read_descriptor_t *desc, return -ENOTCONN; while ((skb = tcp_recv_skb(sk, seq, &offset)) != NULL) { if (offset < skb->len) { - size_t used, len; + int used; + size_t len; len = skb->len - offset; /* Stop reading if we hit a patch of urgent data */ @@ -1358,7 +1360,7 @@ int tcp_recvmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, goto found_ok_skb; if (tcp_hdr(skb)->fin) goto found_fin_ok; - BUG_TRAP(flags & MSG_PEEK); + WARN_ON(!(flags & MSG_PEEK)); skb = skb->next; } while (skb != (struct sk_buff *)&sk->sk_receive_queue); @@ -1421,8 +1423,8 @@ int tcp_recvmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, tp->ucopy.len = len; - BUG_TRAP(tp->copied_seq == tp->rcv_nxt || - (flags & (MSG_PEEK | MSG_TRUNC))); + WARN_ON(tp->copied_seq != tp->rcv_nxt && + !(flags & (MSG_PEEK | MSG_TRUNC))); /* Ugly... If prequeue is not empty, we have to * process it before releasing socket, otherwise @@ -1473,7 +1475,7 @@ int tcp_recvmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, /* __ Restore normal policy in scheduler __ */ if ((chunk = len - tp->ucopy.len) != 0) { - NET_ADD_STATS_USER(LINUX_MIB_TCPDIRECTCOPYFROMBACKLOG, chunk); + NET_ADD_STATS_USER(sock_net(sk), LINUX_MIB_TCPDIRECTCOPYFROMBACKLOG, chunk); len -= chunk; copied += chunk; } @@ -1484,7 +1486,7 @@ do_prequeue: tcp_prequeue_process(sk); if ((chunk = len - tp->ucopy.len) != 0) { - NET_ADD_STATS_USER(LINUX_MIB_TCPDIRECTCOPYFROMPREQUEUE, chunk); + NET_ADD_STATS_USER(sock_net(sk), LINUX_MIB_TCPDIRECTCOPYFROMPREQUEUE, chunk); len -= chunk; copied += chunk; } @@ -1599,7 +1601,7 @@ skip_copy: tcp_prequeue_process(sk); if (copied > 0 && (chunk = len - tp->ucopy.len) != 0) { - NET_ADD_STATS_USER(LINUX_MIB_TCPDIRECTCOPYFROMPREQUEUE, chunk); + NET_ADD_STATS_USER(sock_net(sk), LINUX_MIB_TCPDIRECTCOPYFROMPREQUEUE, chunk); len -= chunk; copied += chunk; } @@ -1666,12 +1668,12 @@ void tcp_set_state(struct sock *sk, int state) switch (state) { case TCP_ESTABLISHED: if (oldstate != TCP_ESTABLISHED) - TCP_INC_STATS(TCP_MIB_CURRESTAB); + TCP_INC_STATS(sock_net(sk), TCP_MIB_CURRESTAB); break; case TCP_CLOSE: if (oldstate == TCP_CLOSE_WAIT || oldstate == TCP_ESTABLISHED) - TCP_INC_STATS(TCP_MIB_ESTABRESETS); + TCP_INC_STATS(sock_net(sk), TCP_MIB_ESTABRESETS); sk->sk_prot->unhash(sk); if (inet_csk(sk)->icsk_bind_hash && @@ -1680,7 +1682,7 @@ void tcp_set_state(struct sock *sk, int state) /* fall through */ default: if (oldstate==TCP_ESTABLISHED) - TCP_DEC_STATS(TCP_MIB_CURRESTAB); + TCP_DEC_STATS(sock_net(sk), TCP_MIB_CURRESTAB); } /* Change state AFTER socket is unhashed to avoid closed @@ -1791,13 +1793,13 @@ void tcp_close(struct sock *sk, long timeout) */ if (data_was_unread) { /* Unread data was tossed, zap the connection. */ - NET_INC_STATS_USER(LINUX_MIB_TCPABORTONCLOSE); + NET_INC_STATS_USER(sock_net(sk), LINUX_MIB_TCPABORTONCLOSE); tcp_set_state(sk, TCP_CLOSE); tcp_send_active_reset(sk, GFP_KERNEL); } else if (sock_flag(sk, SOCK_LINGER) && !sk->sk_lingertime) { /* Check zero linger _after_ checking for unread data. */ sk->sk_prot->disconnect(sk, 0); - NET_INC_STATS_USER(LINUX_MIB_TCPABORTONDATA); + NET_INC_STATS_USER(sock_net(sk), LINUX_MIB_TCPABORTONDATA); } else if (tcp_close_state(sk)) { /* We FIN if the application ate all the data before * zapping the connection. @@ -1844,7 +1846,7 @@ adjudge_to_death: */ local_bh_disable(); bh_lock_sock(sk); - BUG_TRAP(!sock_owned_by_user(sk)); + WARN_ON(sock_owned_by_user(sk)); /* Have we already been destroyed by a softirq or backlog? */ if (state != TCP_CLOSE && sk->sk_state == TCP_CLOSE) @@ -1869,7 +1871,8 @@ adjudge_to_death: if (tp->linger2 < 0) { tcp_set_state(sk, TCP_CLOSE); tcp_send_active_reset(sk, GFP_ATOMIC); - NET_INC_STATS_BH(LINUX_MIB_TCPABORTONLINGER); + NET_INC_STATS_BH(sock_net(sk), + LINUX_MIB_TCPABORTONLINGER); } else { const int tmo = tcp_fin_time(sk); @@ -1891,7 +1894,8 @@ adjudge_to_death: "sockets\n"); tcp_set_state(sk, TCP_CLOSE); tcp_send_active_reset(sk, GFP_ATOMIC); - NET_INC_STATS_BH(LINUX_MIB_TCPABORTONMEMORY); + NET_INC_STATS_BH(sock_net(sk), + LINUX_MIB_TCPABORTONMEMORY); } } @@ -1971,7 +1975,7 @@ int tcp_disconnect(struct sock *sk, int flags) memset(&tp->rx_opt, 0, sizeof(tp->rx_opt)); __sk_dst_reset(sk); - BUG_TRAP(!inet->num || icsk->icsk_bind_hash); + WARN_ON(inet->num && !icsk->icsk_bind_hash); sk->sk_error_report(sk); return err; @@ -2586,12 +2590,69 @@ void __tcp_put_md5sig_pool(void) } EXPORT_SYMBOL(__tcp_put_md5sig_pool); + +int tcp_md5_hash_header(struct tcp_md5sig_pool *hp, + struct tcphdr *th) +{ + struct scatterlist sg; + int err; + + __sum16 old_checksum = th->check; + th->check = 0; + /* options aren't included in the hash */ + sg_init_one(&sg, th, sizeof(struct tcphdr)); + err = crypto_hash_update(&hp->md5_desc, &sg, sizeof(struct tcphdr)); + th->check = old_checksum; + return err; +} + +EXPORT_SYMBOL(tcp_md5_hash_header); + +int tcp_md5_hash_skb_data(struct tcp_md5sig_pool *hp, + struct sk_buff *skb, unsigned header_len) +{ + struct scatterlist sg; + const struct tcphdr *tp = tcp_hdr(skb); + struct hash_desc *desc = &hp->md5_desc; + unsigned i; + const unsigned head_data_len = skb_headlen(skb) > header_len ? + skb_headlen(skb) - header_len : 0; + const struct skb_shared_info *shi = skb_shinfo(skb); + + sg_init_table(&sg, 1); + + sg_set_buf(&sg, ((u8 *) tp) + header_len, head_data_len); + if (crypto_hash_update(desc, &sg, head_data_len)) + return 1; + + for (i = 0; i < shi->nr_frags; ++i) { + const struct skb_frag_struct *f = &shi->frags[i]; + sg_set_page(&sg, f->page, f->size, f->page_offset); + if (crypto_hash_update(desc, &sg, f->size)) + return 1; + } + + return 0; +} + +EXPORT_SYMBOL(tcp_md5_hash_skb_data); + +int tcp_md5_hash_key(struct tcp_md5sig_pool *hp, struct tcp_md5sig_key *key) +{ + struct scatterlist sg; + + sg_init_one(&sg, key->key, key->keylen); + return crypto_hash_update(&hp->md5_desc, &sg, key->keylen); +} + +EXPORT_SYMBOL(tcp_md5_hash_key); + #endif void tcp_done(struct sock *sk) { if(sk->sk_state == TCP_SYN_SENT || sk->sk_state == TCP_SYN_RECV) - TCP_INC_STATS_BH(TCP_MIB_ATTEMPTFAILS); + TCP_INC_STATS_BH(sock_net(sk), TCP_MIB_ATTEMPTFAILS); tcp_set_state(sk, TCP_CLOSE); tcp_clear_xmit_timers(sk); @@ -2620,7 +2681,7 @@ __setup("thash_entries=", set_thash_entries); void __init tcp_init(void) { struct sk_buff *skb = NULL; - unsigned long limit; + unsigned long nr_pages, limit; int order, i, max_share; BUILD_BUG_ON(sizeof(struct tcp_skb_cb) > sizeof(skb->cb)); @@ -2689,8 +2750,9 @@ void __init tcp_init(void) * is up to 1/2 at 256 MB, decreasing toward zero with the amount of * memory, with a floor of 128 pages. */ - limit = min(nr_all_pages, 1UL<<(28-PAGE_SHIFT)) >> (20-PAGE_SHIFT); - limit = (limit * (nr_all_pages >> (20-PAGE_SHIFT))) >> (PAGE_SHIFT-11); + nr_pages = totalram_pages - totalhigh_pages; + limit = min(nr_pages, 1UL<<(28-PAGE_SHIFT)) >> (20-PAGE_SHIFT); + limit = (limit * (nr_pages >> (20-PAGE_SHIFT))) >> (PAGE_SHIFT-11); limit = max(limit, 128UL); sysctl_tcp_mem[0] = limit / 4 * 3; sysctl_tcp_mem[1] = limit; @@ -2727,4 +2789,3 @@ EXPORT_SYMBOL(tcp_splice_read); EXPORT_SYMBOL(tcp_sendpage); EXPORT_SYMBOL(tcp_setsockopt); EXPORT_SYMBOL(tcp_shutdown); -EXPORT_SYMBOL(tcp_statistics); diff --git a/net/ipv4/tcp_diag.c b/net/ipv4/tcp_diag.c index 2fbcc7d1b1a0..838d491dfda7 100644 --- a/net/ipv4/tcp_diag.c +++ b/net/ipv4/tcp_diag.c @@ -1,8 +1,6 @@ /* * tcp_diag.c Module for monitoring TCP transport protocols sockets. * - * Version: $Id: tcp_diag.c,v 1.3 2002/02/01 22:01:04 davem Exp $ - * * Authors: Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru> * * This program is free software; you can redistribute it and/or diff --git a/net/ipv4/tcp_hybla.c b/net/ipv4/tcp_hybla.c index bfcbd148a89d..c209e054a634 100644 --- a/net/ipv4/tcp_hybla.c +++ b/net/ipv4/tcp_hybla.c @@ -150,7 +150,11 @@ static void hybla_cong_avoid(struct sock *sk, u32 ack, u32 in_flight) ca->snd_cwnd_cents -= 128; tp->snd_cwnd_cnt = 0; } - + /* check when cwnd has not been incremented for a while */ + if (increment == 0 && odd == 0 && tp->snd_cwnd_cnt >= tp->snd_cwnd) { + tp->snd_cwnd++; + tp->snd_cwnd_cnt = 0; + } /* clamp down slowstart cwnd to ssthresh value. */ if (is_slowstart) tp->snd_cwnd = min(tp->snd_cwnd, tp->snd_ssthresh); diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index cad73b7dfef0..d77c0d29e239 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -5,8 +5,6 @@ * * Implementation of the Transmission Control Protocol(TCP). * - * Version: $Id: tcp_input.c,v 1.243 2002/02/01 22:01:04 davem Exp $ - * * Authors: Ross Biro * Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG> * Mark Evans, <evansmp@uhura.aston.ac.uk> @@ -604,7 +602,7 @@ static u32 tcp_rto_min(struct sock *sk) u32 rto_min = TCP_RTO_MIN; if (dst && dst_metric_locked(dst, RTAX_RTO_MIN)) - rto_min = dst_metric(dst, RTAX_RTO_MIN); + rto_min = dst_metric_rtt(dst, RTAX_RTO_MIN); return rto_min; } @@ -731,6 +729,7 @@ void tcp_update_metrics(struct sock *sk) if (dst && (dst->flags & DST_HOST)) { const struct inet_connection_sock *icsk = inet_csk(sk); int m; + unsigned long rtt; if (icsk->icsk_backoff || !tp->srtt) { /* This session failed to estimate rtt. Why? @@ -742,7 +741,8 @@ void tcp_update_metrics(struct sock *sk) return; } - m = dst_metric(dst, RTAX_RTT) - tp->srtt; + rtt = dst_metric_rtt(dst, RTAX_RTT); + m = rtt - tp->srtt; /* If newly calculated rtt larger than stored one, * store new one. Otherwise, use EWMA. Remember, @@ -750,12 +750,13 @@ void tcp_update_metrics(struct sock *sk) */ if (!(dst_metric_locked(dst, RTAX_RTT))) { if (m <= 0) - dst->metrics[RTAX_RTT - 1] = tp->srtt; + set_dst_metric_rtt(dst, RTAX_RTT, tp->srtt); else - dst->metrics[RTAX_RTT - 1] -= (m >> 3); + set_dst_metric_rtt(dst, RTAX_RTT, rtt - (m >> 3)); } if (!(dst_metric_locked(dst, RTAX_RTTVAR))) { + unsigned long var; if (m < 0) m = -m; @@ -764,11 +765,13 @@ void tcp_update_metrics(struct sock *sk) if (m < tp->mdev) m = tp->mdev; - if (m >= dst_metric(dst, RTAX_RTTVAR)) - dst->metrics[RTAX_RTTVAR - 1] = m; + var = dst_metric_rtt(dst, RTAX_RTTVAR); + if (m >= var) + var = m; else - dst->metrics[RTAX_RTTVAR-1] -= - (dst_metric(dst, RTAX_RTTVAR) - m)>>2; + var -= (var - m) >> 2; + + set_dst_metric_rtt(dst, RTAX_RTTVAR, var); } if (tp->snd_ssthresh >= 0xFFFF) { @@ -899,7 +902,7 @@ static void tcp_init_metrics(struct sock *sk) if (dst_metric(dst, RTAX_RTT) == 0) goto reset; - if (!tp->srtt && dst_metric(dst, RTAX_RTT) < (TCP_TIMEOUT_INIT << 3)) + if (!tp->srtt && dst_metric_rtt(dst, RTAX_RTT) < (TCP_TIMEOUT_INIT << 3)) goto reset; /* Initial rtt is determined from SYN,SYN-ACK. @@ -916,12 +919,12 @@ static void tcp_init_metrics(struct sock *sk) * to low value, and then abruptly stops to do it and starts to delay * ACKs, wait for troubles. */ - if (dst_metric(dst, RTAX_RTT) > tp->srtt) { - tp->srtt = dst_metric(dst, RTAX_RTT); + if (dst_metric_rtt(dst, RTAX_RTT) > tp->srtt) { + tp->srtt = dst_metric_rtt(dst, RTAX_RTT); tp->rtt_seq = tp->snd_nxt; } - if (dst_metric(dst, RTAX_RTTVAR) > tp->mdev) { - tp->mdev = dst_metric(dst, RTAX_RTTVAR); + if (dst_metric_rtt(dst, RTAX_RTTVAR) > tp->mdev) { + tp->mdev = dst_metric_rtt(dst, RTAX_RTTVAR); tp->mdev_max = tp->rttvar = max(tp->mdev, tcp_rto_min(sk)); } tcp_set_rto(sk); @@ -949,17 +952,21 @@ static void tcp_update_reordering(struct sock *sk, const int metric, { struct tcp_sock *tp = tcp_sk(sk); if (metric > tp->reordering) { + int mib_idx; + tp->reordering = min(TCP_MAX_REORDERING, metric); /* This exciting event is worth to be remembered. 8) */ if (ts) - NET_INC_STATS_BH(LINUX_MIB_TCPTSREORDER); + mib_idx = LINUX_MIB_TCPTSREORDER; else if (tcp_is_reno(tp)) - NET_INC_STATS_BH(LINUX_MIB_TCPRENOREORDER); + mib_idx = LINUX_MIB_TCPRENOREORDER; else if (tcp_is_fack(tp)) - NET_INC_STATS_BH(LINUX_MIB_TCPFACKREORDER); + mib_idx = LINUX_MIB_TCPFACKREORDER; else - NET_INC_STATS_BH(LINUX_MIB_TCPSACKREORDER); + mib_idx = LINUX_MIB_TCPSACKREORDER; + + NET_INC_STATS_BH(sock_net(sk), mib_idx); #if FASTRETRANS_DEBUG > 1 printk(KERN_DEBUG "Disorder%d %d %u f%u s%u rr%d\n", tp->rx_opt.sack_ok, inet_csk(sk)->icsk_ca_state, @@ -972,6 +979,39 @@ static void tcp_update_reordering(struct sock *sk, const int metric, } } +/* This must be called before lost_out is incremented */ +static void tcp_verify_retransmit_hint(struct tcp_sock *tp, struct sk_buff *skb) +{ + if ((tp->retransmit_skb_hint == NULL) || + before(TCP_SKB_CB(skb)->seq, + TCP_SKB_CB(tp->retransmit_skb_hint)->seq)) + tp->retransmit_skb_hint = skb; + + if (!tp->lost_out || + after(TCP_SKB_CB(skb)->end_seq, tp->retransmit_high)) + tp->retransmit_high = TCP_SKB_CB(skb)->end_seq; +} + +static void tcp_skb_mark_lost(struct tcp_sock *tp, struct sk_buff *skb) +{ + if (!(TCP_SKB_CB(skb)->sacked & (TCPCB_LOST|TCPCB_SACKED_ACKED))) { + tcp_verify_retransmit_hint(tp, skb); + + tp->lost_out += tcp_skb_pcount(skb); + TCP_SKB_CB(skb)->sacked |= TCPCB_LOST; + } +} + +void tcp_skb_mark_lost_uncond_verify(struct tcp_sock *tp, struct sk_buff *skb) +{ + tcp_verify_retransmit_hint(tp, skb); + + if (!(TCP_SKB_CB(skb)->sacked & (TCPCB_LOST|TCPCB_SACKED_ACKED))) { + tp->lost_out += tcp_skb_pcount(skb); + TCP_SKB_CB(skb)->sacked |= TCPCB_LOST; + } +} + /* This procedure tags the retransmission queue when SACKs arrive. * * We have three tag bits: SACKED(S), RETRANS(R) and LOST(L). @@ -1148,14 +1188,8 @@ static void tcp_mark_lost_retrans(struct sock *sk) TCP_SKB_CB(skb)->sacked &= ~TCPCB_SACKED_RETRANS; tp->retrans_out -= tcp_skb_pcount(skb); - /* clear lost hint */ - tp->retransmit_skb_hint = NULL; - - if (!(TCP_SKB_CB(skb)->sacked & (TCPCB_LOST|TCPCB_SACKED_ACKED))) { - tp->lost_out += tcp_skb_pcount(skb); - TCP_SKB_CB(skb)->sacked |= TCPCB_LOST; - } - NET_INC_STATS_BH(LINUX_MIB_TCPLOSTRETRANSMIT); + tcp_skb_mark_lost_uncond_verify(tp, skb); + NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPLOSTRETRANSMIT); } else { if (before(ack_seq, new_low_seq)) new_low_seq = ack_seq; @@ -1167,10 +1201,11 @@ static void tcp_mark_lost_retrans(struct sock *sk) tp->lost_retrans_low = new_low_seq; } -static int tcp_check_dsack(struct tcp_sock *tp, struct sk_buff *ack_skb, +static int tcp_check_dsack(struct sock *sk, struct sk_buff *ack_skb, struct tcp_sack_block_wire *sp, int num_sacks, u32 prior_snd_una) { + struct tcp_sock *tp = tcp_sk(sk); u32 start_seq_0 = get_unaligned_be32(&sp[0].start_seq); u32 end_seq_0 = get_unaligned_be32(&sp[0].end_seq); int dup_sack = 0; @@ -1178,7 +1213,7 @@ static int tcp_check_dsack(struct tcp_sock *tp, struct sk_buff *ack_skb, if (before(start_seq_0, TCP_SKB_CB(ack_skb)->ack_seq)) { dup_sack = 1; tcp_dsack_seen(tp); - NET_INC_STATS_BH(LINUX_MIB_TCPDSACKRECV); + NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPDSACKRECV); } else if (num_sacks > 1) { u32 end_seq_1 = get_unaligned_be32(&sp[1].end_seq); u32 start_seq_1 = get_unaligned_be32(&sp[1].start_seq); @@ -1187,7 +1222,8 @@ static int tcp_check_dsack(struct tcp_sock *tp, struct sk_buff *ack_skb, !before(start_seq_0, start_seq_1)) { dup_sack = 1; tcp_dsack_seen(tp); - NET_INC_STATS_BH(LINUX_MIB_TCPDSACKOFORECV); + NET_INC_STATS_BH(sock_net(sk), + LINUX_MIB_TCPDSACKOFORECV); } } @@ -1262,9 +1298,6 @@ static int tcp_sacktag_one(struct sk_buff *skb, struct sock *sk, ~(TCPCB_LOST|TCPCB_SACKED_RETRANS); tp->lost_out -= tcp_skb_pcount(skb); tp->retrans_out -= tcp_skb_pcount(skb); - - /* clear lost hint */ - tp->retransmit_skb_hint = NULL; } } else { if (!(sacked & TCPCB_RETRANS)) { @@ -1283,9 +1316,6 @@ static int tcp_sacktag_one(struct sk_buff *skb, struct sock *sk, if (sacked & TCPCB_LOST) { TCP_SKB_CB(skb)->sacked &= ~TCPCB_LOST; tp->lost_out -= tcp_skb_pcount(skb); - - /* clear lost hint */ - tp->retransmit_skb_hint = NULL; } } @@ -1315,7 +1345,6 @@ static int tcp_sacktag_one(struct sk_buff *skb, struct sock *sk, if (dup_sack && (TCP_SKB_CB(skb)->sacked & TCPCB_SACKED_RETRANS)) { TCP_SKB_CB(skb)->sacked &= ~TCPCB_SACKED_RETRANS; tp->retrans_out -= tcp_skb_pcount(skb); - tp->retransmit_skb_hint = NULL; } return flag; @@ -1414,10 +1443,10 @@ tcp_sacktag_write_queue(struct sock *sk, struct sk_buff *ack_skb, unsigned char *ptr = (skb_transport_header(ack_skb) + TCP_SKB_CB(ack_skb)->sacked); struct tcp_sack_block_wire *sp_wire = (struct tcp_sack_block_wire *)(ptr+2); - struct tcp_sack_block sp[4]; + struct tcp_sack_block sp[TCP_NUM_SACKS]; struct tcp_sack_block *cache; struct sk_buff *skb; - int num_sacks = (ptr[1] - TCPOLEN_SACK_BASE) >> 3; + int num_sacks = min(TCP_NUM_SACKS, (ptr[1] - TCPOLEN_SACK_BASE) >> 3); int used_sacks; int reord = tp->packets_out; int flag = 0; @@ -1432,7 +1461,7 @@ tcp_sacktag_write_queue(struct sock *sk, struct sk_buff *ack_skb, tcp_highest_sack_reset(sk); } - found_dup_sack = tcp_check_dsack(tp, ack_skb, sp_wire, + found_dup_sack = tcp_check_dsack(sk, ack_skb, sp_wire, num_sacks, prior_snd_una); if (found_dup_sack) flag |= FLAG_DSACKING_ACK; @@ -1458,18 +1487,22 @@ tcp_sacktag_write_queue(struct sock *sk, struct sk_buff *ack_skb, if (!tcp_is_sackblock_valid(tp, dup_sack, sp[used_sacks].start_seq, sp[used_sacks].end_seq)) { + int mib_idx; + if (dup_sack) { if (!tp->undo_marker) - NET_INC_STATS_BH(LINUX_MIB_TCPDSACKIGNOREDNOUNDO); + mib_idx = LINUX_MIB_TCPDSACKIGNOREDNOUNDO; else - NET_INC_STATS_BH(LINUX_MIB_TCPDSACKIGNOREDOLD); + mib_idx = LINUX_MIB_TCPDSACKIGNOREDOLD; } else { /* Don't count olds caused by ACK reordering */ if ((TCP_SKB_CB(ack_skb)->ack_seq != tp->snd_una) && !after(sp[used_sacks].end_seq, tp->snd_una)) continue; - NET_INC_STATS_BH(LINUX_MIB_TCPSACKDISCARD); + mib_idx = LINUX_MIB_TCPSACKDISCARD; } + + NET_INC_STATS_BH(sock_net(sk), mib_idx); if (i == 0) first_sack_index = -1; continue; @@ -1616,10 +1649,10 @@ advance_sp: out: #if FASTRETRANS_DEBUG > 0 - BUG_TRAP((int)tp->sacked_out >= 0); - BUG_TRAP((int)tp->lost_out >= 0); - BUG_TRAP((int)tp->retrans_out >= 0); - BUG_TRAP((int)tcp_packets_in_flight(tp) >= 0); + WARN_ON((int)tp->sacked_out < 0); + WARN_ON((int)tp->lost_out < 0); + WARN_ON((int)tp->retrans_out < 0); + WARN_ON((int)tcp_packets_in_flight(tp) < 0); #endif return flag; } @@ -1713,6 +1746,8 @@ int tcp_use_frto(struct sock *sk) return 0; skb = tcp_write_queue_head(sk); + if (tcp_skb_is_last(sk, skb)) + return 1; skb = tcp_write_queue_next(sk, skb); /* Skips head */ tcp_for_write_queue_from(skb, sk) { if (skb == tcp_send_head(sk)) @@ -1854,6 +1889,7 @@ static void tcp_enter_frto_loss(struct sock *sk, int allowed_segments, int flag) if (!(TCP_SKB_CB(skb)->sacked & TCPCB_SACKED_ACKED)) { TCP_SKB_CB(skb)->sacked |= TCPCB_LOST; tp->lost_out += tcp_skb_pcount(skb); + tp->retransmit_high = TCP_SKB_CB(skb)->end_seq; } } tcp_verify_left_out(tp); @@ -1870,7 +1906,7 @@ static void tcp_enter_frto_loss(struct sock *sk, int allowed_segments, int flag) tp->high_seq = tp->snd_nxt; TCP_ECN_queue_cwr(tp); - tcp_clear_retrans_hints_partial(tp); + tcp_clear_all_retrans_hints(tp); } static void tcp_clear_retrans_partial(struct tcp_sock *tp) @@ -1921,12 +1957,11 @@ void tcp_enter_loss(struct sock *sk, int how) /* Push undo marker, if it was plain RTO and nothing * was retransmitted. */ tp->undo_marker = tp->snd_una; - tcp_clear_retrans_hints_partial(tp); } else { tp->sacked_out = 0; tp->fackets_out = 0; - tcp_clear_all_retrans_hints(tp); } + tcp_clear_all_retrans_hints(tp); tcp_for_write_queue(skb, sk) { if (skb == tcp_send_head(sk)) @@ -1939,6 +1974,7 @@ void tcp_enter_loss(struct sock *sk, int how) TCP_SKB_CB(skb)->sacked &= ~TCPCB_SACKED_ACKED; TCP_SKB_CB(skb)->sacked |= TCPCB_LOST; tp->lost_out += tcp_skb_pcount(skb); + tp->retransmit_high = TCP_SKB_CB(skb)->end_seq; } } tcp_verify_left_out(tp); @@ -1962,7 +1998,7 @@ static int tcp_check_sack_reneging(struct sock *sk, int flag) { if (flag & FLAG_SACK_RENEGING) { struct inet_connection_sock *icsk = inet_csk(sk); - NET_INC_STATS_BH(LINUX_MIB_TCPSACKRENEGING); + NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPSACKRENEGING); tcp_enter_loss(sk, 1); icsk->icsk_retransmits++; @@ -2144,19 +2180,6 @@ static int tcp_time_to_recover(struct sock *sk) return 0; } -/* RFC: This is from the original, I doubt that this is necessary at all: - * clear xmit_retrans hint if seq of this skb is beyond hint. How could we - * retransmitted past LOST markings in the first place? I'm not fully sure - * about undo and end of connection cases, which can cause R without L? - */ -static void tcp_verify_retransmit_hint(struct tcp_sock *tp, struct sk_buff *skb) -{ - if ((tp->retransmit_skb_hint != NULL) && - before(TCP_SKB_CB(skb)->seq, - TCP_SKB_CB(tp->retransmit_skb_hint)->seq)) - tp->retransmit_skb_hint = NULL; -} - /* Mark head of queue up as lost. With RFC3517 SACK, the packets is * is against sacked "cnt", otherwise it's against facked "cnt" */ @@ -2168,7 +2191,7 @@ static void tcp_mark_head_lost(struct sock *sk, int packets) int err; unsigned int mss; - BUG_TRAP(packets <= tp->packets_out); + WARN_ON(packets > tp->packets_out); if (tp->lost_skb_hint) { skb = tp->lost_skb_hint; cnt = tp->lost_cnt_hint; @@ -2204,11 +2227,7 @@ static void tcp_mark_head_lost(struct sock *sk, int packets) cnt = packets; } - if (!(TCP_SKB_CB(skb)->sacked & (TCPCB_SACKED_ACKED|TCPCB_LOST))) { - TCP_SKB_CB(skb)->sacked |= TCPCB_LOST; - tp->lost_out += tcp_skb_pcount(skb); - tcp_verify_retransmit_hint(tp, skb); - } + tcp_skb_mark_lost(tp, skb); } tcp_verify_left_out(tp); } @@ -2250,11 +2269,7 @@ static void tcp_update_scoreboard(struct sock *sk, int fast_rexmit) if (!tcp_skb_timedout(sk, skb)) break; - if (!(TCP_SKB_CB(skb)->sacked & (TCPCB_SACKED_ACKED|TCPCB_LOST))) { - TCP_SKB_CB(skb)->sacked |= TCPCB_LOST; - tp->lost_out += tcp_skb_pcount(skb); - tcp_verify_retransmit_hint(tp, skb); - } + tcp_skb_mark_lost(tp, skb); } tp->scoreboard_skb_hint = skb; @@ -2365,10 +2380,6 @@ static void tcp_undo_cwr(struct sock *sk, const int undo) } tcp_moderate_cwnd(tp); tp->snd_cwnd_stamp = tcp_time_stamp; - - /* There is something screwy going on with the retrans hints after - an undo */ - tcp_clear_all_retrans_hints(tp); } static inline int tcp_may_undo(struct tcp_sock *tp) @@ -2382,15 +2393,19 @@ static int tcp_try_undo_recovery(struct sock *sk) struct tcp_sock *tp = tcp_sk(sk); if (tcp_may_undo(tp)) { + int mib_idx; + /* Happy end! We did not retransmit anything * or our original transmission succeeded. */ DBGUNDO(sk, inet_csk(sk)->icsk_ca_state == TCP_CA_Loss ? "loss" : "retrans"); tcp_undo_cwr(sk, 1); if (inet_csk(sk)->icsk_ca_state == TCP_CA_Loss) - NET_INC_STATS_BH(LINUX_MIB_TCPLOSSUNDO); + mib_idx = LINUX_MIB_TCPLOSSUNDO; else - NET_INC_STATS_BH(LINUX_MIB_TCPFULLUNDO); + mib_idx = LINUX_MIB_TCPFULLUNDO; + + NET_INC_STATS_BH(sock_net(sk), mib_idx); tp->undo_marker = 0; } if (tp->snd_una == tp->high_seq && tcp_is_reno(tp)) { @@ -2413,7 +2428,7 @@ static void tcp_try_undo_dsack(struct sock *sk) DBGUNDO(sk, "D-SACK"); tcp_undo_cwr(sk, 1); tp->undo_marker = 0; - NET_INC_STATS_BH(LINUX_MIB_TCPDSACKUNDO); + NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPDSACKUNDO); } } @@ -2436,7 +2451,7 @@ static int tcp_try_undo_partial(struct sock *sk, int acked) DBGUNDO(sk, "Hoe"); tcp_undo_cwr(sk, 0); - NET_INC_STATS_BH(LINUX_MIB_TCPPARTIALUNDO); + NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPPARTIALUNDO); /* So... Do not make Hoe's retransmit yet. * If the first packet was delayed, the rest @@ -2465,7 +2480,7 @@ static int tcp_try_undo_loss(struct sock *sk) DBGUNDO(sk, "partial loss"); tp->lost_out = 0; tcp_undo_cwr(sk, 1); - NET_INC_STATS_BH(LINUX_MIB_TCPLOSSUNDO); + NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPLOSSUNDO); inet_csk(sk)->icsk_retransmits = 0; tp->undo_marker = 0; if (tcp_is_sack(tp)) @@ -2562,7 +2577,7 @@ static void tcp_fastretrans_alert(struct sock *sk, int pkts_acked, int flag) int is_dupack = !(flag & (FLAG_SND_UNA_ADVANCED | FLAG_NOT_DUP)); int do_lost = is_dupack || ((flag & FLAG_DATA_SACKED) && (tcp_fackets_out(tp) > tp->reordering)); - int fast_rexmit = 0; + int fast_rexmit = 0, mib_idx; if (WARN_ON(!tp->packets_out && tp->sacked_out)) tp->sacked_out = 0; @@ -2584,7 +2599,7 @@ static void tcp_fastretrans_alert(struct sock *sk, int pkts_acked, int flag) icsk->icsk_ca_state != TCP_CA_Open && tp->fackets_out > tp->reordering) { tcp_mark_head_lost(sk, tp->fackets_out - tp->reordering); - NET_INC_STATS_BH(LINUX_MIB_TCPLOSS); + NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPLOSS); } /* D. Check consistency of the current state. */ @@ -2593,7 +2608,7 @@ static void tcp_fastretrans_alert(struct sock *sk, int pkts_acked, int flag) /* E. Check state exit conditions. State can be terminated * when high_seq is ACKed. */ if (icsk->icsk_ca_state == TCP_CA_Open) { - BUG_TRAP(tp->retrans_out == 0); + WARN_ON(tp->retrans_out != 0); tp->retrans_stamp = 0; } else if (!before(tp->snd_una, tp->high_seq)) { switch (icsk->icsk_ca_state) { @@ -2685,9 +2700,11 @@ static void tcp_fastretrans_alert(struct sock *sk, int pkts_acked, int flag) /* Otherwise enter Recovery state */ if (tcp_is_reno(tp)) - NET_INC_STATS_BH(LINUX_MIB_TCPRENORECOVERY); + mib_idx = LINUX_MIB_TCPRENORECOVERY; else - NET_INC_STATS_BH(LINUX_MIB_TCPSACKRECOVERY); + mib_idx = LINUX_MIB_TCPSACKRECOVERY; + + NET_INC_STATS_BH(sock_net(sk), mib_idx); tp->high_seq = tp->snd_nxt; tp->prior_ssthresh = 0; @@ -2819,7 +2836,8 @@ static u32 tcp_tso_acked(struct sock *sk, struct sk_buff *skb) * is before the ack sequence we can discard it as it's confirmed to have * arrived at the other end. */ -static int tcp_clean_rtx_queue(struct sock *sk, int prior_fackets) +static int tcp_clean_rtx_queue(struct sock *sk, int prior_fackets, + u32 prior_snd_una) { struct tcp_sock *tp = tcp_sk(sk); const struct inet_connection_sock *icsk = inet_csk(sk); @@ -2829,6 +2847,7 @@ static int tcp_clean_rtx_queue(struct sock *sk, int prior_fackets) int flag = 0; u32 pkts_acked = 0; u32 reord = tp->packets_out; + u32 prior_sacked = tp->sacked_out; s32 seq_rtt = -1; s32 ca_seq_rtt = -1; ktime_t last_ackt = net_invalid_timestamp(); @@ -2885,9 +2904,6 @@ static int tcp_clean_rtx_queue(struct sock *sk, int prior_fackets) if (sacked & TCPCB_LOST) tp->lost_out -= acked_pcount; - if (unlikely(tp->urg_mode && !before(end_seq, tp->snd_up))) - tp->urg_mode = 0; - tp->packets_out -= acked_pcount; pkts_acked += acked_pcount; @@ -2910,9 +2926,16 @@ static int tcp_clean_rtx_queue(struct sock *sk, int prior_fackets) tcp_unlink_write_queue(skb, sk); sk_wmem_free_skb(sk, skb); - tcp_clear_all_retrans_hints(tp); + tp->scoreboard_skb_hint = NULL; + if (skb == tp->retransmit_skb_hint) + tp->retransmit_skb_hint = NULL; + if (skb == tp->lost_skb_hint) + tp->lost_skb_hint = NULL; } + if (likely(between(tp->snd_up, prior_snd_una, tp->snd_una))) + tp->snd_up = tp->snd_una; + if (skb && (TCP_SKB_CB(skb)->sacked & TCPCB_SACKED_ACKED)) flag |= FLAG_SACK_RENEGING; @@ -2929,6 +2952,15 @@ static int tcp_clean_rtx_queue(struct sock *sk, int prior_fackets) /* Non-retransmitted hole got filled? That's reordering */ if (reord < prior_fackets) tcp_update_reordering(sk, tp->fackets_out - reord, 0); + + /* No need to care for underflows here because + * the lost_skb_hint gets NULLed if we're past it + * (or something non-trivial happened) + */ + if (tcp_is_fack(tp)) + tp->lost_cnt_hint -= pkts_acked; + else + tp->lost_cnt_hint -= prior_sacked - tp->sacked_out; } tp->fackets_out -= min(pkts_acked, tp->fackets_out); @@ -2953,9 +2985,9 @@ static int tcp_clean_rtx_queue(struct sock *sk, int prior_fackets) } #if FASTRETRANS_DEBUG > 0 - BUG_TRAP((int)tp->sacked_out >= 0); - BUG_TRAP((int)tp->lost_out >= 0); - BUG_TRAP((int)tp->retrans_out >= 0); + WARN_ON((int)tp->sacked_out < 0); + WARN_ON((int)tp->lost_out < 0); + WARN_ON((int)tp->retrans_out < 0); if (!tp->packets_out && tcp_is_sack(tp)) { icsk = inet_csk(sk); if (tp->lost_out) { @@ -3198,7 +3230,7 @@ static int tcp_process_frto(struct sock *sk, int flag) } tp->frto_counter = 0; tp->undo_marker = 0; - NET_INC_STATS_BH(LINUX_MIB_TCPSPURIOUSRTOS); + NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPSPURIOUSRTOS); } return 0; } @@ -3251,12 +3283,12 @@ static int tcp_ack(struct sock *sk, struct sk_buff *skb, int flag) tcp_ca_event(sk, CA_EVENT_FAST_ACK); - NET_INC_STATS_BH(LINUX_MIB_TCPHPACKS); + NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPHPACKS); } else { if (ack_seq != TCP_SKB_CB(skb)->end_seq) flag |= FLAG_DATA; else - NET_INC_STATS_BH(LINUX_MIB_TCPPUREACKS); + NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPPUREACKS); flag |= tcp_ack_update_window(sk, skb, ack, ack_seq); @@ -3273,13 +3305,14 @@ static int tcp_ack(struct sock *sk, struct sk_buff *skb, int flag) * log. Something worked... */ sk->sk_err_soft = 0; + icsk->icsk_probes_out = 0; tp->rcv_tstamp = tcp_time_stamp; prior_packets = tp->packets_out; if (!prior_packets) goto no_queue; /* See if we can take anything off of the retransmit queue. */ - flag |= tcp_clean_rtx_queue(sk, prior_fackets); + flag |= tcp_clean_rtx_queue(sk, prior_fackets, prior_snd_una); if (tp->frto_counter) frto_cwnd = tcp_process_frto(sk, flag); @@ -3305,8 +3338,6 @@ static int tcp_ack(struct sock *sk, struct sk_buff *skb, int flag) return 1; no_queue: - icsk->icsk_probes_out = 0; - /* If this ack opens up a zero window, clear backoff. It was * being used to time the probes, and is probably far higher than * it needs to be for normal retransmission. @@ -3424,6 +3455,22 @@ void tcp_parse_options(struct sk_buff *skb, struct tcp_options_received *opt_rx, } } +static int tcp_parse_aligned_timestamp(struct tcp_sock *tp, struct tcphdr *th) +{ + __be32 *ptr = (__be32 *)(th + 1); + + if (*ptr == htonl((TCPOPT_NOP << 24) | (TCPOPT_NOP << 16) + | (TCPOPT_TIMESTAMP << 8) | TCPOLEN_TIMESTAMP)) { + tp->rx_opt.saw_tstamp = 1; + ++ptr; + tp->rx_opt.rcv_tsval = ntohl(*ptr); + ++ptr; + tp->rx_opt.rcv_tsecr = ntohl(*ptr); + return 1; + } + return 0; +} + /* Fast parse options. This hopes to only see timestamps. * If it is wrong it falls back on tcp_parse_options(). */ @@ -3435,21 +3482,50 @@ static int tcp_fast_parse_options(struct sk_buff *skb, struct tcphdr *th, return 0; } else if (tp->rx_opt.tstamp_ok && th->doff == (sizeof(struct tcphdr)>>2)+(TCPOLEN_TSTAMP_ALIGNED>>2)) { - __be32 *ptr = (__be32 *)(th + 1); - if (*ptr == htonl((TCPOPT_NOP << 24) | (TCPOPT_NOP << 16) - | (TCPOPT_TIMESTAMP << 8) | TCPOLEN_TIMESTAMP)) { - tp->rx_opt.saw_tstamp = 1; - ++ptr; - tp->rx_opt.rcv_tsval = ntohl(*ptr); - ++ptr; - tp->rx_opt.rcv_tsecr = ntohl(*ptr); + if (tcp_parse_aligned_timestamp(tp, th)) return 1; - } } tcp_parse_options(skb, &tp->rx_opt, 1); return 1; } +#ifdef CONFIG_TCP_MD5SIG +/* + * Parse MD5 Signature option + */ +u8 *tcp_parse_md5sig_option(struct tcphdr *th) +{ + int length = (th->doff << 2) - sizeof (*th); + u8 *ptr = (u8*)(th + 1); + + /* If the TCP option is too short, we can short cut */ + if (length < TCPOLEN_MD5SIG) + return NULL; + + while (length > 0) { + int opcode = *ptr++; + int opsize; + + switch(opcode) { + case TCPOPT_EOL: + return NULL; + case TCPOPT_NOP: + length--; + continue; + default: + opsize = *ptr++; + if (opsize < 2 || opsize > length) + return NULL; + if (opcode == TCPOPT_MD5SIG) + return ptr; + } + ptr += opsize - 2; + length -= opsize; + } + return NULL; +} +#endif + static inline void tcp_store_ts_recent(struct tcp_sock *tp) { tp->rx_opt.ts_recent = tp->rx_opt.rcv_tsval; @@ -3662,26 +3738,33 @@ static inline int tcp_sack_extend(struct tcp_sack_block *sp, u32 seq, return 0; } -static void tcp_dsack_set(struct tcp_sock *tp, u32 seq, u32 end_seq) +static void tcp_dsack_set(struct sock *sk, u32 seq, u32 end_seq) { + struct tcp_sock *tp = tcp_sk(sk); + if (tcp_is_sack(tp) && sysctl_tcp_dsack) { + int mib_idx; + if (before(seq, tp->rcv_nxt)) - NET_INC_STATS_BH(LINUX_MIB_TCPDSACKOLDSENT); + mib_idx = LINUX_MIB_TCPDSACKOLDSENT; else - NET_INC_STATS_BH(LINUX_MIB_TCPDSACKOFOSENT); + mib_idx = LINUX_MIB_TCPDSACKOFOSENT; + + NET_INC_STATS_BH(sock_net(sk), mib_idx); tp->rx_opt.dsack = 1; tp->duplicate_sack[0].start_seq = seq; tp->duplicate_sack[0].end_seq = end_seq; - tp->rx_opt.eff_sacks = min(tp->rx_opt.num_sacks + 1, - 4 - tp->rx_opt.tstamp_ok); + tp->rx_opt.eff_sacks = tp->rx_opt.num_sacks + 1; } } -static void tcp_dsack_extend(struct tcp_sock *tp, u32 seq, u32 end_seq) +static void tcp_dsack_extend(struct sock *sk, u32 seq, u32 end_seq) { + struct tcp_sock *tp = tcp_sk(sk); + if (!tp->rx_opt.dsack) - tcp_dsack_set(tp, seq, end_seq); + tcp_dsack_set(sk, seq, end_seq); else tcp_sack_extend(tp->duplicate_sack, seq, end_seq); } @@ -3692,7 +3775,7 @@ static void tcp_send_dupack(struct sock *sk, struct sk_buff *skb) if (TCP_SKB_CB(skb)->end_seq != TCP_SKB_CB(skb)->seq && before(TCP_SKB_CB(skb)->seq, tp->rcv_nxt)) { - NET_INC_STATS_BH(LINUX_MIB_DELAYEDACKLOST); + NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_DELAYEDACKLOST); tcp_enter_quickack_mode(sk); if (tcp_is_sack(tp) && sysctl_tcp_dsack) { @@ -3700,7 +3783,7 @@ static void tcp_send_dupack(struct sock *sk, struct sk_buff *skb) if (after(TCP_SKB_CB(skb)->end_seq, tp->rcv_nxt)) end_seq = tp->rcv_nxt; - tcp_dsack_set(tp, TCP_SKB_CB(skb)->seq, end_seq); + tcp_dsack_set(sk, TCP_SKB_CB(skb)->seq, end_seq); } } @@ -3727,9 +3810,8 @@ static void tcp_sack_maybe_coalesce(struct tcp_sock *tp) * Decrease num_sacks. */ tp->rx_opt.num_sacks--; - tp->rx_opt.eff_sacks = min(tp->rx_opt.num_sacks + - tp->rx_opt.dsack, - 4 - tp->rx_opt.tstamp_ok); + tp->rx_opt.eff_sacks = tp->rx_opt.num_sacks + + tp->rx_opt.dsack; for (i = this_sack; i < tp->rx_opt.num_sacks; i++) sp[i] = sp[i + 1]; continue; @@ -3779,7 +3861,7 @@ static void tcp_sack_new_ofo_skb(struct sock *sk, u32 seq, u32 end_seq) * * If the sack array is full, forget about the last one. */ - if (this_sack >= 4) { + if (this_sack >= TCP_NUM_SACKS) { this_sack--; tp->rx_opt.num_sacks--; sp--; @@ -3792,8 +3874,7 @@ new_sack: sp->start_seq = seq; sp->end_seq = end_seq; tp->rx_opt.num_sacks++; - tp->rx_opt.eff_sacks = min(tp->rx_opt.num_sacks + tp->rx_opt.dsack, - 4 - tp->rx_opt.tstamp_ok); + tp->rx_opt.eff_sacks = tp->rx_opt.num_sacks + tp->rx_opt.dsack; } /* RCV.NXT advances, some SACKs should be eaten. */ @@ -3817,7 +3898,7 @@ static void tcp_sack_remove(struct tcp_sock *tp) int i; /* RCV.NXT must cover all the block! */ - BUG_TRAP(!before(tp->rcv_nxt, sp->end_seq)); + WARN_ON(before(tp->rcv_nxt, sp->end_seq)); /* Zap this SACK, by moving forward any other SACKS. */ for (i=this_sack+1; i < num_sacks; i++) @@ -3830,9 +3911,8 @@ static void tcp_sack_remove(struct tcp_sock *tp) } if (num_sacks != tp->rx_opt.num_sacks) { tp->rx_opt.num_sacks = num_sacks; - tp->rx_opt.eff_sacks = min(tp->rx_opt.num_sacks + - tp->rx_opt.dsack, - 4 - tp->rx_opt.tstamp_ok); + tp->rx_opt.eff_sacks = tp->rx_opt.num_sacks + + tp->rx_opt.dsack; } } @@ -3853,7 +3933,7 @@ static void tcp_ofo_queue(struct sock *sk) __u32 dsack = dsack_high; if (before(TCP_SKB_CB(skb)->end_seq, dsack_high)) dsack_high = TCP_SKB_CB(skb)->end_seq; - tcp_dsack_extend(tp, TCP_SKB_CB(skb)->seq, dsack); + tcp_dsack_extend(sk, TCP_SKB_CB(skb)->seq, dsack); } if (!after(TCP_SKB_CB(skb)->end_seq, tp->rcv_nxt)) { @@ -3911,8 +3991,7 @@ static void tcp_data_queue(struct sock *sk, struct sk_buff *skb) if (tp->rx_opt.dsack) { tp->rx_opt.dsack = 0; - tp->rx_opt.eff_sacks = min_t(unsigned int, tp->rx_opt.num_sacks, - 4 - tp->rx_opt.tstamp_ok); + tp->rx_opt.eff_sacks = tp->rx_opt.num_sacks; } /* Queue data for delivery to the user. @@ -3981,8 +4060,8 @@ queue_and_out: if (!after(TCP_SKB_CB(skb)->end_seq, tp->rcv_nxt)) { /* A retransmit, 2nd most common case. Force an immediate ack. */ - NET_INC_STATS_BH(LINUX_MIB_DELAYEDACKLOST); - tcp_dsack_set(tp, TCP_SKB_CB(skb)->seq, TCP_SKB_CB(skb)->end_seq); + NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_DELAYEDACKLOST); + tcp_dsack_set(sk, TCP_SKB_CB(skb)->seq, TCP_SKB_CB(skb)->end_seq); out_of_window: tcp_enter_quickack_mode(sk); @@ -4004,7 +4083,7 @@ drop: tp->rcv_nxt, TCP_SKB_CB(skb)->seq, TCP_SKB_CB(skb)->end_seq); - tcp_dsack_set(tp, TCP_SKB_CB(skb)->seq, tp->rcv_nxt); + tcp_dsack_set(sk, TCP_SKB_CB(skb)->seq, tp->rcv_nxt); /* If window is closed, drop tail of packet. But after * remembering D-SACK for its head made in previous line. @@ -4069,30 +4148,30 @@ drop: if (!after(end_seq, TCP_SKB_CB(skb1)->end_seq)) { /* All the bits are present. Drop. */ __kfree_skb(skb); - tcp_dsack_set(tp, seq, end_seq); + tcp_dsack_set(sk, seq, end_seq); goto add_sack; } if (after(seq, TCP_SKB_CB(skb1)->seq)) { /* Partial overlap. */ - tcp_dsack_set(tp, seq, + tcp_dsack_set(sk, seq, TCP_SKB_CB(skb1)->end_seq); } else { skb1 = skb1->prev; } } - __skb_insert(skb, skb1, skb1->next, &tp->out_of_order_queue); + __skb_queue_after(&tp->out_of_order_queue, skb1, skb); /* And clean segments covered by new one as whole. */ while ((skb1 = skb->next) != (struct sk_buff *)&tp->out_of_order_queue && after(end_seq, TCP_SKB_CB(skb1)->seq)) { if (before(end_seq, TCP_SKB_CB(skb1)->end_seq)) { - tcp_dsack_extend(tp, TCP_SKB_CB(skb1)->seq, + tcp_dsack_extend(sk, TCP_SKB_CB(skb1)->seq, end_seq); break; } __skb_unlink(skb1, &tp->out_of_order_queue); - tcp_dsack_extend(tp, TCP_SKB_CB(skb1)->seq, + tcp_dsack_extend(sk, TCP_SKB_CB(skb1)->seq, TCP_SKB_CB(skb1)->end_seq); __kfree_skb(skb1); } @@ -4103,6 +4182,18 @@ add_sack: } } +static struct sk_buff *tcp_collapse_one(struct sock *sk, struct sk_buff *skb, + struct sk_buff_head *list) +{ + struct sk_buff *next = skb->next; + + __skb_unlink(skb, list); + __kfree_skb(skb); + NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPRCVCOLLAPSED); + + return next; +} + /* Collapse contiguous sequence of skbs head..tail with * sequence numbers start..end. * Segments with FIN/SYN are not collapsed (only because this @@ -4120,11 +4211,7 @@ tcp_collapse(struct sock *sk, struct sk_buff_head *list, for (skb = head; skb != tail;) { /* No new bits? It is possible on ofo queue. */ if (!before(start, TCP_SKB_CB(skb)->end_seq)) { - struct sk_buff *next = skb->next; - __skb_unlink(skb, list); - __kfree_skb(skb); - NET_INC_STATS_BH(LINUX_MIB_TCPRCVCOLLAPSED); - skb = next; + skb = tcp_collapse_one(sk, skb, list); continue; } @@ -4170,7 +4257,7 @@ tcp_collapse(struct sock *sk, struct sk_buff_head *list, memcpy(nskb->head, skb->head, header); memcpy(nskb->cb, skb->cb, sizeof(skb->cb)); TCP_SKB_CB(nskb)->seq = TCP_SKB_CB(nskb)->end_seq = start; - __skb_insert(nskb, skb->prev, skb, list); + __skb_queue_before(list, skb, nskb); skb_set_owner_r(nskb, sk); /* Copy data, releasing collapsed skbs. */ @@ -4188,11 +4275,7 @@ tcp_collapse(struct sock *sk, struct sk_buff_head *list, start += size; } if (!before(start, TCP_SKB_CB(skb)->end_seq)) { - struct sk_buff *next = skb->next; - __skb_unlink(skb, list); - __kfree_skb(skb); - NET_INC_STATS_BH(LINUX_MIB_TCPRCVCOLLAPSED); - skb = next; + skb = tcp_collapse_one(sk, skb, list); if (skb == tail || tcp_hdr(skb)->syn || tcp_hdr(skb)->fin) @@ -4254,7 +4337,7 @@ static int tcp_prune_ofo_queue(struct sock *sk) int res = 0; if (!skb_queue_empty(&tp->out_of_order_queue)) { - NET_INC_STATS_BH(LINUX_MIB_OFOPRUNED); + NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_OFOPRUNED); __skb_queue_purge(&tp->out_of_order_queue); /* Reset SACK state. A conforming SACK implementation will @@ -4283,7 +4366,7 @@ static int tcp_prune_queue(struct sock *sk) SOCK_DEBUG(sk, "prune_queue: c=%x\n", tp->copied_seq); - NET_INC_STATS_BH(LINUX_MIB_PRUNECALLED); + NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_PRUNECALLED); if (atomic_read(&sk->sk_rmem_alloc) >= sk->sk_rcvbuf) tcp_clamp_window(sk); @@ -4312,7 +4395,7 @@ static int tcp_prune_queue(struct sock *sk) * drop receive data on the floor. It will get retransmitted * and hopefully then we'll have sufficient space. */ - NET_INC_STATS_BH(LINUX_MIB_RCVPRUNED); + NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_RCVPRUNED); /* Massive buffer overcommit. */ tp->pred_flags = 0; @@ -4378,8 +4461,8 @@ static void tcp_new_space(struct sock *sk) if (tcp_should_expand_sndbuf(sk)) { int sndmem = max_t(u32, tp->rx_opt.mss_clamp, tp->mss_cache) + - MAX_TCP_HEADER + 16 + sizeof(struct sk_buff), - demanded = max_t(unsigned int, tp->snd_cwnd, + MAX_TCP_HEADER + 16 + sizeof(struct sk_buff); + int demanded = max_t(unsigned int, tp->snd_cwnd, tp->reordering + 1); sndmem *= 2 * demanded; if (sndmem > sk->sk_sndbuf) @@ -4633,6 +4716,67 @@ out: } #endif /* CONFIG_NET_DMA */ +/* Does PAWS and seqno based validation of an incoming segment, flags will + * play significant role here. + */ +static int tcp_validate_incoming(struct sock *sk, struct sk_buff *skb, + struct tcphdr *th, int syn_inerr) +{ + struct tcp_sock *tp = tcp_sk(sk); + + /* RFC1323: H1. Apply PAWS check first. */ + if (tcp_fast_parse_options(skb, th, tp) && tp->rx_opt.saw_tstamp && + tcp_paws_discard(sk, skb)) { + if (!th->rst) { + NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_PAWSESTABREJECTED); + tcp_send_dupack(sk, skb); + goto discard; + } + /* Reset is accepted even if it did not pass PAWS. */ + } + + /* Step 1: check sequence number */ + if (!tcp_sequence(tp, TCP_SKB_CB(skb)->seq, TCP_SKB_CB(skb)->end_seq)) { + /* RFC793, page 37: "In all states except SYN-SENT, all reset + * (RST) segments are validated by checking their SEQ-fields." + * And page 69: "If an incoming segment is not acceptable, + * an acknowledgment should be sent in reply (unless the RST + * bit is set, if so drop the segment and return)". + */ + if (!th->rst) + tcp_send_dupack(sk, skb); + goto discard; + } + + /* Step 2: check RST bit */ + if (th->rst) { + tcp_reset(sk); + goto discard; + } + + /* ts_recent update must be made after we are sure that the packet + * is in window. + */ + tcp_replace_ts_recent(tp, TCP_SKB_CB(skb)->seq); + + /* step 3: check security and precedence [ignored] */ + + /* step 4: Check for a SYN in window. */ + if (th->syn && !before(TCP_SKB_CB(skb)->seq, tp->rcv_nxt)) { + if (syn_inerr) + TCP_INC_STATS_BH(sock_net(sk), TCP_MIB_INERRS); + NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPABORTONSYN); + tcp_reset(sk); + return -1; + } + + return 1; + +discard: + __kfree_skb(skb); + return 0; +} + /* * TCP receive function for the ESTABLISHED state. * @@ -4660,6 +4804,7 @@ int tcp_rcv_established(struct sock *sk, struct sk_buff *skb, struct tcphdr *th, unsigned len) { struct tcp_sock *tp = tcp_sk(sk); + int res; /* * Header prediction. @@ -4698,19 +4843,10 @@ int tcp_rcv_established(struct sock *sk, struct sk_buff *skb, /* Check timestamp */ if (tcp_header_len == sizeof(struct tcphdr) + TCPOLEN_TSTAMP_ALIGNED) { - __be32 *ptr = (__be32 *)(th + 1); - /* No? Slow path! */ - if (*ptr != htonl((TCPOPT_NOP << 24) | (TCPOPT_NOP << 16) - | (TCPOPT_TIMESTAMP << 8) | TCPOLEN_TIMESTAMP)) + if (!tcp_parse_aligned_timestamp(tp, th)) goto slow_path; - tp->rx_opt.saw_tstamp = 1; - ++ptr; - tp->rx_opt.rcv_tsval = ntohl(*ptr); - ++ptr; - tp->rx_opt.rcv_tsecr = ntohl(*ptr); - /* If PAWS failed, check it more carefully in slow path */ if ((s32)(tp->rx_opt.rcv_tsval - tp->rx_opt.ts_recent) < 0) goto slow_path; @@ -4742,7 +4878,7 @@ int tcp_rcv_established(struct sock *sk, struct sk_buff *skb, tcp_data_snd_check(sk); return 0; } else { /* Header too small */ - TCP_INC_STATS_BH(TCP_MIB_INERRS); + TCP_INC_STATS_BH(sock_net(sk), TCP_MIB_INERRS); goto discard; } } else { @@ -4779,7 +4915,7 @@ int tcp_rcv_established(struct sock *sk, struct sk_buff *skb, __skb_pull(skb, tcp_header_len); tp->rcv_nxt = TCP_SKB_CB(skb)->end_seq; - NET_INC_STATS_BH(LINUX_MIB_TCPHPHITSTOUSER); + NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPHPHITSTOUSER); } if (copied_early) tcp_cleanup_rbuf(sk, skb->len); @@ -4802,7 +4938,7 @@ int tcp_rcv_established(struct sock *sk, struct sk_buff *skb, if ((int)skb->truesize > sk->sk_forward_alloc) goto step5; - NET_INC_STATS_BH(LINUX_MIB_TCPHPHITS); + NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPHPHITS); /* Bulk data transfer: receiver */ __skb_pull(skb, tcp_header_len); @@ -4821,7 +4957,8 @@ int tcp_rcv_established(struct sock *sk, struct sk_buff *skb, goto no_ack; } - __tcp_ack_snd_check(sk, 0); + if (!copied_early || tp->rcv_nxt != tp->rcv_wup) + __tcp_ack_snd_check(sk, 0); no_ack: #ifdef CONFIG_NET_DMA if (copied_early) @@ -4841,51 +4978,12 @@ slow_path: goto csum_error; /* - * RFC1323: H1. Apply PAWS check first. - */ - if (tcp_fast_parse_options(skb, th, tp) && tp->rx_opt.saw_tstamp && - tcp_paws_discard(sk, skb)) { - if (!th->rst) { - NET_INC_STATS_BH(LINUX_MIB_PAWSESTABREJECTED); - tcp_send_dupack(sk, skb); - goto discard; - } - /* Resets are accepted even if PAWS failed. - - ts_recent update must be made after we are sure - that the packet is in window. - */ - } - - /* * Standard slow path. */ - if (!tcp_sequence(tp, TCP_SKB_CB(skb)->seq, TCP_SKB_CB(skb)->end_seq)) { - /* RFC793, page 37: "In all states except SYN-SENT, all reset - * (RST) segments are validated by checking their SEQ-fields." - * And page 69: "If an incoming segment is not acceptable, - * an acknowledgment should be sent in reply (unless the RST bit - * is set, if so drop the segment and return)". - */ - if (!th->rst) - tcp_send_dupack(sk, skb); - goto discard; - } - - if (th->rst) { - tcp_reset(sk); - goto discard; - } - - tcp_replace_ts_recent(tp, TCP_SKB_CB(skb)->seq); - - if (th->syn && !before(TCP_SKB_CB(skb)->seq, tp->rcv_nxt)) { - TCP_INC_STATS_BH(TCP_MIB_INERRS); - NET_INC_STATS_BH(LINUX_MIB_TCPABORTONSYN); - tcp_reset(sk); - return 1; - } + res = tcp_validate_incoming(sk, skb, th, 1); + if (res <= 0) + return -res; step5: if (th->ack) @@ -4904,7 +5002,7 @@ step5: return 0; csum_error: - TCP_INC_STATS_BH(TCP_MIB_INERRS); + TCP_INC_STATS_BH(sock_net(sk), TCP_MIB_INERRS); discard: __kfree_skb(skb); @@ -4938,7 +5036,7 @@ static int tcp_rcv_synsent_state_process(struct sock *sk, struct sk_buff *skb, if (tp->rx_opt.saw_tstamp && tp->rx_opt.rcv_tsecr && !between(tp->rx_opt.rcv_tsecr, tp->retrans_stamp, tcp_time_stamp)) { - NET_INC_STATS_BH(LINUX_MIB_PAWSACTIVEREJECTED); + NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_PAWSACTIVEREJECTED); goto reset_and_undo; } @@ -5167,6 +5265,7 @@ int tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb, struct tcp_sock *tp = tcp_sk(sk); struct inet_connection_sock *icsk = inet_csk(sk); int queued = 0; + int res; tp->rx_opt.saw_tstamp = 0; @@ -5219,42 +5318,9 @@ int tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb, return 0; } - if (tcp_fast_parse_options(skb, th, tp) && tp->rx_opt.saw_tstamp && - tcp_paws_discard(sk, skb)) { - if (!th->rst) { - NET_INC_STATS_BH(LINUX_MIB_PAWSESTABREJECTED); - tcp_send_dupack(sk, skb); - goto discard; - } - /* Reset is accepted even if it did not pass PAWS. */ - } - - /* step 1: check sequence number */ - if (!tcp_sequence(tp, TCP_SKB_CB(skb)->seq, TCP_SKB_CB(skb)->end_seq)) { - if (!th->rst) - tcp_send_dupack(sk, skb); - goto discard; - } - - /* step 2: check RST bit */ - if (th->rst) { - tcp_reset(sk); - goto discard; - } - - tcp_replace_ts_recent(tp, TCP_SKB_CB(skb)->seq); - - /* step 3: check security and precedence [ignored] */ - - /* step 4: - * - * Check for a SYN in window. - */ - if (th->syn && !before(TCP_SKB_CB(skb)->seq, tp->rcv_nxt)) { - NET_INC_STATS_BH(LINUX_MIB_TCPABORTONSYN); - tcp_reset(sk); - return 1; - } + res = tcp_validate_incoming(sk, skb, th, 0); + if (res <= 0) + return -res; /* step 5: check the ACK field */ if (th->ack) { @@ -5333,7 +5399,7 @@ int tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb, (TCP_SKB_CB(skb)->end_seq != TCP_SKB_CB(skb)->seq && after(TCP_SKB_CB(skb)->end_seq - th->fin, tp->rcv_nxt))) { tcp_done(sk); - NET_INC_STATS_BH(LINUX_MIB_TCPABORTONDATA); + NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPABORTONDATA); return 1; } @@ -5393,7 +5459,7 @@ int tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb, if (sk->sk_shutdown & RCV_SHUTDOWN) { if (TCP_SKB_CB(skb)->end_seq != TCP_SKB_CB(skb)->seq && after(TCP_SKB_CB(skb)->end_seq - th->fin, tp->rcv_nxt)) { - NET_INC_STATS_BH(LINUX_MIB_TCPABORTONDATA); + NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPABORTONDATA); tcp_reset(sk); return 1; } @@ -5422,6 +5488,9 @@ EXPORT_SYMBOL(sysctl_tcp_ecn); EXPORT_SYMBOL(sysctl_tcp_reordering); EXPORT_SYMBOL(sysctl_tcp_adv_win_scale); EXPORT_SYMBOL(tcp_parse_options); +#ifdef CONFIG_TCP_MD5SIG +EXPORT_SYMBOL(tcp_parse_md5sig_option); +#endif EXPORT_SYMBOL(tcp_rcv_established); EXPORT_SYMBOL(tcp_rcv_state_process); EXPORT_SYMBOL(tcp_initialize_rcv_mss); diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index 12695be2c255..5c8fa7f1e327 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -5,8 +5,6 @@ * * Implementation of the Transmission Control Protocol(TCP). * - * Version: $Id: tcp_ipv4.c,v 1.240 2002/02/01 22:01:04 davem Exp $ - * * IPv4 specific functions * * @@ -89,10 +87,14 @@ int sysctl_tcp_low_latency __read_mostly; #ifdef CONFIG_TCP_MD5SIG static struct tcp_md5sig_key *tcp_v4_md5_do_lookup(struct sock *sk, __be32 addr); -static int tcp_v4_do_calc_md5_hash(char *md5_hash, struct tcp_md5sig_key *key, - __be32 saddr, __be32 daddr, - struct tcphdr *th, int protocol, - unsigned int tcplen); +static int tcp_v4_md5_hash_hdr(char *md5_hash, struct tcp_md5sig_key *key, + __be32 daddr, __be32 saddr, struct tcphdr *th); +#else +static inline +struct tcp_md5sig_key *tcp_v4_md5_do_lookup(struct sock *sk, __be32 addr) +{ + return NULL; +} #endif struct inet_hashinfo __cacheline_aligned tcp_hashinfo = { @@ -172,7 +174,7 @@ int tcp_v4_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len) inet->sport, usin->sin_port, sk, 1); if (tmp < 0) { if (tmp == -ENETUNREACH) - IP_INC_STATS_BH(IPSTATS_MIB_OUTNOROUTES); + IP_INC_STATS_BH(sock_net(sk), IPSTATS_MIB_OUTNOROUTES); return tmp; } @@ -340,16 +342,17 @@ void tcp_v4_err(struct sk_buff *skb, u32 info) struct sock *sk; __u32 seq; int err; + struct net *net = dev_net(skb->dev); if (skb->len < (iph->ihl << 2) + 8) { - ICMP_INC_STATS_BH(ICMP_MIB_INERRORS); + ICMP_INC_STATS_BH(net, ICMP_MIB_INERRORS); return; } - sk = inet_lookup(dev_net(skb->dev), &tcp_hashinfo, iph->daddr, th->dest, + sk = inet_lookup(net, &tcp_hashinfo, iph->daddr, th->dest, iph->saddr, th->source, inet_iif(skb)); if (!sk) { - ICMP_INC_STATS_BH(ICMP_MIB_INERRORS); + ICMP_INC_STATS_BH(net, ICMP_MIB_INERRORS); return; } if (sk->sk_state == TCP_TIME_WAIT) { @@ -362,7 +365,7 @@ void tcp_v4_err(struct sk_buff *skb, u32 info) * servers this needs to be solved differently. */ if (sock_owned_by_user(sk)) - NET_INC_STATS_BH(LINUX_MIB_LOCKDROPPEDICMPS); + NET_INC_STATS_BH(net, LINUX_MIB_LOCKDROPPEDICMPS); if (sk->sk_state == TCP_CLOSE) goto out; @@ -371,7 +374,7 @@ void tcp_v4_err(struct sk_buff *skb, u32 info) seq = ntohl(th->seq); if (sk->sk_state != TCP_LISTEN && !between(seq, tp->snd_una, tp->snd_nxt)) { - NET_INC_STATS_BH(LINUX_MIB_OUTOFWINDOWICMPS); + NET_INC_STATS_BH(net, LINUX_MIB_OUTOFWINDOWICMPS); goto out; } @@ -415,10 +418,10 @@ void tcp_v4_err(struct sk_buff *skb, u32 info) /* ICMPs are not backlogged, hence we cannot get an established socket here. */ - BUG_TRAP(!req->sk); + WARN_ON(req->sk); if (seq != tcp_rsk(req)->snt_isn) { - NET_INC_STATS_BH(LINUX_MIB_OUTOFWINDOWICMPS); + NET_INC_STATS_BH(net, LINUX_MIB_OUTOFWINDOWICMPS); goto out; } @@ -540,6 +543,7 @@ static void tcp_v4_send_reset(struct sock *sk, struct sk_buff *skb) #ifdef CONFIG_TCP_MD5SIG struct tcp_md5sig_key *key; #endif + struct net *net; /* Never send a reset in response to a reset. */ if (th->rst) @@ -578,33 +582,33 @@ static void tcp_v4_send_reset(struct sock *sk, struct sk_buff *skb) arg.iov[0].iov_len += TCPOLEN_MD5SIG_ALIGNED; rep.th.doff = arg.iov[0].iov_len / 4; - tcp_v4_do_calc_md5_hash((__u8 *)&rep.opt[1], - key, - ip_hdr(skb)->daddr, - ip_hdr(skb)->saddr, - &rep.th, IPPROTO_TCP, - arg.iov[0].iov_len); + tcp_v4_md5_hash_hdr((__u8 *) &rep.opt[1], + key, ip_hdr(skb)->saddr, + ip_hdr(skb)->daddr, &rep.th); } #endif arg.csum = csum_tcpudp_nofold(ip_hdr(skb)->daddr, ip_hdr(skb)->saddr, /* XXX */ - sizeof(struct tcphdr), IPPROTO_TCP, 0); + arg.iov[0].iov_len, IPPROTO_TCP, 0); arg.csumoffset = offsetof(struct tcphdr, check) / 2; + arg.flags = (sk && inet_sk(sk)->transparent) ? IP_REPLY_ARG_NOSRCCHECK : 0; - ip_send_reply(dev_net(skb->dst->dev)->ipv4.tcp_sock, skb, + net = dev_net(skb->dst->dev); + ip_send_reply(net->ipv4.tcp_sock, skb, &arg, arg.iov[0].iov_len); - TCP_INC_STATS_BH(TCP_MIB_OUTSEGS); - TCP_INC_STATS_BH(TCP_MIB_OUTRSTS); + TCP_INC_STATS_BH(net, TCP_MIB_OUTSEGS); + TCP_INC_STATS_BH(net, TCP_MIB_OUTRSTS); } /* The code following below sending ACKs in SYN-RECV and TIME-WAIT states outside socket context is ugly, certainly. What can I do? */ -static void tcp_v4_send_ack(struct tcp_timewait_sock *twsk, - struct sk_buff *skb, u32 seq, u32 ack, - u32 win, u32 ts) +static void tcp_v4_send_ack(struct sk_buff *skb, u32 seq, u32 ack, + u32 win, u32 ts, int oif, + struct tcp_md5sig_key *key, + int reply_flags) { struct tcphdr *th = tcp_hdr(skb); struct { @@ -616,10 +620,7 @@ static void tcp_v4_send_ack(struct tcp_timewait_sock *twsk, ]; } rep; struct ip_reply_arg arg; -#ifdef CONFIG_TCP_MD5SIG - struct tcp_md5sig_key *key; - struct tcp_md5sig_key tw_key; -#endif + struct net *net = dev_net(skb->dst->dev); memset(&rep.th, 0, sizeof(struct tcphdr)); memset(&arg, 0, sizeof(arg)); @@ -645,23 +646,6 @@ static void tcp_v4_send_ack(struct tcp_timewait_sock *twsk, rep.th.window = htons(win); #ifdef CONFIG_TCP_MD5SIG - /* - * The SKB holds an imcoming packet, but may not have a valid ->sk - * pointer. This is especially the case when we're dealing with a - * TIME_WAIT ack, because the sk structure is long gone, and only - * the tcp_timewait_sock remains. So the md5 key is stashed in that - * structure, and we use it in preference. I believe that (twsk || - * skb->sk) holds true, but we program defensively. - */ - if (!twsk && skb->sk) { - key = tcp_v4_md5_do_lookup(skb->sk, ip_hdr(skb)->daddr); - } else if (twsk && twsk->tw_md5_keylen) { - tw_key.key = twsk->tw_md5_key; - tw_key.keylen = twsk->tw_md5_keylen; - key = &tw_key; - } else - key = NULL; - if (key) { int offset = (ts) ? 3 : 0; @@ -672,25 +656,23 @@ static void tcp_v4_send_ack(struct tcp_timewait_sock *twsk, arg.iov[0].iov_len += TCPOLEN_MD5SIG_ALIGNED; rep.th.doff = arg.iov[0].iov_len/4; - tcp_v4_do_calc_md5_hash((__u8 *)&rep.opt[offset], - key, - ip_hdr(skb)->daddr, - ip_hdr(skb)->saddr, - &rep.th, IPPROTO_TCP, - arg.iov[0].iov_len); + tcp_v4_md5_hash_hdr((__u8 *) &rep.opt[offset], + key, ip_hdr(skb)->saddr, + ip_hdr(skb)->daddr, &rep.th); } #endif + arg.flags = reply_flags; arg.csum = csum_tcpudp_nofold(ip_hdr(skb)->daddr, ip_hdr(skb)->saddr, /* XXX */ arg.iov[0].iov_len, IPPROTO_TCP, 0); arg.csumoffset = offsetof(struct tcphdr, check) / 2; - if (twsk) - arg.bound_dev_if = twsk->tw_sk.tw_bound_dev_if; + if (oif) + arg.bound_dev_if = oif; - ip_send_reply(dev_net(skb->dev)->ipv4.tcp_sock, skb, + ip_send_reply(net->ipv4.tcp_sock, skb, &arg, arg.iov[0].iov_len); - TCP_INC_STATS_BH(TCP_MIB_OUTSEGS); + TCP_INC_STATS_BH(net, TCP_MIB_OUTSEGS); } static void tcp_v4_timewait_ack(struct sock *sk, struct sk_buff *skb) @@ -698,19 +680,26 @@ static void tcp_v4_timewait_ack(struct sock *sk, struct sk_buff *skb) struct inet_timewait_sock *tw = inet_twsk(sk); struct tcp_timewait_sock *tcptw = tcp_twsk(sk); - tcp_v4_send_ack(tcptw, skb, tcptw->tw_snd_nxt, tcptw->tw_rcv_nxt, + tcp_v4_send_ack(skb, tcptw->tw_snd_nxt, tcptw->tw_rcv_nxt, tcptw->tw_rcv_wnd >> tw->tw_rcv_wscale, - tcptw->tw_ts_recent); + tcptw->tw_ts_recent, + tw->tw_bound_dev_if, + tcp_twsk_md5_key(tcptw), + tw->tw_transparent ? IP_REPLY_ARG_NOSRCCHECK : 0 + ); inet_twsk_put(tw); } -static void tcp_v4_reqsk_send_ack(struct sk_buff *skb, +static void tcp_v4_reqsk_send_ack(struct sock *sk, struct sk_buff *skb, struct request_sock *req) { - tcp_v4_send_ack(NULL, skb, tcp_rsk(req)->snt_isn + 1, + tcp_v4_send_ack(skb, tcp_rsk(req)->snt_isn + 1, tcp_rsk(req)->rcv_isn + 1, req->rcv_wnd, - req->ts_recent); + req->ts_recent, + 0, + tcp_v4_md5_do_lookup(sk, ip_hdr(skb)->daddr), + inet_rsk(req)->no_srccheck ? IP_REPLY_ARG_NOSRCCHECK : 0); } /* @@ -1000,32 +989,13 @@ static int tcp_v4_parse_md5_keys(struct sock *sk, char __user *optval, newkey, cmd.tcpm_keylen); } -static int tcp_v4_do_calc_md5_hash(char *md5_hash, struct tcp_md5sig_key *key, - __be32 saddr, __be32 daddr, - struct tcphdr *th, int protocol, - unsigned int tcplen) +static int tcp_v4_md5_hash_pseudoheader(struct tcp_md5sig_pool *hp, + __be32 daddr, __be32 saddr, int nbytes) { - struct scatterlist sg[4]; - __u16 data_len; - int block = 0; - __sum16 old_checksum; - struct tcp_md5sig_pool *hp; struct tcp4_pseudohdr *bp; - struct hash_desc *desc; - int err; - unsigned int nbytes = 0; - - /* - * Okay, so RFC2385 is turned on for this connection, - * so we need to generate the MD5 hash for the packet now. - */ - - hp = tcp_get_md5sig_pool(); - if (!hp) - goto clear_hash_noput; + struct scatterlist sg; bp = &hp->md5_blk.ip4; - desc = &hp->md5_desc; /* * 1. the TCP pseudo-header (in the order: source IP address, @@ -1035,86 +1005,96 @@ static int tcp_v4_do_calc_md5_hash(char *md5_hash, struct tcp_md5sig_key *key, bp->saddr = saddr; bp->daddr = daddr; bp->pad = 0; - bp->protocol = protocol; - bp->len = htons(tcplen); - - sg_init_table(sg, 4); + bp->protocol = IPPROTO_TCP; + bp->len = cpu_to_be16(nbytes); - sg_set_buf(&sg[block++], bp, sizeof(*bp)); - nbytes += sizeof(*bp); - - /* 2. the TCP header, excluding options, and assuming a - * checksum of zero/ - */ - old_checksum = th->check; - th->check = 0; - sg_set_buf(&sg[block++], th, sizeof(struct tcphdr)); - nbytes += sizeof(struct tcphdr); - - /* 3. the TCP segment data (if any) */ - data_len = tcplen - (th->doff << 2); - if (data_len > 0) { - unsigned char *data = (unsigned char *)th + (th->doff << 2); - sg_set_buf(&sg[block++], data, data_len); - nbytes += data_len; - } + sg_init_one(&sg, bp, sizeof(*bp)); + return crypto_hash_update(&hp->md5_desc, &sg, sizeof(*bp)); +} - /* 4. an independently-specified key or password, known to both - * TCPs and presumably connection-specific - */ - sg_set_buf(&sg[block++], key->key, key->keylen); - nbytes += key->keylen; +static int tcp_v4_md5_hash_hdr(char *md5_hash, struct tcp_md5sig_key *key, + __be32 daddr, __be32 saddr, struct tcphdr *th) +{ + struct tcp_md5sig_pool *hp; + struct hash_desc *desc; - sg_mark_end(&sg[block - 1]); + hp = tcp_get_md5sig_pool(); + if (!hp) + goto clear_hash_noput; + desc = &hp->md5_desc; - /* Now store the Hash into the packet */ - err = crypto_hash_init(desc); - if (err) + if (crypto_hash_init(desc)) goto clear_hash; - err = crypto_hash_update(desc, sg, nbytes); - if (err) + if (tcp_v4_md5_hash_pseudoheader(hp, daddr, saddr, th->doff << 2)) goto clear_hash; - err = crypto_hash_final(desc, md5_hash); - if (err) + if (tcp_md5_hash_header(hp, th)) + goto clear_hash; + if (tcp_md5_hash_key(hp, key)) + goto clear_hash; + if (crypto_hash_final(desc, md5_hash)) goto clear_hash; - /* Reset header, and free up the crypto */ tcp_put_md5sig_pool(); - th->check = old_checksum; - -out: return 0; + clear_hash: tcp_put_md5sig_pool(); clear_hash_noput: memset(md5_hash, 0, 16); - goto out; + return 1; } -int tcp_v4_calc_md5_hash(char *md5_hash, struct tcp_md5sig_key *key, - struct sock *sk, - struct dst_entry *dst, - struct request_sock *req, - struct tcphdr *th, int protocol, - unsigned int tcplen) +int tcp_v4_md5_hash_skb(char *md5_hash, struct tcp_md5sig_key *key, + struct sock *sk, struct request_sock *req, + struct sk_buff *skb) { + struct tcp_md5sig_pool *hp; + struct hash_desc *desc; + struct tcphdr *th = tcp_hdr(skb); __be32 saddr, daddr; if (sk) { saddr = inet_sk(sk)->saddr; daddr = inet_sk(sk)->daddr; + } else if (req) { + saddr = inet_rsk(req)->loc_addr; + daddr = inet_rsk(req)->rmt_addr; } else { - struct rtable *rt = (struct rtable *)dst; - BUG_ON(!rt); - saddr = rt->rt_src; - daddr = rt->rt_dst; + const struct iphdr *iph = ip_hdr(skb); + saddr = iph->saddr; + daddr = iph->daddr; } - return tcp_v4_do_calc_md5_hash(md5_hash, key, - saddr, daddr, - th, protocol, tcplen); + + hp = tcp_get_md5sig_pool(); + if (!hp) + goto clear_hash_noput; + desc = &hp->md5_desc; + + if (crypto_hash_init(desc)) + goto clear_hash; + + if (tcp_v4_md5_hash_pseudoheader(hp, daddr, saddr, skb->len)) + goto clear_hash; + if (tcp_md5_hash_header(hp, th)) + goto clear_hash; + if (tcp_md5_hash_skb_data(hp, skb, th->doff << 2)) + goto clear_hash; + if (tcp_md5_hash_key(hp, key)) + goto clear_hash; + if (crypto_hash_final(desc, md5_hash)) + goto clear_hash; + + tcp_put_md5sig_pool(); + return 0; + +clear_hash: + tcp_put_md5sig_pool(); +clear_hash_noput: + memset(md5_hash, 0, 16); + return 1; } -EXPORT_SYMBOL(tcp_v4_calc_md5_hash); +EXPORT_SYMBOL(tcp_v4_md5_hash_skb); static int tcp_v4_inbound_md5_hash(struct sock *sk, struct sk_buff *skb) { @@ -1130,80 +1110,32 @@ static int tcp_v4_inbound_md5_hash(struct sock *sk, struct sk_buff *skb) struct tcp_md5sig_key *hash_expected; const struct iphdr *iph = ip_hdr(skb); struct tcphdr *th = tcp_hdr(skb); - int length = (th->doff << 2) - sizeof(struct tcphdr); int genhash; - unsigned char *ptr; unsigned char newhash[16]; hash_expected = tcp_v4_md5_do_lookup(sk, iph->saddr); + hash_location = tcp_parse_md5sig_option(th); - /* - * If the TCP option length is less than the TCP_MD5SIG - * option length, then we can shortcut - */ - if (length < TCPOLEN_MD5SIG) { - if (hash_expected) - return 1; - else - return 0; - } - - /* Okay, we can't shortcut - we have to grub through the options */ - ptr = (unsigned char *)(th + 1); - while (length > 0) { - int opcode = *ptr++; - int opsize; - - switch (opcode) { - case TCPOPT_EOL: - goto done_opts; - case TCPOPT_NOP: - length--; - continue; - default: - opsize = *ptr++; - if (opsize < 2) - goto done_opts; - if (opsize > length) - goto done_opts; - - if (opcode == TCPOPT_MD5SIG) { - hash_location = ptr; - goto done_opts; - } - } - ptr += opsize-2; - length -= opsize; - } -done_opts: /* We've parsed the options - do we have a hash? */ if (!hash_expected && !hash_location) return 0; if (hash_expected && !hash_location) { - LIMIT_NETDEBUG(KERN_INFO "MD5 Hash expected but NOT found " - "(" NIPQUAD_FMT ", %d)->(" NIPQUAD_FMT ", %d)\n", - NIPQUAD(iph->saddr), ntohs(th->source), - NIPQUAD(iph->daddr), ntohs(th->dest)); + NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPMD5NOTFOUND); return 1; } if (!hash_expected && hash_location) { - LIMIT_NETDEBUG(KERN_INFO "MD5 Hash NOT expected but found " - "(" NIPQUAD_FMT ", %d)->(" NIPQUAD_FMT ", %d)\n", - NIPQUAD(iph->saddr), ntohs(th->source), - NIPQUAD(iph->daddr), ntohs(th->dest)); + NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPMD5UNEXPECTED); return 1; } /* Okay, so this is hash_expected and hash_location - * so we need to calculate the checksum. */ - genhash = tcp_v4_do_calc_md5_hash(newhash, - hash_expected, - iph->saddr, iph->daddr, - th, sk->sk_protocol, - skb->len); + genhash = tcp_v4_md5_hash_skb(newhash, + hash_expected, + NULL, NULL, skb); if (genhash || memcmp(hash_location, newhash, 16) != 0) { if (net_ratelimit()) { @@ -1317,6 +1249,7 @@ int tcp_v4_conn_request(struct sock *sk, struct sk_buff *skb) ireq = inet_rsk(req); ireq->loc_addr = daddr; ireq->rmt_addr = saddr; + ireq->no_srccheck = inet_sk(sk)->transparent; ireq->opt = tcp_v4_save_options(sk, skb); if (!want_cookie) TCP_ECN_create_request(req, tcp_hdr(skb)); @@ -1347,7 +1280,7 @@ int tcp_v4_conn_request(struct sock *sk, struct sk_buff *skb) if (get_seconds() < peer->tcp_ts_stamp + TCP_PAWS_MSL && (s32)(peer->tcp_ts - req->ts_recent) > TCP_PAWS_WINDOW) { - NET_INC_STATS_BH(LINUX_MIB_PAWSPASSIVEREJECTED); + NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_PAWSPASSIVEREJECTED); goto drop_and_release; } } @@ -1437,6 +1370,10 @@ struct sock *tcp_v4_syn_recv_sock(struct sock *sk, struct sk_buff *skb, tcp_mtup_init(newsk); tcp_sync_mss(newsk, dst_mtu(dst)); newtp->advmss = dst_metric(dst, RTAX_ADVMSS); + if (tcp_sk(sk)->rx_opt.user_mss && + tcp_sk(sk)->rx_opt.user_mss < newtp->advmss) + newtp->advmss = tcp_sk(sk)->rx_opt.user_mss; + tcp_initialize_rcv_mss(newsk); #ifdef CONFIG_TCP_MD5SIG @@ -1452,6 +1389,7 @@ struct sock *tcp_v4_syn_recv_sock(struct sock *sk, struct sk_buff *skb, if (newkey != NULL) tcp_v4_md5_do_add(newsk, inet_sk(sk)->daddr, newkey, key->keylen); + newsk->sk_route_caps &= ~NETIF_F_GSO_MASK; } #endif @@ -1461,9 +1399,9 @@ struct sock *tcp_v4_syn_recv_sock(struct sock *sk, struct sk_buff *skb, return newsk; exit_overflow: - NET_INC_STATS_BH(LINUX_MIB_LISTENOVERFLOWS); + NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_LISTENOVERFLOWS); exit: - NET_INC_STATS_BH(LINUX_MIB_LISTENDROPS); + NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_LISTENDROPS); dst_release(dst); return NULL; } @@ -1590,7 +1528,7 @@ discard: return 0; csum_err: - TCP_INC_STATS_BH(TCP_MIB_INERRS); + TCP_INC_STATS_BH(sock_net(sk), TCP_MIB_INERRS); goto discard; } @@ -1604,12 +1542,13 @@ int tcp_v4_rcv(struct sk_buff *skb) struct tcphdr *th; struct sock *sk; int ret; + struct net *net = dev_net(skb->dev); if (skb->pkt_type != PACKET_HOST) goto discard_it; /* Count it even if it's bad */ - TCP_INC_STATS_BH(TCP_MIB_INSEGS); + TCP_INC_STATS_BH(net, TCP_MIB_INSEGS); if (!pskb_may_pull(skb, sizeof(struct tcphdr))) goto discard_it; @@ -1638,8 +1577,7 @@ int tcp_v4_rcv(struct sk_buff *skb) TCP_SKB_CB(skb)->flags = iph->tos; TCP_SKB_CB(skb)->sacked = 0; - sk = __inet_lookup(dev_net(skb->dev), &tcp_hashinfo, iph->saddr, - th->source, iph->daddr, th->dest, inet_iif(skb)); + sk = __inet_lookup_skb(&tcp_hashinfo, skb, th->source, th->dest); if (!sk) goto no_tcp_socket; @@ -1685,7 +1623,7 @@ no_tcp_socket: if (skb->len < (th->doff << 2) || tcp_checksum_complete(skb)) { bad_packet: - TCP_INC_STATS_BH(TCP_MIB_INERRS); + TCP_INC_STATS_BH(net, TCP_MIB_INERRS); } else { tcp_v4_send_reset(NULL, skb); } @@ -1706,7 +1644,7 @@ do_time_wait: } if (skb->len < (th->doff << 2) || tcp_checksum_complete(skb)) { - TCP_INC_STATS_BH(TCP_MIB_INERRS); + TCP_INC_STATS_BH(net, TCP_MIB_INERRS); inet_twsk_put(inet_twsk(sk)); goto discard_it; } @@ -1814,7 +1752,7 @@ struct inet_connection_sock_af_ops ipv4_specific = { #ifdef CONFIG_TCP_MD5SIG static struct tcp_sock_af_ops tcp_sock_ipv4_specific = { .md5_lookup = tcp_v4_md5_lookup, - .calc_md5_hash = tcp_v4_calc_md5_hash, + .calc_md5_hash = tcp_v4_md5_hash_skb, .md5_add = tcp_v4_md5_add_func, .md5_parse = tcp_v4_parse_md5_keys, }; @@ -1871,7 +1809,7 @@ static int tcp_v4_init_sock(struct sock *sk) return 0; } -int tcp_v4_destroy_sock(struct sock *sk) +void tcp_v4_destroy_sock(struct sock *sk) { struct tcp_sock *tp = tcp_sk(sk); @@ -1915,8 +1853,6 @@ int tcp_v4_destroy_sock(struct sock *sk) } atomic_dec(&tcp_sockets_allocated); - - return 0; } EXPORT_SYMBOL(tcp_v4_destroy_sock); @@ -1959,8 +1895,7 @@ static void *listening_get_next(struct seq_file *seq, void *cur) req = req->dl_next; while (1) { while (req) { - if (req->rsk_ops->family == st->family && - net_eq(sock_net(req->sk), net)) { + if (req->rsk_ops->family == st->family) { cur = req; goto out; } @@ -2020,6 +1955,12 @@ static void *listening_get_idx(struct seq_file *seq, loff_t *pos) return rc; } +static inline int empty_bucket(struct tcp_iter_state *st) +{ + return hlist_empty(&tcp_hashinfo.ehash[st->bucket].chain) && + hlist_empty(&tcp_hashinfo.ehash[st->bucket].twchain); +} + static void *established_get_first(struct seq_file *seq) { struct tcp_iter_state* st = seq->private; @@ -2032,6 +1973,10 @@ static void *established_get_first(struct seq_file *seq) struct inet_timewait_sock *tw; rwlock_t *lock = inet_ehash_lockp(&tcp_hashinfo, st->bucket); + /* Lockless fast path for the common case of empty buckets */ + if (empty_bucket(st)) + continue; + read_lock_bh(lock); sk_for_each(sk, node, &tcp_hashinfo.ehash[st->bucket].chain) { if (sk->sk_family != st->family || @@ -2082,13 +2027,15 @@ get_tw: read_unlock_bh(inet_ehash_lockp(&tcp_hashinfo, st->bucket)); st->state = TCP_SEQ_STATE_ESTABLISHED; - if (++st->bucket < tcp_hashinfo.ehash_size) { - read_lock_bh(inet_ehash_lockp(&tcp_hashinfo, st->bucket)); - sk = sk_head(&tcp_hashinfo.ehash[st->bucket].chain); - } else { - cur = NULL; - goto out; - } + /* Look for next non empty bucket */ + while (++st->bucket < tcp_hashinfo.ehash_size && + empty_bucket(st)) + ; + if (st->bucket >= tcp_hashinfo.ehash_size) + return NULL; + + read_lock_bh(inet_ehash_lockp(&tcp_hashinfo, st->bucket)); + sk = sk_head(&tcp_hashinfo.ehash[st->bucket].chain); } else sk = sk_next(sk); @@ -2291,7 +2238,7 @@ static void get_tcp4_sock(struct sock *sk, struct seq_file *f, int i, int *len) } seq_printf(f, "%4d: %08X:%04X %08X:%04X %02X %08X:%08X %02X:%08lX " - "%08X %5d %8d %lu %d %p %u %u %u %u %d%n", + "%08X %5d %8d %lu %d %p %lu %lu %u %u %d%n", i, src, srcp, dest, destp, sk->sk_state, tp->write_seq - tp->snd_una, sk->sk_state == TCP_LISTEN ? sk->sk_ack_backlog : @@ -2303,8 +2250,8 @@ static void get_tcp4_sock(struct sock *sk, struct seq_file *f, int i, int *len) icsk->icsk_probes_out, sock_i_ino(sk), atomic_read(&sk->sk_refcnt), sk, - icsk->icsk_rto, - icsk->icsk_ack.ato, + jiffies_to_clock_t(icsk->icsk_rto), + jiffies_to_clock_t(icsk->icsk_ack.ato), (icsk->icsk_ack.quick << 1) | icsk->icsk_ack.pingpong, tp->snd_cwnd, tp->snd_ssthresh >= 0xFFFF ? -1 : tp->snd_ssthresh, @@ -2450,6 +2397,7 @@ static int __net_init tcp_sk_init(struct net *net) static void __net_exit tcp_sk_exit(struct net *net) { inet_ctl_sock_destroy(net->ipv4.tcp_sock); + inet_twsk_purge(net, &tcp_hashinfo, &tcp_death_row, AF_INET); } static struct pernet_operations __net_initdata tcp_sk_ops = { diff --git a/net/ipv4/tcp_minisocks.c b/net/ipv4/tcp_minisocks.c index 8245247a6ceb..779f2e9d0689 100644 --- a/net/ipv4/tcp_minisocks.c +++ b/net/ipv4/tcp_minisocks.c @@ -5,8 +5,6 @@ * * Implementation of the Transmission Control Protocol(TCP). * - * Version: $Id: tcp_minisocks.c,v 1.15 2002/02/01 22:01:04 davem Exp $ - * * Authors: Ross Biro * Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG> * Mark Evans, <evansmp@uhura.aston.ac.uk> @@ -246,7 +244,7 @@ kill: } if (paws_reject) - NET_INC_STATS_BH(LINUX_MIB_PAWSESTABREJECTED); + NET_INC_STATS_BH(twsk_net(tw), LINUX_MIB_PAWSESTABREJECTED); if (!th->rst) { /* In this case we must reset the TIMEWAIT timer. @@ -397,6 +395,7 @@ struct sock *tcp_create_openreq_child(struct sock *sk, struct request_sock *req, newtp->pred_flags = 0; newtp->rcv_wup = newtp->copied_seq = newtp->rcv_nxt = treq->rcv_isn + 1; newtp->snd_sml = newtp->snd_una = newtp->snd_nxt = treq->snt_isn + 1; + newtp->snd_up = treq->snt_isn + 1; tcp_prequeue_init(newtp); @@ -482,7 +481,7 @@ struct sock *tcp_create_openreq_child(struct sock *sk, struct request_sock *req, newtp->rx_opt.mss_clamp = req->mss; TCP_ECN_openreq_child(newtp, req); - TCP_INC_STATS_BH(TCP_MIB_PASSIVEOPENS); + TCP_INC_STATS_BH(sock_net(sk), TCP_MIB_PASSIVEOPENS); } return newsk; } @@ -611,98 +610,96 @@ struct sock *tcp_check_req(struct sock *sk,struct sk_buff *skb, tcp_rsk(req)->rcv_isn + 1, tcp_rsk(req)->rcv_isn + 1 + req->rcv_wnd)) { /* Out of window: send ACK and drop. */ if (!(flg & TCP_FLAG_RST)) - req->rsk_ops->send_ack(skb, req); + req->rsk_ops->send_ack(sk, skb, req); if (paws_reject) - NET_INC_STATS_BH(LINUX_MIB_PAWSESTABREJECTED); + NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_PAWSESTABREJECTED); return NULL; } /* In sequence, PAWS is OK. */ if (tmp_opt.saw_tstamp && !after(TCP_SKB_CB(skb)->seq, tcp_rsk(req)->rcv_isn + 1)) - req->ts_recent = tmp_opt.rcv_tsval; + req->ts_recent = tmp_opt.rcv_tsval; - if (TCP_SKB_CB(skb)->seq == tcp_rsk(req)->rcv_isn) { - /* Truncate SYN, it is out of window starting - at tcp_rsk(req)->rcv_isn + 1. */ - flg &= ~TCP_FLAG_SYN; - } + if (TCP_SKB_CB(skb)->seq == tcp_rsk(req)->rcv_isn) { + /* Truncate SYN, it is out of window starting + at tcp_rsk(req)->rcv_isn + 1. */ + flg &= ~TCP_FLAG_SYN; + } - /* RFC793: "second check the RST bit" and - * "fourth, check the SYN bit" - */ - if (flg & (TCP_FLAG_RST|TCP_FLAG_SYN)) { - TCP_INC_STATS_BH(TCP_MIB_ATTEMPTFAILS); - goto embryonic_reset; - } + /* RFC793: "second check the RST bit" and + * "fourth, check the SYN bit" + */ + if (flg & (TCP_FLAG_RST|TCP_FLAG_SYN)) { + TCP_INC_STATS_BH(sock_net(sk), TCP_MIB_ATTEMPTFAILS); + goto embryonic_reset; + } - /* ACK sequence verified above, just make sure ACK is - * set. If ACK not set, just silently drop the packet. - */ - if (!(flg & TCP_FLAG_ACK)) - return NULL; - - /* If TCP_DEFER_ACCEPT is set, drop bare ACK. */ - if (inet_csk(sk)->icsk_accept_queue.rskq_defer_accept && - TCP_SKB_CB(skb)->end_seq == tcp_rsk(req)->rcv_isn + 1) { - inet_rsk(req)->acked = 1; - return NULL; - } + /* ACK sequence verified above, just make sure ACK is + * set. If ACK not set, just silently drop the packet. + */ + if (!(flg & TCP_FLAG_ACK)) + return NULL; - /* OK, ACK is valid, create big socket and - * feed this segment to it. It will repeat all - * the tests. THIS SEGMENT MUST MOVE SOCKET TO - * ESTABLISHED STATE. If it will be dropped after - * socket is created, wait for troubles. - */ - child = inet_csk(sk)->icsk_af_ops->syn_recv_sock(sk, skb, - req, NULL); - if (child == NULL) - goto listen_overflow; + /* If TCP_DEFER_ACCEPT is set, drop bare ACK. */ + if (inet_csk(sk)->icsk_accept_queue.rskq_defer_accept && + TCP_SKB_CB(skb)->end_seq == tcp_rsk(req)->rcv_isn + 1) { + inet_rsk(req)->acked = 1; + return NULL; + } + + /* OK, ACK is valid, create big socket and + * feed this segment to it. It will repeat all + * the tests. THIS SEGMENT MUST MOVE SOCKET TO + * ESTABLISHED STATE. If it will be dropped after + * socket is created, wait for troubles. + */ + child = inet_csk(sk)->icsk_af_ops->syn_recv_sock(sk, skb, req, NULL); + if (child == NULL) + goto listen_overflow; #ifdef CONFIG_TCP_MD5SIG - else { - /* Copy over the MD5 key from the original socket */ - struct tcp_md5sig_key *key; - struct tcp_sock *tp = tcp_sk(sk); - key = tp->af_specific->md5_lookup(sk, child); - if (key != NULL) { - /* - * We're using one, so create a matching key on the - * newsk structure. If we fail to get memory then we - * end up not copying the key across. Shucks. - */ - char *newkey = kmemdup(key->key, key->keylen, - GFP_ATOMIC); - if (newkey) { - if (!tcp_alloc_md5sig_pool()) - BUG(); - tp->af_specific->md5_add(child, child, - newkey, - key->keylen); - } + else { + /* Copy over the MD5 key from the original socket */ + struct tcp_md5sig_key *key; + struct tcp_sock *tp = tcp_sk(sk); + key = tp->af_specific->md5_lookup(sk, child); + if (key != NULL) { + /* + * We're using one, so create a matching key on the + * newsk structure. If we fail to get memory then we + * end up not copying the key across. Shucks. + */ + char *newkey = kmemdup(key->key, key->keylen, + GFP_ATOMIC); + if (newkey) { + if (!tcp_alloc_md5sig_pool()) + BUG(); + tp->af_specific->md5_add(child, child, newkey, + key->keylen); } } + } #endif - inet_csk_reqsk_queue_unlink(sk, req, prev); - inet_csk_reqsk_queue_removed(sk, req); + inet_csk_reqsk_queue_unlink(sk, req, prev); + inet_csk_reqsk_queue_removed(sk, req); - inet_csk_reqsk_queue_add(sk, req, child); - return child; + inet_csk_reqsk_queue_add(sk, req, child); + return child; - listen_overflow: - if (!sysctl_tcp_abort_on_overflow) { - inet_rsk(req)->acked = 1; - return NULL; - } +listen_overflow: + if (!sysctl_tcp_abort_on_overflow) { + inet_rsk(req)->acked = 1; + return NULL; + } - embryonic_reset: - NET_INC_STATS_BH(LINUX_MIB_EMBRYONICRSTS); - if (!(flg & TCP_FLAG_RST)) - req->rsk_ops->send_reset(sk, skb); +embryonic_reset: + NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_EMBRYONICRSTS); + if (!(flg & TCP_FLAG_RST)) + req->rsk_ops->send_reset(sk, skb); - inet_csk_reqsk_queue_drop(sk, req, prev); - return NULL; + inet_csk_reqsk_queue_drop(sk, req, prev); + return NULL; } /* diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c index ad993ecb4810..990a58493235 100644 --- a/net/ipv4/tcp_output.c +++ b/net/ipv4/tcp_output.c @@ -5,8 +5,6 @@ * * Implementation of the Transmission Control Protocol(TCP). * - * Version: $Id: tcp_output.c,v 1.146 2002/02/01 22:01:04 davem Exp $ - * * Authors: Ross Biro * Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG> * Mark Evans, <evansmp@uhura.aston.ac.uk> @@ -347,28 +345,87 @@ static void tcp_init_nondata_skb(struct sk_buff *skb, u32 seq, u8 flags) TCP_SKB_CB(skb)->end_seq = seq; } -static void tcp_build_and_update_options(__be32 *ptr, struct tcp_sock *tp, - __u32 tstamp, __u8 **md5_hash) +static inline int tcp_urg_mode(const struct tcp_sock *tp) { - if (tp->rx_opt.tstamp_ok) { + return tp->snd_una != tp->snd_up; +} + +#define OPTION_SACK_ADVERTISE (1 << 0) +#define OPTION_TS (1 << 1) +#define OPTION_MD5 (1 << 2) + +struct tcp_out_options { + u8 options; /* bit field of OPTION_* */ + u8 ws; /* window scale, 0 to disable */ + u8 num_sack_blocks; /* number of SACK blocks to include */ + u16 mss; /* 0 to disable */ + __u32 tsval, tsecr; /* need to include OPTION_TS */ +}; + +static void tcp_options_write(__be32 *ptr, struct tcp_sock *tp, + const struct tcp_out_options *opts, + __u8 **md5_hash) { + if (unlikely(OPTION_MD5 & opts->options)) { + *ptr++ = htonl((TCPOPT_NOP << 24) | + (TCPOPT_NOP << 16) | + (TCPOPT_MD5SIG << 8) | + TCPOLEN_MD5SIG); + *md5_hash = (__u8 *)ptr; + ptr += 4; + } else { + *md5_hash = NULL; + } + + if (likely(OPTION_TS & opts->options)) { + if (unlikely(OPTION_SACK_ADVERTISE & opts->options)) { + *ptr++ = htonl((TCPOPT_SACK_PERM << 24) | + (TCPOLEN_SACK_PERM << 16) | + (TCPOPT_TIMESTAMP << 8) | + TCPOLEN_TIMESTAMP); + } else { + *ptr++ = htonl((TCPOPT_NOP << 24) | + (TCPOPT_NOP << 16) | + (TCPOPT_TIMESTAMP << 8) | + TCPOLEN_TIMESTAMP); + } + *ptr++ = htonl(opts->tsval); + *ptr++ = htonl(opts->tsecr); + } + + if (unlikely(opts->mss)) { + *ptr++ = htonl((TCPOPT_MSS << 24) | + (TCPOLEN_MSS << 16) | + opts->mss); + } + + if (unlikely(OPTION_SACK_ADVERTISE & opts->options && + !(OPTION_TS & opts->options))) { *ptr++ = htonl((TCPOPT_NOP << 24) | (TCPOPT_NOP << 16) | - (TCPOPT_TIMESTAMP << 8) | - TCPOLEN_TIMESTAMP); - *ptr++ = htonl(tstamp); - *ptr++ = htonl(tp->rx_opt.ts_recent); + (TCPOPT_SACK_PERM << 8) | + TCPOLEN_SACK_PERM); + } + + if (unlikely(opts->ws)) { + *ptr++ = htonl((TCPOPT_NOP << 24) | + (TCPOPT_WINDOW << 16) | + (TCPOLEN_WINDOW << 8) | + opts->ws); } - if (tp->rx_opt.eff_sacks) { - struct tcp_sack_block *sp = tp->rx_opt.dsack ? tp->duplicate_sack : tp->selective_acks; + + if (unlikely(opts->num_sack_blocks)) { + struct tcp_sack_block *sp = tp->rx_opt.dsack ? + tp->duplicate_sack : tp->selective_acks; int this_sack; *ptr++ = htonl((TCPOPT_NOP << 24) | (TCPOPT_NOP << 16) | (TCPOPT_SACK << 8) | - (TCPOLEN_SACK_BASE + (tp->rx_opt.eff_sacks * + (TCPOLEN_SACK_BASE + (opts->num_sack_blocks * TCPOLEN_SACK_PERBLOCK))); - for (this_sack = 0; this_sack < tp->rx_opt.eff_sacks; this_sack++) { + for (this_sack = 0; this_sack < opts->num_sack_blocks; + ++this_sack) { *ptr++ = htonl(sp[this_sack].start_seq); *ptr++ = htonl(sp[this_sack].end_seq); } @@ -378,81 +435,139 @@ static void tcp_build_and_update_options(__be32 *ptr, struct tcp_sock *tp, tp->rx_opt.eff_sacks--; } } +} + +static unsigned tcp_syn_options(struct sock *sk, struct sk_buff *skb, + struct tcp_out_options *opts, + struct tcp_md5sig_key **md5) { + struct tcp_sock *tp = tcp_sk(sk); + unsigned size = 0; + #ifdef CONFIG_TCP_MD5SIG - if (md5_hash) { - *ptr++ = htonl((TCPOPT_NOP << 24) | - (TCPOPT_NOP << 16) | - (TCPOPT_MD5SIG << 8) | - TCPOLEN_MD5SIG); - *md5_hash = (__u8 *)ptr; + *md5 = tp->af_specific->md5_lookup(sk, sk); + if (*md5) { + opts->options |= OPTION_MD5; + size += TCPOLEN_MD5SIG_ALIGNED; } +#else + *md5 = NULL; #endif + + /* We always get an MSS option. The option bytes which will be seen in + * normal data packets should timestamps be used, must be in the MSS + * advertised. But we subtract them from tp->mss_cache so that + * calculations in tcp_sendmsg are simpler etc. So account for this + * fact here if necessary. If we don't do this correctly, as a + * receiver we won't recognize data packets as being full sized when we + * should, and thus we won't abide by the delayed ACK rules correctly. + * SACKs don't matter, we never delay an ACK when we have any of those + * going out. */ + opts->mss = tcp_advertise_mss(sk); + size += TCPOLEN_MSS_ALIGNED; + + if (likely(sysctl_tcp_timestamps && *md5 == NULL)) { + opts->options |= OPTION_TS; + opts->tsval = TCP_SKB_CB(skb)->when; + opts->tsecr = tp->rx_opt.ts_recent; + size += TCPOLEN_TSTAMP_ALIGNED; + } + if (likely(sysctl_tcp_window_scaling)) { + opts->ws = tp->rx_opt.rcv_wscale; + if(likely(opts->ws)) + size += TCPOLEN_WSCALE_ALIGNED; + } + if (likely(sysctl_tcp_sack)) { + opts->options |= OPTION_SACK_ADVERTISE; + if (unlikely(!(OPTION_TS & opts->options))) + size += TCPOLEN_SACKPERM_ALIGNED; + } + + return size; } -/* Construct a tcp options header for a SYN or SYN_ACK packet. - * If this is every changed make sure to change the definition of - * MAX_SYN_SIZE to match the new maximum number of options that you - * can generate. - * - * Note - that with the RFC2385 TCP option, we make room for the - * 16 byte MD5 hash. This will be filled in later, so the pointer for the - * location to be filled is passed back up. - */ -static void tcp_syn_build_options(__be32 *ptr, int mss, int ts, int sack, - int offer_wscale, int wscale, __u32 tstamp, - __u32 ts_recent, __u8 **md5_hash) -{ - /* We always get an MSS option. - * The option bytes which will be seen in normal data - * packets should timestamps be used, must be in the MSS - * advertised. But we subtract them from tp->mss_cache so - * that calculations in tcp_sendmsg are simpler etc. - * So account for this fact here if necessary. If we - * don't do this correctly, as a receiver we won't - * recognize data packets as being full sized when we - * should, and thus we won't abide by the delayed ACK - * rules correctly. - * SACKs don't matter, we never delay an ACK when we - * have any of those going out. - */ - *ptr++ = htonl((TCPOPT_MSS << 24) | (TCPOLEN_MSS << 16) | mss); - if (ts) { - if (sack) - *ptr++ = htonl((TCPOPT_SACK_PERM << 24) | - (TCPOLEN_SACK_PERM << 16) | - (TCPOPT_TIMESTAMP << 8) | - TCPOLEN_TIMESTAMP); - else - *ptr++ = htonl((TCPOPT_NOP << 24) | - (TCPOPT_NOP << 16) | - (TCPOPT_TIMESTAMP << 8) | - TCPOLEN_TIMESTAMP); - *ptr++ = htonl(tstamp); /* TSVAL */ - *ptr++ = htonl(ts_recent); /* TSECR */ - } else if (sack) - *ptr++ = htonl((TCPOPT_NOP << 24) | - (TCPOPT_NOP << 16) | - (TCPOPT_SACK_PERM << 8) | - TCPOLEN_SACK_PERM); - if (offer_wscale) - *ptr++ = htonl((TCPOPT_NOP << 24) | - (TCPOPT_WINDOW << 16) | - (TCPOLEN_WINDOW << 8) | - (wscale)); +static unsigned tcp_synack_options(struct sock *sk, + struct request_sock *req, + unsigned mss, struct sk_buff *skb, + struct tcp_out_options *opts, + struct tcp_md5sig_key **md5) { + unsigned size = 0; + struct inet_request_sock *ireq = inet_rsk(req); + char doing_ts; + #ifdef CONFIG_TCP_MD5SIG - /* - * If MD5 is enabled, then we set the option, and include the size - * (always 18). The actual MD5 hash is added just before the - * packet is sent. - */ - if (md5_hash) { - *ptr++ = htonl((TCPOPT_NOP << 24) | - (TCPOPT_NOP << 16) | - (TCPOPT_MD5SIG << 8) | - TCPOLEN_MD5SIG); - *md5_hash = (__u8 *)ptr; + *md5 = tcp_rsk(req)->af_specific->md5_lookup(sk, req); + if (*md5) { + opts->options |= OPTION_MD5; + size += TCPOLEN_MD5SIG_ALIGNED; } +#else + *md5 = NULL; #endif + + /* we can't fit any SACK blocks in a packet with MD5 + TS + options. There was discussion about disabling SACK rather than TS in + order to fit in better with old, buggy kernels, but that was deemed + to be unnecessary. */ + doing_ts = ireq->tstamp_ok && !(*md5 && ireq->sack_ok); + + opts->mss = mss; + size += TCPOLEN_MSS_ALIGNED; + + if (likely(ireq->wscale_ok)) { + opts->ws = ireq->rcv_wscale; + if(likely(opts->ws)) + size += TCPOLEN_WSCALE_ALIGNED; + } + if (likely(doing_ts)) { + opts->options |= OPTION_TS; + opts->tsval = TCP_SKB_CB(skb)->when; + opts->tsecr = req->ts_recent; + size += TCPOLEN_TSTAMP_ALIGNED; + } + if (likely(ireq->sack_ok)) { + opts->options |= OPTION_SACK_ADVERTISE; + if (unlikely(!doing_ts)) + size += TCPOLEN_SACKPERM_ALIGNED; + } + + return size; +} + +static unsigned tcp_established_options(struct sock *sk, struct sk_buff *skb, + struct tcp_out_options *opts, + struct tcp_md5sig_key **md5) { + struct tcp_skb_cb *tcb = skb ? TCP_SKB_CB(skb) : NULL; + struct tcp_sock *tp = tcp_sk(sk); + unsigned size = 0; + +#ifdef CONFIG_TCP_MD5SIG + *md5 = tp->af_specific->md5_lookup(sk, sk); + if (unlikely(*md5)) { + opts->options |= OPTION_MD5; + size += TCPOLEN_MD5SIG_ALIGNED; + } +#else + *md5 = NULL; +#endif + + if (likely(tp->rx_opt.tstamp_ok)) { + opts->options |= OPTION_TS; + opts->tsval = tcb ? tcb->when : 0; + opts->tsecr = tp->rx_opt.ts_recent; + size += TCPOLEN_TSTAMP_ALIGNED; + } + + if (unlikely(tp->rx_opt.eff_sacks)) { + const unsigned remaining = MAX_TCP_OPTION_SPACE - size; + opts->num_sack_blocks = + min_t(unsigned, tp->rx_opt.eff_sacks, + (remaining - TCPOLEN_SACK_BASE_ALIGNED) / + TCPOLEN_SACK_PERBLOCK); + size += TCPOLEN_SACK_BASE_ALIGNED + + opts->num_sack_blocks * TCPOLEN_SACK_PERBLOCK; + } + + return size; } /* This routine actually transmits TCP packets queued in by @@ -473,13 +588,11 @@ static int tcp_transmit_skb(struct sock *sk, struct sk_buff *skb, int clone_it, struct inet_sock *inet; struct tcp_sock *tp; struct tcp_skb_cb *tcb; - int tcp_header_size; -#ifdef CONFIG_TCP_MD5SIG + struct tcp_out_options opts; + unsigned tcp_options_size, tcp_header_size; struct tcp_md5sig_key *md5; __u8 *md5_hash_location; -#endif struct tcphdr *th; - int sysctl_flags; int err; BUG_ON(!skb || !tcp_skb_pcount(skb)); @@ -502,50 +615,18 @@ static int tcp_transmit_skb(struct sock *sk, struct sk_buff *skb, int clone_it, inet = inet_sk(sk); tp = tcp_sk(sk); tcb = TCP_SKB_CB(skb); - tcp_header_size = tp->tcp_header_len; - -#define SYSCTL_FLAG_TSTAMPS 0x1 -#define SYSCTL_FLAG_WSCALE 0x2 -#define SYSCTL_FLAG_SACK 0x4 + memset(&opts, 0, sizeof(opts)); - sysctl_flags = 0; - if (unlikely(tcb->flags & TCPCB_FLAG_SYN)) { - tcp_header_size = sizeof(struct tcphdr) + TCPOLEN_MSS; - if (sysctl_tcp_timestamps) { - tcp_header_size += TCPOLEN_TSTAMP_ALIGNED; - sysctl_flags |= SYSCTL_FLAG_TSTAMPS; - } - if (sysctl_tcp_window_scaling) { - tcp_header_size += TCPOLEN_WSCALE_ALIGNED; - sysctl_flags |= SYSCTL_FLAG_WSCALE; - } - if (sysctl_tcp_sack) { - sysctl_flags |= SYSCTL_FLAG_SACK; - if (!(sysctl_flags & SYSCTL_FLAG_TSTAMPS)) - tcp_header_size += TCPOLEN_SACKPERM_ALIGNED; - } - } else if (unlikely(tp->rx_opt.eff_sacks)) { - /* A SACK is 2 pad bytes, a 2 byte header, plus - * 2 32-bit sequence numbers for each SACK block. - */ - tcp_header_size += (TCPOLEN_SACK_BASE_ALIGNED + - (tp->rx_opt.eff_sacks * - TCPOLEN_SACK_PERBLOCK)); - } + if (unlikely(tcb->flags & TCPCB_FLAG_SYN)) + tcp_options_size = tcp_syn_options(sk, skb, &opts, &md5); + else + tcp_options_size = tcp_established_options(sk, skb, &opts, + &md5); + tcp_header_size = tcp_options_size + sizeof(struct tcphdr); if (tcp_packets_in_flight(tp) == 0) tcp_ca_event(sk, CA_EVENT_TX_START); -#ifdef CONFIG_TCP_MD5SIG - /* - * Are we doing MD5 on this segment? If so - make - * room for it. - */ - md5 = tp->af_specific->md5_lookup(sk, sk); - if (md5) - tcp_header_size += TCPOLEN_MD5SIG_ALIGNED; -#endif - skb_push(skb, tcp_header_size); skb_reset_transport_header(skb); skb_set_owner_w(skb, sk); @@ -570,45 +651,23 @@ static int tcp_transmit_skb(struct sock *sk, struct sk_buff *skb, int clone_it, th->check = 0; th->urg_ptr = 0; - if (unlikely(tp->urg_mode && + /* The urg_mode check is necessary during a below snd_una win probe */ + if (unlikely(tcp_urg_mode(tp) && between(tp->snd_up, tcb->seq + 1, tcb->seq + 0xFFFF))) { th->urg_ptr = htons(tp->snd_up - tcb->seq); th->urg = 1; } - if (unlikely(tcb->flags & TCPCB_FLAG_SYN)) { - tcp_syn_build_options((__be32 *)(th + 1), - tcp_advertise_mss(sk), - (sysctl_flags & SYSCTL_FLAG_TSTAMPS), - (sysctl_flags & SYSCTL_FLAG_SACK), - (sysctl_flags & SYSCTL_FLAG_WSCALE), - tp->rx_opt.rcv_wscale, - tcb->when, - tp->rx_opt.ts_recent, - -#ifdef CONFIG_TCP_MD5SIG - md5 ? &md5_hash_location : -#endif - NULL); - } else { - tcp_build_and_update_options((__be32 *)(th + 1), - tp, tcb->when, -#ifdef CONFIG_TCP_MD5SIG - md5 ? &md5_hash_location : -#endif - NULL); + tcp_options_write((__be32 *)(th + 1), tp, &opts, &md5_hash_location); + if (likely((tcb->flags & TCPCB_FLAG_SYN) == 0)) TCP_ECN_send(sk, skb, tcp_header_size); - } #ifdef CONFIG_TCP_MD5SIG /* Calculate the MD5 hash, as we have all we need now */ if (md5) { + sk->sk_route_caps &= ~NETIF_F_GSO_MASK; tp->af_specific->calc_md5_hash(md5_hash_location, - md5, - sk, NULL, NULL, - tcp_hdr(skb), - sk->sk_protocol, - skb->len); + md5, sk, NULL, skb); } #endif @@ -621,7 +680,7 @@ static int tcp_transmit_skb(struct sock *sk, struct sk_buff *skb, int clone_it, tcp_event_data_sent(tp, skb, sk); if (after(tcb->end_seq, tp->snd_nxt) || tcb->seq == tcb->end_seq) - TCP_INC_STATS(TCP_MIB_OUTSEGS); + TCP_INC_STATS(sock_net(sk), TCP_MIB_OUTSEGS); err = icsk->icsk_af_ops->queue_xmit(skb, 0); if (likely(err <= 0)) @@ -630,10 +689,6 @@ static int tcp_transmit_skb(struct sock *sk, struct sk_buff *skb, int clone_it, tcp_enter_cwr(sk, 1); return net_xmit_eval(err); - -#undef SYSCTL_FLAG_TSTAMPS -#undef SYSCTL_FLAG_WSCALE -#undef SYSCTL_FLAG_SACK } /* This routine just queue's the buffer @@ -963,7 +1018,7 @@ unsigned int tcp_sync_mss(struct sock *sk, u32 pmtu) /* Compute the current effective MSS, taking SACKs and IP options, * and even PMTU discovery events into account. * - * LARGESEND note: !urg_mode is overkill, only frames up to snd_up + * LARGESEND note: !tcp_urg_mode is overkill, only frames up to snd_up * cannot be large. However, taking into account rare use of URG, this * is not a big flaw. */ @@ -974,10 +1029,13 @@ unsigned int tcp_current_mss(struct sock *sk, int large_allowed) u32 mss_now; u16 xmit_size_goal; int doing_tso = 0; + unsigned header_len; + struct tcp_out_options opts; + struct tcp_md5sig_key *md5; mss_now = tp->mss_cache; - if (large_allowed && sk_can_gso(sk) && !tp->urg_mode) + if (large_allowed && sk_can_gso(sk) && !tcp_urg_mode(tp)) doing_tso = 1; if (dst) { @@ -986,14 +1044,16 @@ unsigned int tcp_current_mss(struct sock *sk, int large_allowed) mss_now = tcp_sync_mss(sk, mtu); } - if (tp->rx_opt.eff_sacks) - mss_now -= (TCPOLEN_SACK_BASE_ALIGNED + - (tp->rx_opt.eff_sacks * TCPOLEN_SACK_PERBLOCK)); - -#ifdef CONFIG_TCP_MD5SIG - if (tp->af_specific->md5_lookup(sk, sk)) - mss_now -= TCPOLEN_MD5SIG_ALIGNED; -#endif + header_len = tcp_established_options(sk, NULL, &opts, &md5) + + sizeof(struct tcphdr); + /* The mss_cache is sized based on tp->tcp_header_len, which assumes + * some common options. If this is an odd packet (because we have SACK + * blocks etc) then our calculated header_len will be different, and + * we have to adjust mss_now correspondingly */ + if (header_len != tp->tcp_header_len) { + int delta = (int) header_len - tp->tcp_header_len; + mss_now -= delta; + } xmit_size_goal = mss_now; @@ -1139,7 +1199,7 @@ static inline int tcp_nagle_test(struct tcp_sock *tp, struct sk_buff *skb, /* Don't use the nagle rule for urgent data (or for the final FIN). * Nagle can be ignored during F-RTO too (see RFC4138). */ - if (tp->urg_mode || (tp->frto_counter == 2) || + if (tcp_urg_mode(tp) || (tp->frto_counter == 2) || (TCP_SKB_CB(skb)->flags & TCPCB_FLAG_FIN)) return 1; @@ -1770,6 +1830,8 @@ static void tcp_retrans_try_collapse(struct sock *sk, struct sk_buff *skb, /* changed transmit queue under us so clear hints */ tcp_clear_retrans_hints_partial(tp); + if (next_skb == tp->retransmit_skb_hint) + tp->retransmit_skb_hint = skb; sk_wmem_free_skb(sk, next_skb); } @@ -1784,7 +1846,7 @@ void tcp_simple_retransmit(struct sock *sk) struct tcp_sock *tp = tcp_sk(sk); struct sk_buff *skb; unsigned int mss = tcp_current_mss(sk, 0); - int lost = 0; + u32 prior_lost = tp->lost_out; tcp_for_write_queue(skb, sk) { if (skb == tcp_send_head(sk)) @@ -1795,17 +1857,13 @@ void tcp_simple_retransmit(struct sock *sk) TCP_SKB_CB(skb)->sacked &= ~TCPCB_SACKED_RETRANS; tp->retrans_out -= tcp_skb_pcount(skb); } - if (!(TCP_SKB_CB(skb)->sacked & TCPCB_LOST)) { - TCP_SKB_CB(skb)->sacked |= TCPCB_LOST; - tp->lost_out += tcp_skb_pcount(skb); - lost = 1; - } + tcp_skb_mark_lost_uncond_verify(tp, skb); } } - tcp_clear_all_retrans_hints(tp); + tcp_clear_retrans_hints_partial(tp); - if (!lost) + if (prior_lost == tp->lost_out) return; if (tcp_is_reno(tp)) @@ -1880,8 +1938,8 @@ int tcp_retransmit_skb(struct sock *sk, struct sk_buff *skb) /* Collapse two adjacent packets if worthwhile and we can. */ if (!(TCP_SKB_CB(skb)->flags & TCPCB_FLAG_SYN) && (skb->len < (cur_mss >> 1)) && - (tcp_write_queue_next(sk, skb) != tcp_send_head(sk)) && (!tcp_skb_is_last(sk, skb)) && + (tcp_write_queue_next(sk, skb) != tcp_send_head(sk)) && (skb_shinfo(skb)->nr_frags == 0 && skb_shinfo(tcp_write_queue_next(sk, skb))->nr_frags == 0) && (tcp_skb_pcount(skb) == 1 && @@ -1913,7 +1971,7 @@ int tcp_retransmit_skb(struct sock *sk, struct sk_buff *skb) if (err == 0) { /* Update global TCP statistics. */ - TCP_INC_STATS(TCP_MIB_RETRANSSEGS); + TCP_INC_STATS(sock_net(sk), TCP_MIB_RETRANSSEGS); tp->total_retrans++; @@ -1942,83 +2000,18 @@ int tcp_retransmit_skb(struct sock *sk, struct sk_buff *skb) return err; } -/* This gets called after a retransmit timeout, and the initially - * retransmitted data is acknowledged. It tries to continue - * resending the rest of the retransmit queue, until either - * we've sent it all or the congestion window limit is reached. - * If doing SACK, the first ACK which comes back for a timeout - * based retransmit packet might feed us FACK information again. - * If so, we use it to avoid unnecessarily retransmissions. - */ -void tcp_xmit_retransmit_queue(struct sock *sk) +static int tcp_can_forward_retransmit(struct sock *sk) { const struct inet_connection_sock *icsk = inet_csk(sk); struct tcp_sock *tp = tcp_sk(sk); - struct sk_buff *skb; - int packet_cnt; - - if (tp->retransmit_skb_hint) { - skb = tp->retransmit_skb_hint; - packet_cnt = tp->retransmit_cnt_hint; - } else { - skb = tcp_write_queue_head(sk); - packet_cnt = 0; - } - - /* First pass: retransmit lost packets. */ - if (tp->lost_out) { - tcp_for_write_queue_from(skb, sk) { - __u8 sacked = TCP_SKB_CB(skb)->sacked; - - if (skb == tcp_send_head(sk)) - break; - /* we could do better than to assign each time */ - tp->retransmit_skb_hint = skb; - tp->retransmit_cnt_hint = packet_cnt; - - /* Assume this retransmit will generate - * only one packet for congestion window - * calculation purposes. This works because - * tcp_retransmit_skb() will chop up the - * packet to be MSS sized and all the - * packet counting works out. - */ - if (tcp_packets_in_flight(tp) >= tp->snd_cwnd) - return; - - if (sacked & TCPCB_LOST) { - if (!(sacked & (TCPCB_SACKED_ACKED|TCPCB_SACKED_RETRANS))) { - if (tcp_retransmit_skb(sk, skb)) { - tp->retransmit_skb_hint = NULL; - return; - } - if (icsk->icsk_ca_state != TCP_CA_Loss) - NET_INC_STATS_BH(LINUX_MIB_TCPFASTRETRANS); - else - NET_INC_STATS_BH(LINUX_MIB_TCPSLOWSTARTRETRANS); - - if (skb == tcp_write_queue_head(sk)) - inet_csk_reset_xmit_timer(sk, ICSK_TIME_RETRANS, - inet_csk(sk)->icsk_rto, - TCP_RTO_MAX); - } - - packet_cnt += tcp_skb_pcount(skb); - if (packet_cnt >= tp->lost_out) - break; - } - } - } - - /* OK, demanded retransmission is finished. */ /* Forward retransmissions are possible only during Recovery. */ if (icsk->icsk_ca_state != TCP_CA_Recovery) - return; + return 0; /* No forward retransmissions in Reno are possible. */ if (tcp_is_reno(tp)) - return; + return 0; /* Yeah, we have to make difficult choice between forward transmission * and retransmission... Both ways have their merits... @@ -2029,43 +2022,104 @@ void tcp_xmit_retransmit_queue(struct sock *sk) */ if (tcp_may_send_now(sk)) - return; + return 0; - /* If nothing is SACKed, highest_sack in the loop won't be valid */ - if (!tp->sacked_out) - return; + return 1; +} - if (tp->forward_skb_hint) - skb = tp->forward_skb_hint; - else +/* This gets called after a retransmit timeout, and the initially + * retransmitted data is acknowledged. It tries to continue + * resending the rest of the retransmit queue, until either + * we've sent it all or the congestion window limit is reached. + * If doing SACK, the first ACK which comes back for a timeout + * based retransmit packet might feed us FACK information again. + * If so, we use it to avoid unnecessarily retransmissions. + */ +void tcp_xmit_retransmit_queue(struct sock *sk) +{ + const struct inet_connection_sock *icsk = inet_csk(sk); + struct tcp_sock *tp = tcp_sk(sk); + struct sk_buff *skb; + struct sk_buff *hole = NULL; + u32 last_lost; + int mib_idx; + int fwd_rexmitting = 0; + + if (!tp->lost_out) + tp->retransmit_high = tp->snd_una; + + if (tp->retransmit_skb_hint) { + skb = tp->retransmit_skb_hint; + last_lost = TCP_SKB_CB(skb)->end_seq; + if (after(last_lost, tp->retransmit_high)) + last_lost = tp->retransmit_high; + } else { skb = tcp_write_queue_head(sk); + last_lost = tp->snd_una; + } + /* First pass: retransmit lost packets. */ tcp_for_write_queue_from(skb, sk) { - if (skb == tcp_send_head(sk)) - break; - tp->forward_skb_hint = skb; + __u8 sacked = TCP_SKB_CB(skb)->sacked; - if (!before(TCP_SKB_CB(skb)->seq, tcp_highest_sack_seq(tp))) + if (skb == tcp_send_head(sk)) break; + /* we could do better than to assign each time */ + if (hole == NULL) + tp->retransmit_skb_hint = skb; + /* Assume this retransmit will generate + * only one packet for congestion window + * calculation purposes. This works because + * tcp_retransmit_skb() will chop up the + * packet to be MSS sized and all the + * packet counting works out. + */ if (tcp_packets_in_flight(tp) >= tp->snd_cwnd) - break; + return; + + if (fwd_rexmitting) { +begin_fwd: + if (!before(TCP_SKB_CB(skb)->seq, tcp_highest_sack_seq(tp))) + break; + mib_idx = LINUX_MIB_TCPFORWARDRETRANS; + + } else if (!before(TCP_SKB_CB(skb)->seq, tp->retransmit_high)) { + tp->retransmit_high = last_lost; + if (!tcp_can_forward_retransmit(sk)) + break; + /* Backtrack if necessary to non-L'ed skb */ + if (hole != NULL) { + skb = hole; + hole = NULL; + } + fwd_rexmitting = 1; + goto begin_fwd; - if (TCP_SKB_CB(skb)->sacked & TCPCB_TAGBITS) + } else if (!(sacked & TCPCB_LOST)) { + if (hole == NULL && !(sacked & TCPCB_SACKED_RETRANS)) + hole = skb; continue; - /* Ok, retransmit it. */ - if (tcp_retransmit_skb(sk, skb)) { - tp->forward_skb_hint = NULL; - break; + } else { + last_lost = TCP_SKB_CB(skb)->end_seq; + if (icsk->icsk_ca_state != TCP_CA_Loss) + mib_idx = LINUX_MIB_TCPFASTRETRANS; + else + mib_idx = LINUX_MIB_TCPSLOWSTARTRETRANS; } + if (sacked & (TCPCB_SACKED_ACKED|TCPCB_SACKED_RETRANS)) + continue; + + if (tcp_retransmit_skb(sk, skb)) + return; + NET_INC_STATS_BH(sock_net(sk), mib_idx); + if (skb == tcp_write_queue_head(sk)) inet_csk_reset_xmit_timer(sk, ICSK_TIME_RETRANS, inet_csk(sk)->icsk_rto, TCP_RTO_MAX); - - NET_INC_STATS_BH(LINUX_MIB_TCPFORWARDRETRANS); } } @@ -2119,7 +2173,7 @@ void tcp_send_active_reset(struct sock *sk, gfp_t priority) /* NOTE: No TCP options attached and we never retransmit this. */ skb = alloc_skb(MAX_TCP_HEADER, priority); if (!skb) { - NET_INC_STATS(LINUX_MIB_TCPABORTFAILED); + NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPABORTFAILED); return; } @@ -2130,9 +2184,9 @@ void tcp_send_active_reset(struct sock *sk, gfp_t priority) /* Send it off. */ TCP_SKB_CB(skb)->when = tcp_time_stamp; if (tcp_transmit_skb(sk, skb, 0, priority)) - NET_INC_STATS(LINUX_MIB_TCPABORTFAILED); + NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPABORTFAILED); - TCP_INC_STATS(TCP_MIB_OUTRSTS); + TCP_INC_STATS(sock_net(sk), TCP_MIB_OUTRSTS); } /* WARNING: This routine must only be called when we have already sent @@ -2180,11 +2234,11 @@ struct sk_buff *tcp_make_synack(struct sock *sk, struct dst_entry *dst, struct tcp_sock *tp = tcp_sk(sk); struct tcphdr *th; int tcp_header_size; + struct tcp_out_options opts; struct sk_buff *skb; -#ifdef CONFIG_TCP_MD5SIG struct tcp_md5sig_key *md5; __u8 *md5_hash_location; -#endif + int mss; skb = sock_wmalloc(sk, MAX_TCP_HEADER + 15, 1, GFP_ATOMIC); if (skb == NULL) @@ -2195,18 +2249,30 @@ struct sk_buff *tcp_make_synack(struct sock *sk, struct dst_entry *dst, skb->dst = dst_clone(dst); - tcp_header_size = (sizeof(struct tcphdr) + TCPOLEN_MSS + - (ireq->tstamp_ok ? TCPOLEN_TSTAMP_ALIGNED : 0) + - (ireq->wscale_ok ? TCPOLEN_WSCALE_ALIGNED : 0) + - /* SACK_PERM is in the place of NOP NOP of TS */ - ((ireq->sack_ok && !ireq->tstamp_ok) ? TCPOLEN_SACKPERM_ALIGNED : 0)); + mss = dst_metric(dst, RTAX_ADVMSS); + if (tp->rx_opt.user_mss && tp->rx_opt.user_mss < mss) + mss = tp->rx_opt.user_mss; + + if (req->rcv_wnd == 0) { /* ignored for retransmitted syns */ + __u8 rcv_wscale; + /* Set this up on the first call only */ + req->window_clamp = tp->window_clamp ? : dst_metric(dst, RTAX_WINDOW); + /* tcp_full_space because it is guaranteed to be the first packet */ + tcp_select_initial_window(tcp_full_space(sk), + mss - (ireq->tstamp_ok ? TCPOLEN_TSTAMP_ALIGNED : 0), + &req->rcv_wnd, + &req->window_clamp, + ireq->wscale_ok, + &rcv_wscale); + ireq->rcv_wscale = rcv_wscale; + } + + memset(&opts, 0, sizeof(opts)); + TCP_SKB_CB(skb)->when = tcp_time_stamp; + tcp_header_size = tcp_synack_options(sk, req, mss, + skb, &opts, &md5) + + sizeof(struct tcphdr); -#ifdef CONFIG_TCP_MD5SIG - /* Are we doing MD5 on this segment? If so - make room for it */ - md5 = tcp_rsk(req)->af_specific->md5_lookup(sk, req); - if (md5) - tcp_header_size += TCPOLEN_MD5SIG_ALIGNED; -#endif skb_push(skb, tcp_header_size); skb_reset_transport_header(skb); @@ -2215,7 +2281,7 @@ struct sk_buff *tcp_make_synack(struct sock *sk, struct dst_entry *dst, th->syn = 1; th->ack = 1; TCP_ECN_make_synack(req, th); - th->source = inet_sk(sk)->sport; + th->source = ireq->loc_port; th->dest = ireq->rmt_port; /* Setting of flags are superfluous here for callers (and ECE is * not even correctly set) @@ -2224,19 +2290,6 @@ struct sk_buff *tcp_make_synack(struct sock *sk, struct dst_entry *dst, TCPCB_FLAG_SYN | TCPCB_FLAG_ACK); th->seq = htonl(TCP_SKB_CB(skb)->seq); th->ack_seq = htonl(tcp_rsk(req)->rcv_isn + 1); - if (req->rcv_wnd == 0) { /* ignored for retransmitted syns */ - __u8 rcv_wscale; - /* Set this up on the first call only */ - req->window_clamp = tp->window_clamp ? : dst_metric(dst, RTAX_WINDOW); - /* tcp_full_space because it is guaranteed to be the first packet */ - tcp_select_initial_window(tcp_full_space(sk), - dst_metric(dst, RTAX_ADVMSS) - (ireq->tstamp_ok ? TCPOLEN_TSTAMP_ALIGNED : 0), - &req->rcv_wnd, - &req->window_clamp, - ireq->wscale_ok, - &rcv_wscale); - ireq->rcv_wscale = rcv_wscale; - } /* RFC1323: The window in SYN & SYN/ACK segments is never scaled. */ th->window = htons(min(req->rcv_wnd, 65535U)); @@ -2245,29 +2298,15 @@ struct sk_buff *tcp_make_synack(struct sock *sk, struct dst_entry *dst, TCP_SKB_CB(skb)->when = cookie_init_timestamp(req); else #endif - TCP_SKB_CB(skb)->when = tcp_time_stamp; - tcp_syn_build_options((__be32 *)(th + 1), dst_metric(dst, RTAX_ADVMSS), ireq->tstamp_ok, - ireq->sack_ok, ireq->wscale_ok, ireq->rcv_wscale, - TCP_SKB_CB(skb)->when, - req->ts_recent, - ( -#ifdef CONFIG_TCP_MD5SIG - md5 ? &md5_hash_location : -#endif - NULL) - ); - + tcp_options_write((__be32 *)(th + 1), tp, &opts, &md5_hash_location); th->doff = (tcp_header_size >> 2); - TCP_INC_STATS(TCP_MIB_OUTSEGS); + TCP_INC_STATS(sock_net(sk), TCP_MIB_OUTSEGS); #ifdef CONFIG_TCP_MD5SIG /* Okay, we have all we need - do the md5 hash if needed */ if (md5) { tp->af_specific->calc_md5_hash(md5_hash_location, - md5, - NULL, dst, req, - tcp_hdr(skb), sk->sk_protocol, - skb->len); + md5, NULL, req, skb); } #endif @@ -2304,6 +2343,9 @@ static void tcp_connect_init(struct sock *sk) if (!tp->window_clamp) tp->window_clamp = dst_metric(dst, RTAX_WINDOW); tp->advmss = dst_metric(dst, RTAX_ADVMSS); + if (tp->rx_opt.user_mss && tp->rx_opt.user_mss < tp->advmss) + tp->advmss = tp->rx_opt.user_mss; + tcp_initialize_rcv_mss(sk); tcp_select_initial_window(tcp_full_space(sk), @@ -2322,6 +2364,7 @@ static void tcp_connect_init(struct sock *sk) tcp_init_wl(tp, tp->write_seq, 0); tp->snd_una = tp->write_seq; tp->snd_sml = tp->write_seq; + tp->snd_up = tp->write_seq; tp->rcv_nxt = 0; tp->rcv_wup = 0; tp->copied_seq = 0; @@ -2367,7 +2410,7 @@ int tcp_connect(struct sock *sk) */ tp->snd_nxt = tp->write_seq; tp->pushed_seq = tp->write_seq; - TCP_INC_STATS(TCP_MIB_ACTIVEOPENS); + TCP_INC_STATS(sock_net(sk), TCP_MIB_ACTIVEOPENS); /* Timer for repeating the SYN until an answer. */ inet_csk_reset_xmit_timer(sk, ICSK_TIME_RETRANS, @@ -2531,8 +2574,7 @@ int tcp_write_wakeup(struct sock *sk) tcp_event_new_data_sent(sk, skb); return err; } else { - if (tp->urg_mode && - between(tp->snd_up, tp->snd_una + 1, tp->snd_una + 0xFFFF)) + if (between(tp->snd_up, tp->snd_una + 1, tp->snd_una + 0xFFFF)) tcp_xmit_probe_skb(sk, 1); return tcp_xmit_probe_skb(sk, 0); } diff --git a/net/ipv4/tcp_probe.c b/net/ipv4/tcp_probe.c index 5ff0ce6e9d39..7ddc30f0744f 100644 --- a/net/ipv4/tcp_probe.c +++ b/net/ipv4/tcp_probe.c @@ -224,7 +224,7 @@ static __init int tcpprobe_init(void) if (bufsize < 0) return -EINVAL; - tcp_probe.log = kcalloc(sizeof(struct tcp_log), bufsize, GFP_KERNEL); + tcp_probe.log = kcalloc(bufsize, sizeof(struct tcp_log), GFP_KERNEL); if (!tcp_probe.log) goto err0; diff --git a/net/ipv4/tcp_timer.c b/net/ipv4/tcp_timer.c index 63ed9d6830e7..6b6dff1164b9 100644 --- a/net/ipv4/tcp_timer.c +++ b/net/ipv4/tcp_timer.c @@ -5,8 +5,6 @@ * * Implementation of the Transmission Control Protocol(TCP). * - * Version: $Id: tcp_timer.c,v 1.88 2002/02/01 22:01:04 davem Exp $ - * * Authors: Ross Biro * Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG> * Mark Evans, <evansmp@uhura.aston.ac.uk> @@ -50,7 +48,7 @@ static void tcp_write_err(struct sock *sk) sk->sk_error_report(sk); tcp_done(sk); - NET_INC_STATS_BH(LINUX_MIB_TCPABORTONTIMEOUT); + NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPABORTONTIMEOUT); } /* Do not allow orphaned sockets to eat all our resources. @@ -91,7 +89,7 @@ static int tcp_out_of_resources(struct sock *sk, int do_reset) if (do_reset) tcp_send_active_reset(sk, GFP_ATOMIC); tcp_done(sk); - NET_INC_STATS_BH(LINUX_MIB_TCPABORTONMEMORY); + NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPABORTONMEMORY); return 1; } return 0; @@ -181,7 +179,7 @@ static void tcp_delack_timer(unsigned long data) if (sock_owned_by_user(sk)) { /* Try again later. */ icsk->icsk_ack.blocked = 1; - NET_INC_STATS_BH(LINUX_MIB_DELAYEDACKLOCKED); + NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_DELAYEDACKLOCKED); sk_reset_timer(sk, &icsk->icsk_delack_timer, jiffies + TCP_DELACK_MIN); goto out_unlock; } @@ -200,10 +198,10 @@ static void tcp_delack_timer(unsigned long data) if (!skb_queue_empty(&tp->ucopy.prequeue)) { struct sk_buff *skb; - NET_INC_STATS_BH(LINUX_MIB_TCPSCHEDULERFAILED); + NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPSCHEDULERFAILED); while ((skb = __skb_dequeue(&tp->ucopy.prequeue)) != NULL) - sk->sk_backlog_rcv(sk, skb); + sk_backlog_rcv(sk, skb); tp->ucopy.memory = 0; } @@ -220,7 +218,7 @@ static void tcp_delack_timer(unsigned long data) icsk->icsk_ack.ato = TCP_ATO_MIN; } tcp_send_ack(sk); - NET_INC_STATS_BH(LINUX_MIB_DELAYEDACKS); + NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_DELAYEDACKS); } TCP_CHECK_TIMER(sk); @@ -289,7 +287,7 @@ static void tcp_retransmit_timer(struct sock *sk) if (!tp->packets_out) goto out; - BUG_TRAP(!tcp_write_queue_empty(sk)); + WARN_ON(tcp_write_queue_empty(sk)); if (!tp->snd_wnd && !sock_flag(sk, SOCK_DEAD) && !((1 << sk->sk_state) & (TCPF_SYN_SENT | TCPF_SYN_RECV))) { @@ -328,24 +326,27 @@ static void tcp_retransmit_timer(struct sock *sk) goto out; if (icsk->icsk_retransmits == 0) { + int mib_idx; + if (icsk->icsk_ca_state == TCP_CA_Disorder || icsk->icsk_ca_state == TCP_CA_Recovery) { if (tcp_is_sack(tp)) { if (icsk->icsk_ca_state == TCP_CA_Recovery) - NET_INC_STATS_BH(LINUX_MIB_TCPSACKRECOVERYFAIL); + mib_idx = LINUX_MIB_TCPSACKRECOVERYFAIL; else - NET_INC_STATS_BH(LINUX_MIB_TCPSACKFAILURES); + mib_idx = LINUX_MIB_TCPSACKFAILURES; } else { if (icsk->icsk_ca_state == TCP_CA_Recovery) - NET_INC_STATS_BH(LINUX_MIB_TCPRENORECOVERYFAIL); + mib_idx = LINUX_MIB_TCPRENORECOVERYFAIL; else - NET_INC_STATS_BH(LINUX_MIB_TCPRENOFAILURES); + mib_idx = LINUX_MIB_TCPRENOFAILURES; } } else if (icsk->icsk_ca_state == TCP_CA_Loss) { - NET_INC_STATS_BH(LINUX_MIB_TCPLOSSFAILURES); + mib_idx = LINUX_MIB_TCPLOSSFAILURES; } else { - NET_INC_STATS_BH(LINUX_MIB_TCPTIMEOUTS); + mib_idx = LINUX_MIB_TCPTIMEOUTS; } + NET_INC_STATS_BH(sock_net(sk), mib_idx); } if (tcp_use_frto(sk)) { diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c index 56fcda3694ba..2095abc3caba 100644 --- a/net/ipv4/udp.c +++ b/net/ipv4/udp.c @@ -5,12 +5,10 @@ * * The User Datagram Protocol (UDP). * - * Version: $Id: udp.c,v 1.102 2002/02/01 22:01:04 davem Exp $ - * * Authors: Ross Biro * Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG> * Arnt Gulbrandsen, <agulbra@nvg.unit.no> - * Alan Cox, <Alan.Cox@linux.org> + * Alan Cox, <alan@lxorguk.ukuu.org.uk> * Hirokazu Takahashi, <taka@valinux.co.jp> * * Fixes: @@ -110,12 +108,6 @@ * Snmp MIB for the UDP layer */ -DEFINE_SNMP_STAT(struct udp_mib, udp_statistics) __read_mostly; -EXPORT_SYMBOL(udp_statistics); - -DEFINE_SNMP_STAT(struct udp_mib, udp_stats_in6) __read_mostly; -EXPORT_SYMBOL(udp_stats_in6); - struct hlist_head udp_hash[UDP_HTABLE_SIZE]; DEFINE_RWLOCK(udp_hash_lock); @@ -130,14 +122,23 @@ EXPORT_SYMBOL(sysctl_udp_wmem_min); atomic_t udp_memory_allocated; EXPORT_SYMBOL(udp_memory_allocated); -static inline int __udp_lib_lport_inuse(struct net *net, __u16 num, - const struct hlist_head udptable[]) +static int udp_lib_lport_inuse(struct net *net, __u16 num, + const struct hlist_head udptable[], + struct sock *sk, + int (*saddr_comp)(const struct sock *sk1, + const struct sock *sk2)) { - struct sock *sk; + struct sock *sk2; struct hlist_node *node; - sk_for_each(sk, node, &udptable[num & (UDP_HTABLE_SIZE - 1)]) - if (net_eq(sock_net(sk), net) && sk->sk_hash == num) + sk_for_each(sk2, node, &udptable[udp_hashfn(net, num)]) + if (net_eq(sock_net(sk2), net) && + sk2 != sk && + sk2->sk_hash == num && + (!sk2->sk_reuse || !sk->sk_reuse) && + (!sk2->sk_bound_dev_if || !sk->sk_bound_dev_if + || sk2->sk_bound_dev_if == sk->sk_bound_dev_if) && + (*saddr_comp)(sk, sk2)) return 1; return 0; } @@ -154,83 +155,37 @@ int udp_lib_get_port(struct sock *sk, unsigned short snum, const struct sock *sk2 ) ) { struct hlist_head *udptable = sk->sk_prot->h.udp_hash; - struct hlist_node *node; - struct hlist_head *head; - struct sock *sk2; int error = 1; struct net *net = sock_net(sk); write_lock_bh(&udp_hash_lock); if (!snum) { - int i, low, high, remaining; - unsigned rover, best, best_size_so_far; + int low, high, remaining; + unsigned rand; + unsigned short first; inet_get_local_port_range(&low, &high); remaining = (high - low) + 1; - best_size_so_far = UINT_MAX; - best = rover = net_random() % remaining + low; - - /* 1st pass: look for empty (or shortest) hash chain */ - for (i = 0; i < UDP_HTABLE_SIZE; i++) { - int size = 0; - - head = &udptable[rover & (UDP_HTABLE_SIZE - 1)]; - if (hlist_empty(head)) - goto gotit; - - sk_for_each(sk2, node, head) { - if (++size >= best_size_so_far) - goto next; - } - best_size_so_far = size; - best = rover; - next: - /* fold back if end of range */ - if (++rover > high) - rover = low + ((rover - low) - & (UDP_HTABLE_SIZE - 1)); - - - } - - /* 2nd pass: find hole in shortest hash chain */ - rover = best; - for (i = 0; i < (1 << 16) / UDP_HTABLE_SIZE; i++) { - if (! __udp_lib_lport_inuse(net, rover, udptable)) - goto gotit; - rover += UDP_HTABLE_SIZE; - if (rover > high) - rover = low + ((rover - low) - & (UDP_HTABLE_SIZE - 1)); + rand = net_random(); + snum = first = rand % remaining + low; + rand |= 1; + while (udp_lib_lport_inuse(net, snum, udptable, sk, + saddr_comp)) { + do { + snum = snum + rand; + } while (snum < low || snum > high); + if (snum == first) + goto fail; } - - - /* All ports in use! */ + } else if (udp_lib_lport_inuse(net, snum, udptable, sk, saddr_comp)) goto fail; -gotit: - snum = rover; - } else { - head = &udptable[snum & (UDP_HTABLE_SIZE - 1)]; - - sk_for_each(sk2, node, head) - if (sk2->sk_hash == snum && - sk2 != sk && - net_eq(sock_net(sk2), net) && - (!sk2->sk_reuse || !sk->sk_reuse) && - (!sk2->sk_bound_dev_if || !sk->sk_bound_dev_if - || sk2->sk_bound_dev_if == sk->sk_bound_dev_if) && - (*saddr_comp)(sk, sk2) ) - goto fail; - } - inet_sk(sk)->num = snum; sk->sk_hash = snum; if (sk_unhashed(sk)) { - head = &udptable[snum & (UDP_HTABLE_SIZE - 1)]; - sk_add_node(sk, head); + sk_add_node(sk, &udptable[udp_hashfn(net, snum)]); sock_prot_inuse_add(sock_net(sk), sk->sk_prot, 1); } error = 0; @@ -266,7 +221,7 @@ static struct sock *__udp4_lib_lookup(struct net *net, __be32 saddr, int badness = -1; read_lock(&udp_hash_lock); - sk_for_each(sk, node, &udptable[hnum & (UDP_HTABLE_SIZE - 1)]) { + sk_for_each(sk, node, &udptable[udp_hashfn(net, hnum)]) { struct inet_sock *inet = inet_sk(sk); if (net_eq(sock_net(sk), net) && sk->sk_hash == hnum && @@ -307,6 +262,28 @@ static struct sock *__udp4_lib_lookup(struct net *net, __be32 saddr, return result; } +static inline struct sock *__udp4_lib_lookup_skb(struct sk_buff *skb, + __be16 sport, __be16 dport, + struct hlist_head udptable[]) +{ + struct sock *sk; + const struct iphdr *iph = ip_hdr(skb); + + if (unlikely(sk = skb_steal_sock(skb))) + return sk; + else + return __udp4_lib_lookup(dev_net(skb->dst->dev), iph->saddr, sport, + iph->daddr, dport, inet_iif(skb), + udptable); +} + +struct sock *udp4_lib_lookup(struct net *net, __be32 saddr, __be16 sport, + __be32 daddr, __be16 dport, int dif) +{ + return __udp4_lib_lookup(net, saddr, sport, daddr, dport, dif, udp_hash); +} +EXPORT_SYMBOL_GPL(udp4_lib_lookup); + static inline struct sock *udp_v4_mcast_next(struct sock *sk, __be16 loc_port, __be32 loc_addr, __be16 rmt_port, __be32 rmt_addr, @@ -356,11 +333,12 @@ void __udp4_lib_err(struct sk_buff *skb, u32 info, struct hlist_head udptable[]) struct sock *sk; int harderr; int err; + struct net *net = dev_net(skb->dev); - sk = __udp4_lib_lookup(dev_net(skb->dev), iph->daddr, uh->dest, + sk = __udp4_lib_lookup(net, iph->daddr, uh->dest, iph->saddr, uh->source, skb->dev->ifindex, udptable); if (sk == NULL) { - ICMP_INC_STATS_BH(ICMP_MIB_INERRORS); + ICMP_INC_STATS_BH(net, ICMP_MIB_INERRORS); return; /* No socket for error */ } @@ -528,7 +506,8 @@ out: up->len = 0; up->pending = 0; if (!err) - UDP_INC_STATS_USER(UDP_MIB_OUTDATAGRAMS, is_udplite); + UDP_INC_STATS_USER(sock_net(sk), + UDP_MIB_OUTDATAGRAMS, is_udplite); return err; } @@ -656,11 +635,13 @@ int udp_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, .uli_u = { .ports = { .sport = inet->sport, .dport = dport } } }; + struct net *net = sock_net(sk); + security_sk_classify_flow(sk, &fl); - err = ip_route_output_flow(sock_net(sk), &rt, &fl, sk, 1); + err = ip_route_output_flow(net, &rt, &fl, sk, 1); if (err) { if (err == -ENETUNREACH) - IP_INC_STATS_BH(IPSTATS_MIB_OUTNOROUTES); + IP_INC_STATS_BH(net, IPSTATS_MIB_OUTNOROUTES); goto out; } @@ -727,7 +708,8 @@ out: * seems like overkill. */ if (err == -ENOBUFS || test_bit(SOCK_NOSPACE, &sk->sk_socket->flags)) { - UDP_INC_STATS_USER(UDP_MIB_SNDBUFERRORS, is_udplite); + UDP_INC_STATS_USER(sock_net(sk), + UDP_MIB_SNDBUFERRORS, is_udplite); } return err; @@ -890,7 +872,8 @@ try_again: goto out_free; if (!peeked) - UDP_INC_STATS_USER(UDP_MIB_INDATAGRAMS, is_udplite); + UDP_INC_STATS_USER(sock_net(sk), + UDP_MIB_INDATAGRAMS, is_udplite); sock_recv_timestamp(msg, sk, skb); @@ -919,7 +902,7 @@ out: csum_copy_err: lock_sock(sk); if (!skb_kill_datagram(sk, skb, flags)) - UDP_INC_STATS_USER(UDP_MIB_INERRORS, is_udplite); + UDP_INC_STATS_USER(sock_net(sk), UDP_MIB_INERRORS, is_udplite); release_sock(sk); if (noblock) @@ -950,6 +933,27 @@ int udp_disconnect(struct sock *sk, int flags) return 0; } +static int __udp_queue_rcv_skb(struct sock *sk, struct sk_buff *skb) +{ + int is_udplite = IS_UDPLITE(sk); + int rc; + + if ((rc = sock_queue_rcv_skb(sk, skb)) < 0) { + /* Note that an ENOMEM error is charged twice */ + if (rc == -ENOMEM) + UDP_INC_STATS_BH(sock_net(sk), UDP_MIB_RCVBUFERRORS, + is_udplite); + goto drop; + } + + return 0; + +drop: + UDP_INC_STATS_BH(sock_net(sk), UDP_MIB_INERRORS, is_udplite); + kfree_skb(skb); + return -1; +} + /* returns: * -1: error * 0: success @@ -990,7 +994,8 @@ int udp_queue_rcv_skb(struct sock * sk, struct sk_buff *skb) ret = (*up->encap_rcv)(sk, skb); if (ret <= 0) { - UDP_INC_STATS_BH(UDP_MIB_INDATAGRAMS, + UDP_INC_STATS_BH(sock_net(sk), + UDP_MIB_INDATAGRAMS, is_udplite); return -ret; } @@ -1040,17 +1045,19 @@ int udp_queue_rcv_skb(struct sock * sk, struct sk_buff *skb) goto drop; } - if ((rc = sock_queue_rcv_skb(sk,skb)) < 0) { - /* Note that an ENOMEM error is charged twice */ - if (rc == -ENOMEM) - UDP_INC_STATS_BH(UDP_MIB_RCVBUFERRORS, is_udplite); - goto drop; - } + rc = 0; - return 0; + bh_lock_sock(sk); + if (!sock_owned_by_user(sk)) + rc = __udp_queue_rcv_skb(sk, skb); + else + sk_add_backlog(sk, skb); + bh_unlock_sock(sk); + + return rc; drop: - UDP_INC_STATS_BH(UDP_MIB_INERRORS, is_udplite); + UDP_INC_STATS_BH(sock_net(sk), UDP_MIB_INERRORS, is_udplite); kfree_skb(skb); return -1; } @@ -1061,7 +1068,7 @@ drop: * Note: called only from the BH handler context, * so we don't need to lock the hashes. */ -static int __udp4_lib_mcast_deliver(struct sk_buff *skb, +static int __udp4_lib_mcast_deliver(struct net *net, struct sk_buff *skb, struct udphdr *uh, __be32 saddr, __be32 daddr, struct hlist_head udptable[]) @@ -1070,7 +1077,7 @@ static int __udp4_lib_mcast_deliver(struct sk_buff *skb, int dif; read_lock(&udp_hash_lock); - sk = sk_head(&udptable[ntohs(uh->dest) & (UDP_HTABLE_SIZE - 1)]); + sk = sk_head(&udptable[udp_hashfn(net, ntohs(uh->dest))]); dif = skb->dev->ifindex; sk = udp_v4_mcast_next(sk, uh->dest, daddr, uh->source, saddr, dif); if (sk) { @@ -1085,15 +1092,7 @@ static int __udp4_lib_mcast_deliver(struct sk_buff *skb, skb1 = skb_clone(skb, GFP_ATOMIC); if (skb1) { - int ret = 0; - - bh_lock_sock_nested(sk); - if (!sock_owned_by_user(sk)) - ret = udp_queue_rcv_skb(sk, skb1); - else - sk_add_backlog(sk, skb1); - bh_unlock_sock(sk); - + int ret = udp_queue_rcv_skb(sk, skb1); if (ret > 0) /* we should probably re-process instead * of dropping packets here. */ @@ -1158,6 +1157,7 @@ int __udp4_lib_rcv(struct sk_buff *skb, struct hlist_head udptable[], struct rtable *rt = (struct rtable*)skb->dst; __be32 saddr = ip_hdr(skb)->saddr; __be32 daddr = ip_hdr(skb)->daddr; + struct net *net = dev_net(skb->dev); /* * Validate the packet. @@ -1180,19 +1180,13 @@ int __udp4_lib_rcv(struct sk_buff *skb, struct hlist_head udptable[], goto csum_error; if (rt->rt_flags & (RTCF_BROADCAST|RTCF_MULTICAST)) - return __udp4_lib_mcast_deliver(skb, uh, saddr, daddr, udptable); + return __udp4_lib_mcast_deliver(net, skb, uh, + saddr, daddr, udptable); - sk = __udp4_lib_lookup(dev_net(skb->dev), saddr, uh->source, daddr, - uh->dest, inet_iif(skb), udptable); + sk = __udp4_lib_lookup_skb(skb, uh->source, uh->dest, udptable); if (sk != NULL) { - int ret = 0; - bh_lock_sock_nested(sk); - if (!sock_owned_by_user(sk)) - ret = udp_queue_rcv_skb(sk, skb); - else - sk_add_backlog(sk, skb); - bh_unlock_sock(sk); + int ret = udp_queue_rcv_skb(sk, skb); sock_put(sk); /* a return value > 0 means to resubmit the input, but @@ -1211,7 +1205,7 @@ int __udp4_lib_rcv(struct sk_buff *skb, struct hlist_head udptable[], if (udp_lib_checksum_complete(skb)) goto csum_error; - UDP_INC_STATS_BH(UDP_MIB_NOPORTS, proto == IPPROTO_UDPLITE); + UDP_INC_STATS_BH(net, UDP_MIB_NOPORTS, proto == IPPROTO_UDPLITE); icmp_send(skb, ICMP_DEST_UNREACH, ICMP_PORT_UNREACH, 0); /* @@ -1245,7 +1239,7 @@ csum_error: ntohs(uh->dest), ulen); drop: - UDP_INC_STATS_BH(UDP_MIB_INERRORS, proto == IPPROTO_UDPLITE); + UDP_INC_STATS_BH(net, UDP_MIB_INERRORS, proto == IPPROTO_UDPLITE); kfree_skb(skb); return 0; } @@ -1255,12 +1249,11 @@ int udp_rcv(struct sk_buff *skb) return __udp4_lib_rcv(skb, udp_hash, IPPROTO_UDP); } -int udp_destroy_sock(struct sock *sk) +void udp_destroy_sock(struct sock *sk) { lock_sock(sk); udp_flush_pending_frames(sk); release_sock(sk); - return 0; } /* @@ -1319,6 +1312,8 @@ int udp_lib_setsockopt(struct sock *sk, int level, int optname, return -ENOPROTOOPT; if (val != 0 && val < 8) /* Illegal coverage: use default (8) */ val = 8; + else if (val > USHORT_MAX) + val = USHORT_MAX; up->pcslen = val; up->pcflag |= UDPLITE_SEND_CC; break; @@ -1331,6 +1326,8 @@ int udp_lib_setsockopt(struct sock *sk, int level, int optname, return -ENOPROTOOPT; if (val != 0 && val < 8) /* Avoid silly minimal values. */ val = 8; + else if (val > USHORT_MAX) + val = USHORT_MAX; up->pcrlen = val; up->pcflag |= UDPLITE_RECV_CC; break; @@ -1453,7 +1450,8 @@ unsigned int udp_poll(struct file *file, struct socket *sock, poll_table *wait) spin_lock_bh(&rcvq->lock); while ((skb = skb_peek(rcvq)) != NULL && udp_lib_checksum_complete(skb)) { - UDP_INC_STATS_BH(UDP_MIB_INERRORS, is_lite); + UDP_INC_STATS_BH(sock_net(sk), + UDP_MIB_INERRORS, is_lite); __skb_unlink(skb, rcvq); kfree_skb(skb); } @@ -1481,7 +1479,7 @@ struct proto udp_prot = { .sendmsg = udp_sendmsg, .recvmsg = udp_recvmsg, .sendpage = udp_sendpage, - .backlog_rcv = udp_queue_rcv_skb, + .backlog_rcv = __udp_queue_rcv_skb, .hash = udp_lib_hash, .unhash = udp_lib_unhash, .get_port = udp_v4_get_port, @@ -1629,12 +1627,13 @@ static void udp4_format_sock(struct sock *sp, struct seq_file *f, __u16 srcp = ntohs(inet->sport); seq_printf(f, "%4d: %08X:%04X %08X:%04X" - " %02X %08X:%08X %02X:%08lX %08X %5d %8d %lu %d %p%n", + " %02X %08X:%08X %02X:%08lX %08X %5d %8d %lu %d %p %d%n", bucket, src, srcp, dest, destp, sp->sk_state, atomic_read(&sp->sk_wmem_alloc), atomic_read(&sp->sk_rmem_alloc), 0, 0L, 0, sock_i_uid(sp), 0, sock_i_ino(sp), - atomic_read(&sp->sk_refcnt), sp, len); + atomic_read(&sp->sk_refcnt), sp, + atomic_read(&sp->sk_drops), len); } int udp4_seq_show(struct seq_file *seq, void *v) @@ -1643,7 +1642,7 @@ int udp4_seq_show(struct seq_file *seq, void *v) seq_printf(seq, "%-127s\n", " sl local_address rem_address st tx_queue " "rx_queue tr tm->when retrnsmt uid timeout " - "inode"); + "inode ref pointer drops"); else { struct udp_iter_state *state = seq->private; int len; diff --git a/net/ipv4/udp_impl.h b/net/ipv4/udp_impl.h index 7288bf7977fb..2e9bad2fa1bc 100644 --- a/net/ipv4/udp_impl.h +++ b/net/ipv4/udp_impl.h @@ -26,7 +26,7 @@ extern int udp_recvmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, extern int udp_sendpage(struct sock *sk, struct page *page, int offset, size_t size, int flags); extern int udp_queue_rcv_skb(struct sock * sk, struct sk_buff *skb); -extern int udp_destroy_sock(struct sock *sk); +extern void udp_destroy_sock(struct sock *sk); #ifdef CONFIG_PROC_FS extern int udp4_seq_show(struct seq_file *seq, void *v); diff --git a/net/ipv4/udplite.c b/net/ipv4/udplite.c index 72ce26b6c4d3..3c807964da96 100644 --- a/net/ipv4/udplite.c +++ b/net/ipv4/udplite.c @@ -1,8 +1,6 @@ /* * UDPLITE An implementation of the UDP-Lite protocol (RFC 3828). * - * Version: $Id: udplite.c,v 1.25 2006/10/19 07:22:36 gerrit Exp $ - * * Authors: Gerrit Renker <gerrit@erg.abdn.ac.uk> * * Changes: @@ -13,7 +11,6 @@ * 2 of the License, or (at your option) any later version. */ #include "udp_impl.h" -DEFINE_SNMP_STAT(struct udp_mib, udplite_statistics) __read_mostly; struct hlist_head udplite_hash[UDP_HTABLE_SIZE]; diff --git a/net/ipv4/xfrm4_mode_beet.c b/net/ipv4/xfrm4_mode_beet.c index 9c798abce736..63418185f524 100644 --- a/net/ipv4/xfrm4_mode_beet.c +++ b/net/ipv4/xfrm4_mode_beet.c @@ -47,8 +47,10 @@ static int xfrm4_beet_output(struct xfrm_state *x, struct sk_buff *skb) if (unlikely(optlen)) hdrlen += IPV4_BEET_PHMAXLEN - (optlen & 4); - skb_set_network_header(skb, IPV4_BEET_PHMAXLEN - x->props.header_len - - hdrlen); + skb_set_network_header(skb, -x->props.header_len - + hdrlen + (XFRM_MODE_SKB_CB(skb)->ihl - sizeof(*top_iph))); + if (x->sel.family != AF_INET6) + skb->network_header += IPV4_BEET_PHMAXLEN; skb->mac_header = skb->network_header + offsetof(struct iphdr, protocol); skb->transport_header = skb->network_header + sizeof(*top_iph); diff --git a/net/ipv6/Kconfig b/net/ipv6/Kconfig index 42814a2ec9d7..ec992159b5f8 100644 --- a/net/ipv6/Kconfig +++ b/net/ipv6/Kconfig @@ -96,10 +96,8 @@ config INET6_ESP config INET6_IPCOMP tristate "IPv6: IPComp transformation" - select XFRM select INET6_XFRM_TUNNEL - select CRYPTO - select CRYPTO_DEFLATE + select XFRM_IPCOMP ---help--- Support for IP Payload Compression Protocol (IPComp) (RFC3173), typically needed for IPsec. diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index 147588f4c7c0..7b6a584b62dd 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -6,8 +6,6 @@ * Pedro Roque <roque@di.fc.ul.pt> * Alexey Kuznetsov <kuznet@ms2.inr.ac.ru> * - * $Id: addrconf.c,v 1.69 2001/10/31 21:55:54 davem Exp $ - * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public License * as published by the Free Software Foundation; either version @@ -121,6 +119,7 @@ static void ipv6_regen_rndid(unsigned long data); static int desync_factor = MAX_DESYNC_FACTOR * HZ; #endif +static int ipv6_generate_eui64(u8 *eui, struct net_device *dev); static int ipv6_count_addresses(struct inet6_dev *idev); /* @@ -154,7 +153,7 @@ static int ipv6_chk_same_addr(struct net *net, const struct in6_addr *addr, static ATOMIC_NOTIFIER_HEAD(inet6addr_chain); -struct ipv6_devconf ipv6_devconf __read_mostly = { +static struct ipv6_devconf ipv6_devconf __read_mostly = { .forwarding = 0, .hop_limit = IPV6_DEFAULT_HOPLIMIT, .mtu6 = IPV6_MIN_MTU, @@ -185,6 +184,8 @@ struct ipv6_devconf ipv6_devconf __read_mostly = { #endif .proxy_ndp = 0, .accept_source_route = 0, /* we do not accept RH0 by default. */ + .disable_ipv6 = 0, + .accept_dad = 1, }; static struct ipv6_devconf ipv6_devconf_dflt __read_mostly = { @@ -217,6 +218,8 @@ static struct ipv6_devconf ipv6_devconf_dflt __read_mostly = { #endif .proxy_ndp = 0, .accept_source_route = 0, /* we do not accept RH0 by default. */ + .disable_ipv6 = 0, + .accept_dad = 1, }; /* IPv6 Wildcard Address and Loopback Address defined by RFC2553 */ @@ -226,9 +229,15 @@ const struct in6_addr in6addr_linklocal_allnodes = IN6ADDR_LINKLOCAL_ALLNODES_IN const struct in6_addr in6addr_linklocal_allrouters = IN6ADDR_LINKLOCAL_ALLROUTERS_INIT; /* Check if a valid qdisc is available */ -static inline int addrconf_qdisc_ok(struct net_device *dev) +static inline bool addrconf_qdisc_ok(const struct net_device *dev) { - return (dev->qdisc != &noop_qdisc); + return !qdisc_tx_is_noop(dev); +} + +/* Check if a route is valid prefix route */ +static inline int addrconf_is_prefix_route(const struct rt6_info *rt) +{ + return ((rt->rt6i_flags & (RTF_GATEWAY | RTF_DEFAULT)) == 0); } static void addrconf_del_timer(struct inet6_ifaddr *ifp) @@ -304,8 +313,10 @@ static void in6_dev_finish_destroy_rcu(struct rcu_head *head) void in6_dev_finish_destroy(struct inet6_dev *idev) { struct net_device *dev = idev->dev; - BUG_TRAP(idev->addr_list==NULL); - BUG_TRAP(idev->mc_list==NULL); + + WARN_ON(idev->addr_list != NULL); + WARN_ON(idev->mc_list != NULL); + #ifdef NET_REFCNT_DEBUG printk(KERN_DEBUG "in6_dev_finish_destroy: %s\n", dev ? dev->name : "NIL"); #endif @@ -344,6 +355,8 @@ static struct inet6_dev * ipv6_add_dev(struct net_device *dev) kfree(ndev); return NULL; } + if (ndev->cnf.forwarding) + dev_disable_lro(dev); /* We refer to the device */ dev_hold(dev); @@ -372,6 +385,9 @@ static struct inet6_dev * ipv6_add_dev(struct net_device *dev) */ in6_dev_hold(ndev); + if (dev->flags & (IFF_NOARP | IFF_LOOPBACK)) + ndev->cnf.accept_dad = -1; + #if defined(CONFIG_IPV6_SIT) || defined(CONFIG_IPV6_SIT_MODULE) if (dev->type == ARPHRD_SIT && (dev->priv_flags & IFF_ISATAP)) { printk(KERN_INFO @@ -438,6 +454,8 @@ static void dev_forward_change(struct inet6_dev *idev) if (!idev) return; dev = idev->dev; + if (idev->cnf.forwarding) + dev_disable_lro(dev); if (dev && (dev->flags & IFF_MULTICAST)) { if (idev->cnf.forwarding) ipv6_dev_mc_inc(dev, &in6addr_linklocal_allrouters); @@ -483,12 +501,14 @@ static void addrconf_fixup_forwarding(struct ctl_table *table, int *p, int old) if (p == &net->ipv6.devconf_dflt->forwarding) return; + rtnl_lock(); if (p == &net->ipv6.devconf_all->forwarding) { __s32 newf = net->ipv6.devconf_all->forwarding; net->ipv6.devconf_dflt->forwarding = newf; addrconf_forward_change(net, newf); } else if ((!*p) ^ (!old)) dev_forward_change((struct inet6_dev *)table->extra1); + rtnl_unlock(); if (*p) rt6_purge_dflt_routers(net); @@ -499,8 +519,9 @@ static void addrconf_fixup_forwarding(struct ctl_table *table, int *p, int old) void inet6_ifa_finish_destroy(struct inet6_ifaddr *ifp) { - BUG_TRAP(ifp->if_next==NULL); - BUG_TRAP(ifp->lst_next==NULL); + WARN_ON(ifp->if_next != NULL); + WARN_ON(ifp->lst_next != NULL); + #ifdef NET_REFCNT_DEBUG printk(KERN_DEBUG "inet6_ifa_finish_destroy\n"); #endif @@ -568,6 +589,13 @@ ipv6_add_addr(struct inet6_dev *idev, const struct in6_addr *addr, int pfxlen, struct rt6_info *rt; int hash; int err = 0; + int addr_type = ipv6_addr_type(addr); + + if (addr_type == IPV6_ADDR_ANY || + addr_type & IPV6_ADDR_MULTICAST || + (!(idev->dev->flags & IFF_LOOPBACK) && + addr_type & IPV6_ADDR_LOOPBACK)) + return ERR_PTR(-EADDRNOTAVAIL); rcu_read_lock_bh(); if (idev->dead) { @@ -749,12 +777,12 @@ static void ipv6_del_addr(struct inet6_ifaddr *ifp) } write_unlock_bh(&idev->lock); + addrconf_del_timer(ifp); + ipv6_ifa_notify(RTM_DELADDR, ifp); atomic_notifier_call_chain(&inet6addr_chain, NETDEV_DOWN, ifp); - addrconf_del_timer(ifp); - /* * Purge or update corresponding prefix * @@ -777,7 +805,7 @@ static void ipv6_del_addr(struct inet6_ifaddr *ifp) ipv6_addr_prefix(&prefix, &ifp->addr, ifp->prefix_len); rt = rt6_lookup(net, &prefix, NULL, ifp->idev->dev->ifindex, 1); - if (rt && ((rt->rt6i_flags & (RTF_GATEWAY | RTF_DEFAULT)) == 0)) { + if (rt && addrconf_is_prefix_route(rt)) { if (onlink == 0) { ip6_del_rt(rt); rt = NULL; @@ -958,7 +986,8 @@ static inline int ipv6_saddr_preferred(int type) return 0; } -static int ipv6_get_saddr_eval(struct ipv6_saddr_score *score, +static int ipv6_get_saddr_eval(struct net *net, + struct ipv6_saddr_score *score, struct ipv6_saddr_dst *dst, int i) { @@ -1037,7 +1066,8 @@ static int ipv6_get_saddr_eval(struct ipv6_saddr_score *score, break; case IPV6_SADDR_RULE_LABEL: /* Rule 6: Prefer matching label */ - ret = ipv6_addr_label(&score->ifa->addr, score->addr_type, + ret = ipv6_addr_label(net, + &score->ifa->addr, score->addr_type, score->ifa->idev->dev->ifindex) == dst->label; break; #ifdef CONFIG_IPV6_PRIVACY @@ -1076,13 +1106,12 @@ out: return ret; } -int ipv6_dev_get_saddr(struct net_device *dst_dev, +int ipv6_dev_get_saddr(struct net *net, struct net_device *dst_dev, const struct in6_addr *daddr, unsigned int prefs, struct in6_addr *saddr) { struct ipv6_saddr_score scores[2], *score = &scores[0], *hiscore = &scores[1]; - struct net *net = dev_net(dst_dev); struct ipv6_saddr_dst dst; struct net_device *dev; int dst_type; @@ -1091,7 +1120,7 @@ int ipv6_dev_get_saddr(struct net_device *dst_dev, dst.addr = daddr; dst.ifindex = dst_dev ? dst_dev->ifindex : 0; dst.scope = __ipv6_addr_src_scope(dst_type); - dst.label = ipv6_addr_label(daddr, dst_type, dst.ifindex); + dst.label = ipv6_addr_label(net, daddr, dst_type, dst.ifindex); dst.prefs = prefs; hiscore->rule = -1; @@ -1159,8 +1188,8 @@ int ipv6_dev_get_saddr(struct net_device *dst_dev, for (i = 0; i < IPV6_SADDR_RULE_MAX; i++) { int minihiscore, miniscore; - minihiscore = ipv6_get_saddr_eval(hiscore, &dst, i); - miniscore = ipv6_get_saddr_eval(score, &dst, i); + minihiscore = ipv6_get_saddr_eval(net, hiscore, &dst, i); + miniscore = ipv6_get_saddr_eval(net, score, &dst, i); if (minihiscore > miniscore) { if (i == IPV6_SADDR_RULE_SCOPE && @@ -1400,6 +1429,20 @@ static void addrconf_dad_stop(struct inet6_ifaddr *ifp) void addrconf_dad_failure(struct inet6_ifaddr *ifp) { + struct inet6_dev *idev = ifp->idev; + if (idev->cnf.accept_dad > 1 && !idev->cnf.disable_ipv6) { + struct in6_addr addr; + + addr.s6_addr32[0] = htonl(0xfe800000); + addr.s6_addr32[1] = 0; + + if (!ipv6_generate_eui64(addr.s6_addr + 8, idev->dev) && + ipv6_addr_equal(&ifp->addr, &addr)) { + /* DAD failed for link-local based on MAC address */ + idev->cnf.disable_ipv6 = 1; + } + } + if (net_ratelimit()) printk(KERN_INFO "%s: duplicate address detected!\n", ifp->idev->dev->name); addrconf_dad_stop(ifp); @@ -1645,6 +1688,7 @@ addrconf_prefix_route(struct in6_addr *pfx, int plen, struct net_device *dev, .fc_dst_len = plen, .fc_flags = RTF_UP | flags, .fc_nlinfo.nl_net = dev_net(dev), + .fc_protocol = RTPROT_KERNEL, }; ipv6_addr_copy(&cfg.fc_dst, pfx); @@ -1788,7 +1832,7 @@ void addrconf_prefix_rcv(struct net_device *dev, u8 *opt, int len) rt = rt6_lookup(dev_net(dev), &pinfo->prefix, NULL, dev->ifindex, 1); - if (rt && ((rt->rt6i_flags & (RTF_GATEWAY | RTF_DEFAULT)) == 0)) { + if (rt && addrconf_is_prefix_route(rt)) { /* Autoconf prefix route */ if (valid_lft == 0) { ip6_del_rt(rt); @@ -1822,6 +1866,7 @@ void addrconf_prefix_rcv(struct net_device *dev, u8 *opt, int len) struct inet6_ifaddr * ifp; struct in6_addr addr; int create = 0, update_lft = 0; + struct net *net = dev_net(dev); if (pinfo->prefix_len == 64) { memcpy(&addr, &pinfo->prefix, 8); @@ -1840,7 +1885,7 @@ void addrconf_prefix_rcv(struct net_device *dev, u8 *opt, int len) ok: - ifp = ipv6_get_ifaddr(dev_net(dev), &addr, dev, 1); + ifp = ipv6_get_ifaddr(net, &addr, dev, 1); if (ifp == NULL && valid_lft) { int max_addresses = in6_dev->cnf.max_addresses; @@ -1848,7 +1893,7 @@ ok: #ifdef CONFIG_IPV6_OPTIMISTIC_DAD if (in6_dev->cnf.optimistic_dad && - !ipv6_devconf.forwarding) + !net->ipv6.devconf_all->forwarding) addr_flags = IFA_F_OPTIMISTIC; #endif @@ -2277,7 +2322,7 @@ static void addrconf_add_linklocal(struct inet6_dev *idev, struct in6_addr *addr #ifdef CONFIG_IPV6_OPTIMISTIC_DAD if (idev->cnf.optimistic_dad && - !ipv6_devconf.forwarding) + !dev_net(idev->dev)->ipv6.devconf_all->forwarding) addr_flags |= IFA_F_OPTIMISTIC; #endif @@ -2732,6 +2777,7 @@ static void addrconf_dad_start(struct inet6_ifaddr *ifp, u32 flags) spin_lock_bh(&ifp->lock); if (dev->flags&(IFF_NOARP|IFF_LOOPBACK) || + idev->cnf.accept_dad < 1 || !(ifp->flags&IFA_F_TENTATIVE) || ifp->flags & IFA_F_NODAD) { ifp->flags &= ~(IFA_F_TENTATIVE|IFA_F_OPTIMISTIC); @@ -2779,6 +2825,11 @@ static void addrconf_dad_timer(unsigned long data) read_unlock_bh(&idev->lock); goto out; } + if (idev->cnf.accept_dad > 1 && idev->cnf.disable_ipv6) { + read_unlock_bh(&idev->lock); + addrconf_dad_failure(ifp); + return; + } spin_lock_bh(&ifp->lock); if (ifp->probes == 0) { /* @@ -3638,6 +3689,8 @@ static inline void ipv6_store_devconf(struct ipv6_devconf *cnf, #ifdef CONFIG_IPV6_MROUTE array[DEVCONF_MC_FORWARDING] = cnf->mc_forwarding; #endif + array[DEVCONF_DISABLE_IPV6] = cnf->disable_ipv6; + array[DEVCONF_ACCEPT_DAD] = cnf->accept_dad; } static inline size_t inet6_if_nlmsg_size(void) @@ -4197,6 +4250,22 @@ static struct addrconf_sysctl_table }, #endif { + .ctl_name = CTL_UNNUMBERED, + .procname = "disable_ipv6", + .data = &ipv6_devconf.disable_ipv6, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = &proc_dointvec, + }, + { + .ctl_name = CTL_UNNUMBERED, + .procname = "accept_dad", + .data = &ipv6_devconf.accept_dad, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = &proc_dointvec, + }, + { .ctl_name = 0, /* sentinel */ } }, diff --git a/net/ipv6/addrlabel.c b/net/ipv6/addrlabel.c index 9bfa8846f262..08909039d87b 100644 --- a/net/ipv6/addrlabel.c +++ b/net/ipv6/addrlabel.c @@ -29,6 +29,9 @@ */ struct ip6addrlbl_entry { +#ifdef CONFIG_NET_NS + struct net *lbl_net; +#endif struct in6_addr prefix; int prefixlen; int ifindex; @@ -46,6 +49,16 @@ static struct ip6addrlbl_table u32 seq; } ip6addrlbl_table; +static inline +struct net *ip6addrlbl_net(const struct ip6addrlbl_entry *lbl) +{ +#ifdef CONFIG_NET_NS + return lbl->lbl_net; +#else + return &init_net; +#endif +} + /* * Default policy table (RFC3484 + extensions) * @@ -65,7 +78,7 @@ static struct ip6addrlbl_table #define IPV6_ADDR_LABEL_DEFAULT 0xffffffffUL -static const __initdata struct ip6addrlbl_init_table +static const __net_initdata struct ip6addrlbl_init_table { const struct in6_addr *prefix; int prefixlen; @@ -108,6 +121,9 @@ static const __initdata struct ip6addrlbl_init_table /* Object management */ static inline void ip6addrlbl_free(struct ip6addrlbl_entry *p) { +#ifdef CONFIG_NET_NS + release_net(p->lbl_net); +#endif kfree(p); } @@ -128,10 +144,13 @@ static inline void ip6addrlbl_put(struct ip6addrlbl_entry *p) } /* Find label */ -static int __ip6addrlbl_match(struct ip6addrlbl_entry *p, +static int __ip6addrlbl_match(struct net *net, + struct ip6addrlbl_entry *p, const struct in6_addr *addr, int addrtype, int ifindex) { + if (!net_eq(ip6addrlbl_net(p), net)) + return 0; if (p->ifindex && p->ifindex != ifindex) return 0; if (p->addrtype && p->addrtype != addrtype) @@ -141,19 +160,21 @@ static int __ip6addrlbl_match(struct ip6addrlbl_entry *p, return 1; } -static struct ip6addrlbl_entry *__ipv6_addr_label(const struct in6_addr *addr, +static struct ip6addrlbl_entry *__ipv6_addr_label(struct net *net, + const struct in6_addr *addr, int type, int ifindex) { struct hlist_node *pos; struct ip6addrlbl_entry *p; hlist_for_each_entry_rcu(p, pos, &ip6addrlbl_table.head, list) { - if (__ip6addrlbl_match(p, addr, type, ifindex)) + if (__ip6addrlbl_match(net, p, addr, type, ifindex)) return p; } return NULL; } -u32 ipv6_addr_label(const struct in6_addr *addr, int type, int ifindex) +u32 ipv6_addr_label(struct net *net, + const struct in6_addr *addr, int type, int ifindex) { u32 label; struct ip6addrlbl_entry *p; @@ -161,7 +182,7 @@ u32 ipv6_addr_label(const struct in6_addr *addr, int type, int ifindex) type &= IPV6_ADDR_MAPPED | IPV6_ADDR_COMPATv4 | IPV6_ADDR_LOOPBACK; rcu_read_lock(); - p = __ipv6_addr_label(addr, type, ifindex); + p = __ipv6_addr_label(net, addr, type, ifindex); label = p ? p->label : IPV6_ADDR_LABEL_DEFAULT; rcu_read_unlock(); @@ -174,7 +195,8 @@ u32 ipv6_addr_label(const struct in6_addr *addr, int type, int ifindex) } /* allocate one entry */ -static struct ip6addrlbl_entry *ip6addrlbl_alloc(const struct in6_addr *prefix, +static struct ip6addrlbl_entry *ip6addrlbl_alloc(struct net *net, + const struct in6_addr *prefix, int prefixlen, int ifindex, u32 label) { @@ -216,6 +238,9 @@ static struct ip6addrlbl_entry *ip6addrlbl_alloc(const struct in6_addr *prefix, newp->addrtype = addrtype; newp->label = label; INIT_HLIST_NODE(&newp->list); +#ifdef CONFIG_NET_NS + newp->lbl_net = hold_net(net); +#endif atomic_set(&newp->refcnt, 1); return newp; } @@ -237,6 +262,7 @@ static int __ip6addrlbl_add(struct ip6addrlbl_entry *newp, int replace) hlist_for_each_entry_safe(p, pos, n, &ip6addrlbl_table.head, list) { if (p->prefixlen == newp->prefixlen && + net_eq(ip6addrlbl_net(p), ip6addrlbl_net(newp)) && p->ifindex == newp->ifindex && ipv6_addr_equal(&p->prefix, &newp->prefix)) { if (!replace) { @@ -261,7 +287,8 @@ out: } /* add a label */ -static int ip6addrlbl_add(const struct in6_addr *prefix, int prefixlen, +static int ip6addrlbl_add(struct net *net, + const struct in6_addr *prefix, int prefixlen, int ifindex, u32 label, int replace) { struct ip6addrlbl_entry *newp; @@ -274,7 +301,7 @@ static int ip6addrlbl_add(const struct in6_addr *prefix, int prefixlen, (unsigned int)label, replace); - newp = ip6addrlbl_alloc(prefix, prefixlen, ifindex, label); + newp = ip6addrlbl_alloc(net, prefix, prefixlen, ifindex, label); if (IS_ERR(newp)) return PTR_ERR(newp); spin_lock(&ip6addrlbl_table.lock); @@ -286,7 +313,8 @@ static int ip6addrlbl_add(const struct in6_addr *prefix, int prefixlen, } /* remove a label */ -static int __ip6addrlbl_del(const struct in6_addr *prefix, int prefixlen, +static int __ip6addrlbl_del(struct net *net, + const struct in6_addr *prefix, int prefixlen, int ifindex) { struct ip6addrlbl_entry *p = NULL; @@ -300,6 +328,7 @@ static int __ip6addrlbl_del(const struct in6_addr *prefix, int prefixlen, hlist_for_each_entry_safe(p, pos, n, &ip6addrlbl_table.head, list) { if (p->prefixlen == prefixlen && + net_eq(ip6addrlbl_net(p), net) && p->ifindex == ifindex && ipv6_addr_equal(&p->prefix, prefix)) { hlist_del_rcu(&p->list); @@ -311,7 +340,8 @@ static int __ip6addrlbl_del(const struct in6_addr *prefix, int prefixlen, return ret; } -static int ip6addrlbl_del(const struct in6_addr *prefix, int prefixlen, +static int ip6addrlbl_del(struct net *net, + const struct in6_addr *prefix, int prefixlen, int ifindex) { struct in6_addr prefix_buf; @@ -324,13 +354,13 @@ static int ip6addrlbl_del(const struct in6_addr *prefix, int prefixlen, ipv6_addr_prefix(&prefix_buf, prefix, prefixlen); spin_lock(&ip6addrlbl_table.lock); - ret = __ip6addrlbl_del(&prefix_buf, prefixlen, ifindex); + ret = __ip6addrlbl_del(net, &prefix_buf, prefixlen, ifindex); spin_unlock(&ip6addrlbl_table.lock); return ret; } /* add default label */ -static __init int ip6addrlbl_init(void) +static int __net_init ip6addrlbl_net_init(struct net *net) { int err = 0; int i; @@ -338,7 +368,8 @@ static __init int ip6addrlbl_init(void) ADDRLABEL(KERN_DEBUG "%s()\n", __func__); for (i = 0; i < ARRAY_SIZE(ip6addrlbl_init_table); i++) { - int ret = ip6addrlbl_add(ip6addrlbl_init_table[i].prefix, + int ret = ip6addrlbl_add(net, + ip6addrlbl_init_table[i].prefix, ip6addrlbl_init_table[i].prefixlen, 0, ip6addrlbl_init_table[i].label, 0); @@ -349,11 +380,32 @@ static __init int ip6addrlbl_init(void) return err; } +static void __net_exit ip6addrlbl_net_exit(struct net *net) +{ + struct ip6addrlbl_entry *p = NULL; + struct hlist_node *pos, *n; + + /* Remove all labels belonging to the exiting net */ + spin_lock(&ip6addrlbl_table.lock); + hlist_for_each_entry_safe(p, pos, n, &ip6addrlbl_table.head, list) { + if (net_eq(ip6addrlbl_net(p), net)) { + hlist_del_rcu(&p->list); + ip6addrlbl_put(p); + } + } + spin_unlock(&ip6addrlbl_table.lock); +} + +static struct pernet_operations ipv6_addr_label_ops = { + .init = ip6addrlbl_net_init, + .exit = ip6addrlbl_net_exit, +}; + int __init ipv6_addr_label_init(void) { spin_lock_init(&ip6addrlbl_table.lock); - return ip6addrlbl_init(); + return register_pernet_subsys(&ipv6_addr_label_ops); } static const struct nla_policy ifal_policy[IFAL_MAX+1] = { @@ -371,9 +423,6 @@ static int ip6addrlbl_newdel(struct sk_buff *skb, struct nlmsghdr *nlh, u32 label; int err = 0; - if (net != &init_net) - return 0; - err = nlmsg_parse(nlh, sizeof(*ifal), tb, IFAL_MAX, ifal_policy); if (err < 0) return err; @@ -385,7 +434,7 @@ static int ip6addrlbl_newdel(struct sk_buff *skb, struct nlmsghdr *nlh, return -EINVAL; if (ifal->ifal_index && - !__dev_get_by_index(&init_net, ifal->ifal_index)) + !__dev_get_by_index(net, ifal->ifal_index)) return -EINVAL; if (!tb[IFAL_ADDRESS]) @@ -403,12 +452,12 @@ static int ip6addrlbl_newdel(struct sk_buff *skb, struct nlmsghdr *nlh, switch(nlh->nlmsg_type) { case RTM_NEWADDRLABEL: - err = ip6addrlbl_add(pfx, ifal->ifal_prefixlen, + err = ip6addrlbl_add(net, pfx, ifal->ifal_prefixlen, ifal->ifal_index, label, nlh->nlmsg_flags & NLM_F_REPLACE); break; case RTM_DELADDRLABEL: - err = ip6addrlbl_del(pfx, ifal->ifal_prefixlen, + err = ip6addrlbl_del(net, pfx, ifal->ifal_prefixlen, ifal->ifal_index); break; default: @@ -458,12 +507,10 @@ static int ip6addrlbl_dump(struct sk_buff *skb, struct netlink_callback *cb) int idx = 0, s_idx = cb->args[0]; int err; - if (net != &init_net) - return 0; - rcu_read_lock(); hlist_for_each_entry_rcu(p, pos, &ip6addrlbl_table.head, list) { - if (idx >= s_idx) { + if (idx >= s_idx && + net_eq(ip6addrlbl_net(p), net)) { if ((err = ip6addrlbl_fill(skb, p, ip6addrlbl_table.seq, NETLINK_CB(cb->skb).pid, @@ -499,9 +546,6 @@ static int ip6addrlbl_get(struct sk_buff *in_skb, struct nlmsghdr* nlh, struct ip6addrlbl_entry *p; struct sk_buff *skb; - if (net != &init_net) - return 0; - err = nlmsg_parse(nlh, sizeof(*ifal), tb, IFAL_MAX, ifal_policy); if (err < 0) return err; @@ -513,7 +557,7 @@ static int ip6addrlbl_get(struct sk_buff *in_skb, struct nlmsghdr* nlh, return -EINVAL; if (ifal->ifal_index && - !__dev_get_by_index(&init_net, ifal->ifal_index)) + !__dev_get_by_index(net, ifal->ifal_index)) return -EINVAL; if (!tb[IFAL_ADDRESS]) @@ -524,7 +568,7 @@ static int ip6addrlbl_get(struct sk_buff *in_skb, struct nlmsghdr* nlh, return -EINVAL; rcu_read_lock(); - p = __ipv6_addr_label(addr, ipv6_addr_type(addr), ifal->ifal_index); + p = __ipv6_addr_label(net, addr, ipv6_addr_type(addr), ifal->ifal_index); if (p && ip6addrlbl_hold(p)) p = NULL; lseq = ip6addrlbl_table.seq; @@ -552,7 +596,7 @@ static int ip6addrlbl_get(struct sk_buff *in_skb, struct nlmsghdr* nlh, goto out; } - err = rtnl_unicast(skb, &init_net, NETLINK_CB(in_skb).pid); + err = rtnl_unicast(skb, net, NETLINK_CB(in_skb).pid); out: return err; } diff --git a/net/ipv6/af_inet6.c b/net/ipv6/af_inet6.c index e84b3fd17fb4..01edac888510 100644 --- a/net/ipv6/af_inet6.c +++ b/net/ipv6/af_inet6.c @@ -7,8 +7,6 @@ * * Adapted from linux/net/ipv4/af_inet.c * - * $Id: af_inet6.c,v 1.66 2002/02/01 22:01:04 davem Exp $ - * * Fixes: * piggy, Karl Knutson : Socket protocol table * Hideaki YOSHIFUJI : sin6_scope_id support @@ -52,6 +50,7 @@ #include <net/ipip.h> #include <net/protocol.h> #include <net/inet_common.h> +#include <net/route.h> #include <net/transp_v6.h> #include <net/ip6_route.h> #include <net/addrconf.h> @@ -61,9 +60,7 @@ #include <asm/uaccess.h> #include <asm/system.h> -#ifdef CONFIG_IPV6_MROUTE #include <linux/mroute6.h> -#endif MODULE_AUTHOR("Cast of dozens"); MODULE_DESCRIPTION("IPv6 protocol stack for Linux"); @@ -87,7 +84,6 @@ static int inet6_create(struct net *net, struct socket *sock, int protocol) struct inet_sock *inet; struct ipv6_pinfo *np; struct sock *sk; - struct list_head *p; struct inet_protosw *answer; struct proto *answer_prot; unsigned char answer_flags; @@ -101,13 +97,12 @@ static int inet6_create(struct net *net, struct socket *sock, int protocol) build_ehash_secret(); /* Look for the requested type/protocol pair. */ - answer = NULL; lookup_protocol: err = -ESOCKTNOSUPPORT; rcu_read_lock(); - list_for_each_rcu(p, &inetsw6[sock->type]) { - answer = list_entry(p, struct inet_protosw, list); + list_for_each_entry_rcu(answer, &inetsw6[sock->type], list) { + err = 0; /* Check the non-wild match. */ if (protocol == answer->protocol) { if (protocol != IPPROTO_IP) @@ -122,10 +117,9 @@ lookup_protocol: break; } err = -EPROTONOSUPPORT; - answer = NULL; } - if (!answer) { + if (err) { if (try_loading_module < 2) { rcu_read_unlock(); /* @@ -157,7 +151,7 @@ lookup_protocol: answer_flags = answer->flags; rcu_read_unlock(); - BUG_TRAP(answer_prot->slab != NULL); + WARN_ON(answer_prot->slab == NULL); err = -ENOBUFS; sk = sk_alloc(net, PF_INET6, GFP_KERNEL, answer_prot); @@ -373,7 +367,7 @@ int inet6_release(struct socket *sock) EXPORT_SYMBOL(inet6_release); -int inet6_destroy_sock(struct sock *sk) +void inet6_destroy_sock(struct sock *sk) { struct ipv6_pinfo *np = inet6_sk(sk); struct sk_buff *skb; @@ -391,8 +385,6 @@ int inet6_destroy_sock(struct sock *sk) if ((opt = xchg(&np->opt, NULL)) != NULL) sock_kfree_s(sk, opt, opt->tot_len); - - return 0; } EXPORT_SYMBOL_GPL(inet6_destroy_sock); @@ -803,61 +795,55 @@ static void ipv6_packet_cleanup(void) dev_remove_pack(&ipv6_packet_type); } -static int __init init_ipv6_mibs(void) +static int __net_init ipv6_init_mibs(struct net *net) { - if (snmp_mib_init((void **)ipv6_statistics, + if (snmp_mib_init((void **)net->mib.udp_stats_in6, + sizeof (struct udp_mib)) < 0) + return -ENOMEM; + if (snmp_mib_init((void **)net->mib.udplite_stats_in6, + sizeof (struct udp_mib)) < 0) + goto err_udplite_mib; + if (snmp_mib_init((void **)net->mib.ipv6_statistics, sizeof(struct ipstats_mib)) < 0) goto err_ip_mib; - if (snmp_mib_init((void **)icmpv6_statistics, + if (snmp_mib_init((void **)net->mib.icmpv6_statistics, sizeof(struct icmpv6_mib)) < 0) goto err_icmp_mib; - if (snmp_mib_init((void **)icmpv6msg_statistics, + if (snmp_mib_init((void **)net->mib.icmpv6msg_statistics, sizeof(struct icmpv6msg_mib)) < 0) goto err_icmpmsg_mib; - if (snmp_mib_init((void **)udp_stats_in6, sizeof (struct udp_mib)) < 0) - goto err_udp_mib; - if (snmp_mib_init((void **)udplite_stats_in6, - sizeof (struct udp_mib)) < 0) - goto err_udplite_mib; return 0; -err_udplite_mib: - snmp_mib_free((void **)udp_stats_in6); -err_udp_mib: - snmp_mib_free((void **)icmpv6msg_statistics); err_icmpmsg_mib: - snmp_mib_free((void **)icmpv6_statistics); + snmp_mib_free((void **)net->mib.icmpv6_statistics); err_icmp_mib: - snmp_mib_free((void **)ipv6_statistics); + snmp_mib_free((void **)net->mib.ipv6_statistics); err_ip_mib: + snmp_mib_free((void **)net->mib.udplite_stats_in6); +err_udplite_mib: + snmp_mib_free((void **)net->mib.udp_stats_in6); return -ENOMEM; - } -static void cleanup_ipv6_mibs(void) +static void __net_exit ipv6_cleanup_mibs(struct net *net) { - snmp_mib_free((void **)ipv6_statistics); - snmp_mib_free((void **)icmpv6_statistics); - snmp_mib_free((void **)icmpv6msg_statistics); - snmp_mib_free((void **)udp_stats_in6); - snmp_mib_free((void **)udplite_stats_in6); + snmp_mib_free((void **)net->mib.udp_stats_in6); + snmp_mib_free((void **)net->mib.udplite_stats_in6); + snmp_mib_free((void **)net->mib.ipv6_statistics); + snmp_mib_free((void **)net->mib.icmpv6_statistics); + snmp_mib_free((void **)net->mib.icmpv6msg_statistics); } -static int inet6_net_init(struct net *net) +static int __net_init inet6_net_init(struct net *net) { int err = 0; net->ipv6.sysctl.bindv6only = 0; - net->ipv6.sysctl.flush_delay = 0; - net->ipv6.sysctl.ip6_rt_max_size = 4096; - net->ipv6.sysctl.ip6_rt_gc_min_interval = HZ / 2; - net->ipv6.sysctl.ip6_rt_gc_timeout = 60*HZ; - net->ipv6.sysctl.ip6_rt_gc_interval = 30*HZ; - net->ipv6.sysctl.ip6_rt_gc_elasticity = 9; - net->ipv6.sysctl.ip6_rt_mtu_expires = 10*60*HZ; - net->ipv6.sysctl.ip6_rt_min_advmss = IPV6_MIN_MTU - 20 - 40; net->ipv6.sysctl.icmpv6_time = 1*HZ; + err = ipv6_init_mibs(net); + if (err) + return err; #ifdef CONFIG_PROC_FS err = udp6_proc_init(net); if (err) @@ -868,7 +854,6 @@ static int inet6_net_init(struct net *net) err = ac6_proc_init(net); if (err) goto proc_ac6_fail; -out: #endif return err; @@ -877,7 +862,9 @@ proc_ac6_fail: tcp6_proc_exit(net); proc_tcp6_fail: udp6_proc_exit(net); - goto out; +out: + ipv6_cleanup_mibs(net); + return err; #endif } @@ -888,6 +875,7 @@ static void inet6_net_exit(struct net *net) tcp6_proc_exit(net); ac6_proc_exit(net); #endif + ipv6_cleanup_mibs(net); } static struct pernet_operations inet6_net_ops = { @@ -938,11 +926,11 @@ static int __init inet6_init(void) if (err) goto out_sock_register_fail; - /* Initialise ipv6 mibs */ - err = init_ipv6_mibs(); +#ifdef CONFIG_SYSCTL + err = ipv6_static_sysctl_register(); if (err) - goto out_unregister_sock; - + goto static_sysctl_fail; +#endif /* * ipngwg API draft makes clear that the correct semantics * for TCP and UDP is to consider one TCP and UDP instance @@ -956,9 +944,9 @@ static int __init inet6_init(void) err = icmpv6_init(); if (err) goto icmp_fail; -#ifdef CONFIG_IPV6_MROUTE - ip6_mr_init(); -#endif + err = ip6_mr_init(); + if (err) + goto ipmr_fail; err = ndisc_init(); if (err) goto ndisc_fail; @@ -1061,12 +1049,16 @@ netfilter_fail: igmp_fail: ndisc_cleanup(); ndisc_fail: + ip6_mr_cleanup(); +ipmr_fail: icmpv6_cleanup(); icmp_fail: unregister_pernet_subsys(&inet6_net_ops); register_pernet_fail: - cleanup_ipv6_mibs(); -out_unregister_sock: +#ifdef CONFIG_SYSCTL + ipv6_static_sysctl_unregister(); +static_sysctl_fail: +#endif sock_unregister(PF_INET6); rtnl_unregister_all(PF_INET6); out_sock_register_fail: @@ -1115,11 +1107,14 @@ static void __exit inet6_exit(void) ipv6_netfilter_fini(); igmp6_cleanup(); ndisc_cleanup(); + ip6_mr_cleanup(); icmpv6_cleanup(); rawv6_exit(); unregister_pernet_subsys(&inet6_net_ops); - cleanup_ipv6_mibs(); +#ifdef CONFIG_SYSCTL + ipv6_static_sysctl_unregister(); +#endif proto_unregister(&rawv6_prot); proto_unregister(&udplitev6_prot); proto_unregister(&udpv6_prot); diff --git a/net/ipv6/anycast.c b/net/ipv6/anycast.c index 4e1b29fabdf0..8336cd81cb4f 100644 --- a/net/ipv6/anycast.c +++ b/net/ipv6/anycast.c @@ -60,7 +60,7 @@ int ipv6_sock_ac_join(struct sock *sk, int ifindex, struct in6_addr *addr) struct inet6_dev *idev; struct ipv6_ac_socklist *pac; struct net *net = sock_net(sk); - int ishost = !ipv6_devconf.forwarding; + int ishost = !net->ipv6.devconf_all->forwarding; int err = 0; if (!capable(CAP_NET_ADMIN)) diff --git a/net/ipv6/datagram.c b/net/ipv6/datagram.c index 0f0f94a40335..410046a8cc91 100644 --- a/net/ipv6/datagram.c +++ b/net/ipv6/datagram.c @@ -5,8 +5,6 @@ * Authors: * Pedro Roque <roque@di.fc.ul.pt> * - * $Id: datagram.c,v 1.24 2002/02/01 22:01:04 davem Exp $ - * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public License * as published by the Free Software Foundation; either version @@ -734,7 +732,7 @@ int datagram_send_ctl(struct net *net, LIMIT_NETDEBUG(KERN_DEBUG "invalid cmsg type: %d\n", cmsg->cmsg_type); err = -EINVAL; - break; + goto exit_f; } } diff --git a/net/ipv6/esp6.c b/net/ipv6/esp6.c index c6bb4c6d24b3..b181b08fb761 100644 --- a/net/ipv6/esp6.c +++ b/net/ipv6/esp6.c @@ -521,6 +521,10 @@ static int esp6_init_state(struct xfrm_state *x) crypto_aead_ivsize(aead); switch (x->props.mode) { case XFRM_MODE_BEET: + if (x->sel.family != AF_INET6) + x->props.header_len += IPV4_BEET_PHMAXLEN + + (sizeof(struct ipv6hdr) - sizeof(struct iphdr)); + break; case XFRM_MODE_TRANSPORT: break; case XFRM_MODE_TUNNEL: diff --git a/net/ipv6/exthdrs.c b/net/ipv6/exthdrs.c index 3cd1c993d52b..6bfffec2371c 100644 --- a/net/ipv6/exthdrs.c +++ b/net/ipv6/exthdrs.c @@ -7,8 +7,6 @@ * Andi Kleen <ak@muc.de> * Alexey Kuznetsov <kuznet@ms2.inr.ac.ru> * - * $Id: exthdrs.c,v 1.13 2001/06/19 15:58:56 davem Exp $ - * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public License * as published by the Free Software Foundation; either version @@ -279,7 +277,7 @@ static int ipv6_destopt_rcv(struct sk_buff *skb) if (!pskb_may_pull(skb, skb_transport_offset(skb) + 8) || !pskb_may_pull(skb, (skb_transport_offset(skb) + ((skb_transport_header(skb)[1] + 1) << 3)))) { - IP6_INC_STATS_BH(ip6_dst_idev(skb->dst), + IP6_INC_STATS_BH(dev_net(skb->dst->dev), ip6_dst_idev(skb->dst), IPSTATS_MIB_INHDRERRORS); kfree_skb(skb); return -1; @@ -303,7 +301,8 @@ static int ipv6_destopt_rcv(struct sk_buff *skb) return 1; } - IP6_INC_STATS_BH(ip6_dst_idev(dst), IPSTATS_MIB_INHDRERRORS); + IP6_INC_STATS_BH(dev_net(dst->dev), + ip6_dst_idev(dst), IPSTATS_MIB_INHDRERRORS); dst_release(dst); return -1; } @@ -321,7 +320,8 @@ static int ipv6_rthdr_rcv(struct sk_buff *skb) int n, i; struct ipv6_rt_hdr *hdr; struct rt0_hdr *rthdr; - int accept_source_route = ipv6_devconf.accept_source_route; + struct net *net = dev_net(skb->dev); + int accept_source_route = net->ipv6.devconf_all->accept_source_route; idev = in6_dev_get(skb->dev); if (idev) { @@ -333,7 +333,7 @@ static int ipv6_rthdr_rcv(struct sk_buff *skb) if (!pskb_may_pull(skb, skb_transport_offset(skb) + 8) || !pskb_may_pull(skb, (skb_transport_offset(skb) + ((skb_transport_header(skb)[1] + 1) << 3)))) { - IP6_INC_STATS_BH(ip6_dst_idev(skb->dst), + IP6_INC_STATS_BH(net, ip6_dst_idev(skb->dst), IPSTATS_MIB_INHDRERRORS); kfree_skb(skb); return -1; @@ -343,7 +343,7 @@ static int ipv6_rthdr_rcv(struct sk_buff *skb) if (ipv6_addr_is_multicast(&ipv6_hdr(skb)->daddr) || skb->pkt_type != PACKET_HOST) { - IP6_INC_STATS_BH(ip6_dst_idev(skb->dst), + IP6_INC_STATS_BH(net, ip6_dst_idev(skb->dst), IPSTATS_MIB_INADDRERRORS); kfree_skb(skb); return -1; @@ -358,7 +358,7 @@ looped_back: * processed by own */ if (!addr) { - IP6_INC_STATS_BH(ip6_dst_idev(skb->dst), + IP6_INC_STATS_BH(net, ip6_dst_idev(skb->dst), IPSTATS_MIB_INADDRERRORS); kfree_skb(skb); return -1; @@ -384,7 +384,7 @@ looped_back: goto unknown_rh; /* Silently discard invalid RTH type 2 */ if (hdr->hdrlen != 2 || hdr->segments_left != 1) { - IP6_INC_STATS_BH(ip6_dst_idev(skb->dst), + IP6_INC_STATS_BH(net, ip6_dst_idev(skb->dst), IPSTATS_MIB_INHDRERRORS); kfree_skb(skb); return -1; @@ -403,7 +403,7 @@ looped_back: n = hdr->hdrlen >> 1; if (hdr->segments_left > n) { - IP6_INC_STATS_BH(ip6_dst_idev(skb->dst), + IP6_INC_STATS_BH(net, ip6_dst_idev(skb->dst), IPSTATS_MIB_INHDRERRORS); icmpv6_param_prob(skb, ICMPV6_HDR_FIELD, ((&hdr->segments_left) - @@ -417,7 +417,7 @@ looped_back: if (skb_cloned(skb)) { /* the copy is a forwarded packet */ if (pskb_expand_head(skb, 0, 0, GFP_ATOMIC)) { - IP6_INC_STATS_BH(ip6_dst_idev(skb->dst), + IP6_INC_STATS_BH(net, ip6_dst_idev(skb->dst), IPSTATS_MIB_OUTDISCARDS); kfree_skb(skb); return -1; @@ -440,13 +440,13 @@ looped_back: if (xfrm6_input_addr(skb, (xfrm_address_t *)addr, (xfrm_address_t *)&ipv6_hdr(skb)->saddr, IPPROTO_ROUTING) < 0) { - IP6_INC_STATS_BH(ip6_dst_idev(skb->dst), + IP6_INC_STATS_BH(net, ip6_dst_idev(skb->dst), IPSTATS_MIB_INADDRERRORS); kfree_skb(skb); return -1; } - if (!ipv6_chk_home_addr(&init_net, addr)) { - IP6_INC_STATS_BH(ip6_dst_idev(skb->dst), + if (!ipv6_chk_home_addr(dev_net(skb->dst->dev), addr)) { + IP6_INC_STATS_BH(net, ip6_dst_idev(skb->dst), IPSTATS_MIB_INADDRERRORS); kfree_skb(skb); return -1; @@ -458,7 +458,7 @@ looped_back: } if (ipv6_addr_is_multicast(addr)) { - IP6_INC_STATS_BH(ip6_dst_idev(skb->dst), + IP6_INC_STATS_BH(net, ip6_dst_idev(skb->dst), IPSTATS_MIB_INADDRERRORS); kfree_skb(skb); return -1; @@ -478,7 +478,7 @@ looped_back: if (skb->dst->dev->flags&IFF_LOOPBACK) { if (ipv6_hdr(skb)->hop_limit <= 1) { - IP6_INC_STATS_BH(ip6_dst_idev(skb->dst), + IP6_INC_STATS_BH(net, ip6_dst_idev(skb->dst), IPSTATS_MIB_INHDRERRORS); icmpv6_send(skb, ICMPV6_TIME_EXCEED, ICMPV6_EXC_HOPLIMIT, 0, skb->dev); @@ -494,7 +494,7 @@ looped_back: return -1; unknown_rh: - IP6_INC_STATS_BH(ip6_dst_idev(skb->dst), IPSTATS_MIB_INHDRERRORS); + IP6_INC_STATS_BH(net, ip6_dst_idev(skb->dst), IPSTATS_MIB_INHDRERRORS); icmpv6_param_prob(skb, ICMPV6_HDR_FIELD, (&hdr->type) - skb_network_header(skb)); return -1; @@ -581,29 +581,33 @@ static int ipv6_hop_jumbo(struct sk_buff *skb, int optoff) { const unsigned char *nh = skb_network_header(skb); u32 pkt_len; + struct net *net = dev_net(skb->dst->dev); if (nh[optoff + 1] != 4 || (optoff & 3) != 2) { LIMIT_NETDEBUG(KERN_DEBUG "ipv6_hop_jumbo: wrong jumbo opt length/alignment %d\n", nh[optoff+1]); - IP6_INC_STATS_BH(ipv6_skb_idev(skb), + IP6_INC_STATS_BH(net, ipv6_skb_idev(skb), IPSTATS_MIB_INHDRERRORS); goto drop; } pkt_len = ntohl(*(__be32 *)(nh + optoff + 2)); if (pkt_len <= IPV6_MAXPLEN) { - IP6_INC_STATS_BH(ipv6_skb_idev(skb), IPSTATS_MIB_INHDRERRORS); + IP6_INC_STATS_BH(net, ipv6_skb_idev(skb), + IPSTATS_MIB_INHDRERRORS); icmpv6_param_prob(skb, ICMPV6_HDR_FIELD, optoff+2); return 0; } if (ipv6_hdr(skb)->payload_len) { - IP6_INC_STATS_BH(ipv6_skb_idev(skb), IPSTATS_MIB_INHDRERRORS); + IP6_INC_STATS_BH(net, ipv6_skb_idev(skb), + IPSTATS_MIB_INHDRERRORS); icmpv6_param_prob(skb, ICMPV6_HDR_FIELD, optoff); return 0; } if (pkt_len > skb->len - sizeof(struct ipv6hdr)) { - IP6_INC_STATS_BH(ipv6_skb_idev(skb), IPSTATS_MIB_INTRUNCATEDPKTS); + IP6_INC_STATS_BH(net, ipv6_skb_idev(skb), + IPSTATS_MIB_INTRUNCATEDPKTS); goto drop; } diff --git a/net/ipv6/fib6_rules.c b/net/ipv6/fib6_rules.c index 8d05527524e3..f5de3f9dc692 100644 --- a/net/ipv6/fib6_rules.c +++ b/net/ipv6/fib6_rules.c @@ -93,7 +93,8 @@ static int fib6_rule_action(struct fib_rule *rule, struct flowi *flp, if (flags & RT6_LOOKUP_F_SRCPREF_COA) srcprefs |= IPV6_PREFER_SRC_COA; - if (ipv6_dev_get_saddr(ip6_dst_idev(&rt->u.dst)->dev, + if (ipv6_dev_get_saddr(net, + ip6_dst_idev(&rt->u.dst)->dev, &flp->fl6_dst, srcprefs, &saddr)) goto again; diff --git a/net/ipv6/icmp.c b/net/ipv6/icmp.c index d42dd16d3487..9b7d19ae5ced 100644 --- a/net/ipv6/icmp.c +++ b/net/ipv6/icmp.c @@ -5,8 +5,6 @@ * Authors: * Pedro Roque <roque@di.fc.ul.pt> * - * $Id: icmp.c,v 1.38 2002/02/08 03:57:19 davem Exp $ - * * Based on net/ipv4/icmp.c * * RFC 1885 @@ -93,19 +91,22 @@ static struct inet6_protocol icmpv6_protocol = { .flags = INET6_PROTO_NOPOLICY|INET6_PROTO_FINAL, }; -static __inline__ int icmpv6_xmit_lock(struct sock *sk) +static __inline__ struct sock *icmpv6_xmit_lock(struct net *net) { + struct sock *sk; + local_bh_disable(); + sk = icmpv6_sk(net); if (unlikely(!spin_trylock(&sk->sk_lock.slock))) { /* This can happen if the output path (f.e. SIT or * ip6ip6 tunnel) signals dst_link_failure() for an * outgoing ICMP6 packet. */ local_bh_enable(); - return 1; + return NULL; } - return 0; + return sk; } static __inline__ void icmpv6_xmit_unlock(struct sock *sk) @@ -182,7 +183,7 @@ static inline int icmpv6_xrlim_allow(struct sock *sk, int type, */ dst = ip6_route_output(net, sk, fl); if (dst->error) { - IP6_INC_STATS(ip6_dst_idev(dst), + IP6_INC_STATS(net, ip6_dst_idev(dst), IPSTATS_MIB_OUTNOROUTES); } else if (dst->dev && (dst->dev->flags&IFF_LOOPBACK)) { res = 1; @@ -394,11 +395,10 @@ void icmpv6_send(struct sk_buff *skb, int type, int code, __u32 info, fl.fl_icmp_code = code; security_skb_classify_flow(skb, &fl); - sk = icmpv6_sk(net); - np = inet6_sk(sk); - - if (icmpv6_xmit_lock(sk)) + sk = icmpv6_xmit_lock(net); + if (sk == NULL) return; + np = inet6_sk(sk); if (!icmpv6_xrlim_allow(sk, type, &fl)) goto out; @@ -541,11 +541,10 @@ static void icmpv6_echo_reply(struct sk_buff *skb) fl.fl_icmp_type = ICMPV6_ECHO_REPLY; security_skb_classify_flow(skb, &fl); - sk = icmpv6_sk(net); - np = inet6_sk(sk); - - if (icmpv6_xmit_lock(sk)) + sk = icmpv6_xmit_lock(net); + if (sk == NULL) return; + np = inet6_sk(sk); if (!fl.oif && ipv6_addr_is_multicast(&fl.fl6_dst)) fl.oif = np->mcast_oif; @@ -665,7 +664,7 @@ static int icmpv6_rcv(struct sk_buff *skb) skb_set_network_header(skb, nh); } - ICMP6_INC_STATS_BH(idev, ICMP6_MIB_INMSGS); + ICMP6_INC_STATS_BH(dev_net(dev), idev, ICMP6_MIB_INMSGS); saddr = &ipv6_hdr(skb)->saddr; daddr = &ipv6_hdr(skb)->daddr; @@ -694,7 +693,7 @@ static int icmpv6_rcv(struct sk_buff *skb) type = hdr->icmp6_type; - ICMP6MSGIN_INC_STATS_BH(idev, type); + ICMP6MSGIN_INC_STATS_BH(dev_net(dev), idev, type); switch (type) { case ICMPV6_ECHO_REQUEST: @@ -773,7 +772,7 @@ static int icmpv6_rcv(struct sk_buff *skb) return 0; discard_it: - ICMP6_INC_STATS_BH(idev, ICMP6_MIB_INERRORS); + ICMP6_INC_STATS_BH(dev_net(dev), idev, ICMP6_MIB_INERRORS); drop_no_count: kfree_skb(skb); return 0; @@ -956,7 +955,8 @@ ctl_table ipv6_icmp_table_template[] = { .data = &init_net.ipv6.sysctl.icmpv6_time, .maxlen = sizeof(int), .mode = 0644, - .proc_handler = &proc_dointvec + .proc_handler = &proc_dointvec_ms_jiffies, + .strategy = &sysctl_ms_jiffies }, { .ctl_name = 0 }, }; diff --git a/net/ipv6/inet6_connection_sock.c b/net/ipv6/inet6_connection_sock.c index 87801cc1b2f8..16d43f20b32f 100644 --- a/net/ipv6/inet6_connection_sock.c +++ b/net/ipv6/inet6_connection_sock.c @@ -98,7 +98,7 @@ struct request_sock *inet6_csk_search_req(const struct sock *sk, ipv6_addr_equal(&treq->rmt_addr, raddr) && ipv6_addr_equal(&treq->loc_addr, laddr) && (!treq->iif || treq->iif == iif)) { - BUG_TRAP(req->sk == NULL); + WARN_ON(req->sk != NULL); *prevp = prev; return req; } diff --git a/net/ipv6/inet6_hashtables.c b/net/ipv6/inet6_hashtables.c index 580014aea4d6..1646a5658255 100644 --- a/net/ipv6/inet6_hashtables.c +++ b/net/ipv6/inet6_hashtables.c @@ -28,7 +28,7 @@ void __inet6_hash(struct sock *sk) struct hlist_head *list; rwlock_t *lock; - BUG_TRAP(sk_unhashed(sk)); + WARN_ON(!sk_unhashed(sk)); if (sk->sk_state == TCP_LISTEN) { list = &hashinfo->listening_hash[inet_sk_listen_hashfn(sk)]; @@ -68,7 +68,7 @@ struct sock *__inet6_lookup_established(struct net *net, /* Optimize here for direct hit, only listening connections can * have wildcards anyways. */ - unsigned int hash = inet6_ehashfn(daddr, hnum, saddr, sport); + unsigned int hash = inet6_ehashfn(net, daddr, hnum, saddr, sport); struct inet_ehash_bucket *head = inet_ehash_bucket(hashinfo, hash); rwlock_t *lock = inet_ehash_lockp(hashinfo, hash); @@ -104,7 +104,8 @@ struct sock *inet6_lookup_listener(struct net *net, int score, hiscore = 0; read_lock(&hashinfo->lhash_lock); - sk_for_each(sk, node, &hashinfo->listening_hash[inet_lhashfn(hnum)]) { + sk_for_each(sk, node, + &hashinfo->listening_hash[inet_lhashfn(net, hnum)]) { if (net_eq(sock_net(sk), net) && inet_sk(sk)->num == hnum && sk->sk_family == PF_INET6) { const struct ipv6_pinfo *np = inet6_sk(sk); @@ -165,14 +166,14 @@ static int __inet6_check_established(struct inet_timewait_death_row *death_row, const struct in6_addr *saddr = &np->daddr; const int dif = sk->sk_bound_dev_if; const __portpair ports = INET_COMBINED_PORTS(inet->dport, lport); - const unsigned int hash = inet6_ehashfn(daddr, lport, saddr, + struct net *net = sock_net(sk); + const unsigned int hash = inet6_ehashfn(net, daddr, lport, saddr, inet->dport); struct inet_ehash_bucket *head = inet_ehash_bucket(hinfo, hash); rwlock_t *lock = inet_ehash_lockp(hinfo, hash); struct sock *sk2; const struct hlist_node *node; struct inet_timewait_sock *tw; - struct net *net = sock_net(sk); prefetch(head->chain.first); write_lock(lock); @@ -201,7 +202,7 @@ unique: * in hash table socket with a funny identity. */ inet->num = lport; inet->sport = htons(lport); - BUG_TRAP(sk_unhashed(sk)); + WARN_ON(!sk_unhashed(sk)); __sk_add_node(sk, &head->chain); sk->sk_hash = hash; sock_prot_inuse_add(sock_net(sk), sk->sk_prot, 1); @@ -209,11 +210,11 @@ unique: if (twp != NULL) { *twp = tw; - NET_INC_STATS_BH(LINUX_MIB_TIMEWAITRECYCLED); + NET_INC_STATS_BH(twsk_net(tw), LINUX_MIB_TIMEWAITRECYCLED); } else if (tw != NULL) { /* Silly. Should hash-dance instead... */ inet_twsk_deschedule(tw, death_row); - NET_INC_STATS_BH(LINUX_MIB_TIMEWAITRECYCLED); + NET_INC_STATS_BH(twsk_net(tw), LINUX_MIB_TIMEWAITRECYCLED); inet_twsk_put(tw); } diff --git a/net/ipv6/ip6_fib.c b/net/ipv6/ip6_fib.c index 1ee4fa17c129..29c7c99e69f7 100644 --- a/net/ipv6/ip6_fib.c +++ b/net/ipv6/ip6_fib.c @@ -5,8 +5,6 @@ * Authors: * Pedro Roque <roque@di.fc.ul.pt> * - * $Id: ip6_fib.c,v 1.25 2001/10/31 21:55:55 davem Exp $ - * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public License * as published by the Free Software Foundation; either version @@ -289,7 +287,7 @@ static int fib6_dump_node(struct fib6_walker_t *w) w->leaf = rt; return 1; } - BUG_TRAP(res!=0); + WARN_ON(res == 0); } w->leaf = NULL; return 0; @@ -380,6 +378,7 @@ static int inet6_dump_fib(struct sk_buff *skb, struct netlink_callback *cb) arg.skb = skb; arg.cb = cb; + arg.net = net; w->args = &arg; for (h = s_h; h < FIB_TABLE_HASHSZ; h++, s_e = 0) { @@ -663,17 +662,17 @@ static int fib6_add_rt2node(struct fib6_node *fn, struct rt6_info *rt, static __inline__ void fib6_start_gc(struct net *net, struct rt6_info *rt) { - if (net->ipv6.ip6_fib_timer->expires == 0 && + if (!timer_pending(&net->ipv6.ip6_fib_timer) && (rt->rt6i_flags & (RTF_EXPIRES|RTF_CACHE))) - mod_timer(net->ipv6.ip6_fib_timer, jiffies + - net->ipv6.sysctl.ip6_rt_gc_interval); + mod_timer(&net->ipv6.ip6_fib_timer, + jiffies + net->ipv6.sysctl.ip6_rt_gc_interval); } void fib6_force_start_gc(struct net *net) { - if (net->ipv6.ip6_fib_timer->expires == 0) - mod_timer(net->ipv6.ip6_fib_timer, jiffies + - net->ipv6.sysctl.ip6_rt_gc_interval); + if (!timer_pending(&net->ipv6.ip6_fib_timer)) + mod_timer(&net->ipv6.ip6_fib_timer, + jiffies + net->ipv6.sysctl.ip6_rt_gc_interval); } /* @@ -780,7 +779,7 @@ out: pn->leaf = fib6_find_prefix(info->nl_net, pn); #if RT6_DEBUG >= 2 if (!pn->leaf) { - BUG_TRAP(pn->leaf != NULL); + WARN_ON(pn->leaf == NULL); pn->leaf = info->nl_net->ipv6.ip6_null_entry; } #endif @@ -944,7 +943,7 @@ struct fib6_node * fib6_locate(struct fib6_node *root, #ifdef CONFIG_IPV6_SUBTREES if (src_len) { - BUG_TRAP(saddr!=NULL); + WARN_ON(saddr == NULL); if (fn && fn->subtree) fn = fib6_locate_1(fn->subtree, saddr, src_len, offsetof(struct rt6_info, rt6i_src)); @@ -998,9 +997,9 @@ static struct fib6_node *fib6_repair_tree(struct net *net, RT6_TRACE("fixing tree: plen=%d iter=%d\n", fn->fn_bit, iter); iter++; - BUG_TRAP(!(fn->fn_flags&RTN_RTINFO)); - BUG_TRAP(!(fn->fn_flags&RTN_TL_ROOT)); - BUG_TRAP(fn->leaf==NULL); + WARN_ON(fn->fn_flags & RTN_RTINFO); + WARN_ON(fn->fn_flags & RTN_TL_ROOT); + WARN_ON(fn->leaf != NULL); children = 0; child = NULL; @@ -1016,7 +1015,7 @@ static struct fib6_node *fib6_repair_tree(struct net *net, fn->leaf = fib6_find_prefix(net, fn); #if RT6_DEBUG >= 2 if (fn->leaf==NULL) { - BUG_TRAP(fn->leaf); + WARN_ON(!fn->leaf); fn->leaf = net->ipv6.ip6_null_entry; } #endif @@ -1027,16 +1026,17 @@ static struct fib6_node *fib6_repair_tree(struct net *net, pn = fn->parent; #ifdef CONFIG_IPV6_SUBTREES if (FIB6_SUBTREE(pn) == fn) { - BUG_TRAP(fn->fn_flags&RTN_ROOT); + WARN_ON(!(fn->fn_flags & RTN_ROOT)); FIB6_SUBTREE(pn) = NULL; nstate = FWS_L; } else { - BUG_TRAP(!(fn->fn_flags&RTN_ROOT)); + WARN_ON(fn->fn_flags & RTN_ROOT); #endif if (pn->right == fn) pn->right = child; else if (pn->left == fn) pn->left = child; #if RT6_DEBUG >= 2 - else BUG_TRAP(0); + else + WARN_ON(1); #endif if (child) child->parent = pn; @@ -1156,14 +1156,14 @@ int fib6_del(struct rt6_info *rt, struct nl_info *info) #if RT6_DEBUG >= 2 if (rt->u.dst.obsolete>0) { - BUG_TRAP(fn==NULL); + WARN_ON(fn != NULL); return -ENOENT; } #endif if (fn == NULL || rt == net->ipv6.ip6_null_entry) return -ENOENT; - BUG_TRAP(fn->fn_flags&RTN_RTINFO); + WARN_ON(!(fn->fn_flags & RTN_RTINFO)); if (!(rt->rt6i_flags&RTF_CACHE)) { struct fib6_node *pn = fn; @@ -1268,7 +1268,7 @@ static int fib6_walk_continue(struct fib6_walker_t *w) w->node = pn; #ifdef CONFIG_IPV6_SUBTREES if (FIB6_SUBTREE(pn) == fn) { - BUG_TRAP(fn->fn_flags&RTN_ROOT); + WARN_ON(!(fn->fn_flags & RTN_ROOT)); w->state = FWS_L; continue; } @@ -1283,7 +1283,7 @@ static int fib6_walk_continue(struct fib6_walker_t *w) continue; } #if RT6_DEBUG >= 2 - BUG_TRAP(0); + WARN_ON(1); #endif } } @@ -1325,7 +1325,7 @@ static int fib6_clean_node(struct fib6_walker_t *w) } return 0; } - BUG_TRAP(res==0); + WARN_ON(res != 0); } w->leaf = rt; return 0; @@ -1449,27 +1449,23 @@ void fib6_run_gc(unsigned long expires, struct net *net) gc_args.timeout = expires ? (int)expires : net->ipv6.sysctl.ip6_rt_gc_interval; } else { - local_bh_disable(); - if (!spin_trylock(&fib6_gc_lock)) { - mod_timer(net->ipv6.ip6_fib_timer, jiffies + HZ); - local_bh_enable(); + if (!spin_trylock_bh(&fib6_gc_lock)) { + mod_timer(&net->ipv6.ip6_fib_timer, jiffies + HZ); return; } gc_args.timeout = net->ipv6.sysctl.ip6_rt_gc_interval; } - gc_args.more = 0; - icmp6_dst_gc(&gc_args.more); + gc_args.more = icmp6_dst_gc(); fib6_clean_all(net, fib6_age, 0, NULL); if (gc_args.more) - mod_timer(net->ipv6.ip6_fib_timer, jiffies + - net->ipv6.sysctl.ip6_rt_gc_interval); - else { - del_timer(net->ipv6.ip6_fib_timer); - net->ipv6.ip6_fib_timer->expires = 0; - } + mod_timer(&net->ipv6.ip6_fib_timer, + round_jiffies(jiffies + + net->ipv6.sysctl.ip6_rt_gc_interval)); + else + del_timer(&net->ipv6.ip6_fib_timer); spin_unlock_bh(&fib6_gc_lock); } @@ -1480,24 +1476,15 @@ static void fib6_gc_timer_cb(unsigned long arg) static int fib6_net_init(struct net *net) { - int ret; - struct timer_list *timer; - - ret = -ENOMEM; - timer = kzalloc(sizeof(*timer), GFP_KERNEL); - if (!timer) - goto out; - - setup_timer(timer, fib6_gc_timer_cb, (unsigned long)net); - net->ipv6.ip6_fib_timer = timer; + setup_timer(&net->ipv6.ip6_fib_timer, fib6_gc_timer_cb, (unsigned long)net); net->ipv6.rt6_stats = kzalloc(sizeof(*net->ipv6.rt6_stats), GFP_KERNEL); if (!net->ipv6.rt6_stats) goto out_timer; - net->ipv6.fib_table_hash = - kzalloc(sizeof(*net->ipv6.fib_table_hash)*FIB_TABLE_HASHSZ, - GFP_KERNEL); + net->ipv6.fib_table_hash = kcalloc(FIB_TABLE_HASHSZ, + sizeof(*net->ipv6.fib_table_hash), + GFP_KERNEL); if (!net->ipv6.fib_table_hash) goto out_rt6_stats; @@ -1523,9 +1510,7 @@ static int fib6_net_init(struct net *net) #endif fib6_tables_init(net); - ret = 0; -out: - return ret; + return 0; #ifdef CONFIG_IPV6_MULTIPLE_TABLES out_fib6_main_tbl: @@ -1536,15 +1521,14 @@ out_fib_table_hash: out_rt6_stats: kfree(net->ipv6.rt6_stats); out_timer: - kfree(timer); - goto out; + return -ENOMEM; } static void fib6_net_exit(struct net *net) { rt6_ifdown(net, NULL); - del_timer_sync(net->ipv6.ip6_fib_timer); - kfree(net->ipv6.ip6_fib_timer); + del_timer_sync(&net->ipv6.ip6_fib_timer); + #ifdef CONFIG_IPV6_MULTIPLE_TABLES kfree(net->ipv6.fib6_local_tbl); #endif diff --git a/net/ipv6/ip6_input.c b/net/ipv6/ip6_input.c index 17eb48b8e329..936f48946e20 100644 --- a/net/ipv6/ip6_input.c +++ b/net/ipv6/ip6_input.c @@ -6,8 +6,6 @@ * Pedro Roque <roque@di.fc.ul.pt> * Ian P. Morris <I.P.Morris@soton.ac.uk> * - * $Id: ip6_input.c,v 1.19 2000/12/13 18:31:50 davem Exp $ - * * Based in linux/net/ipv4/ip_input.c * * This program is free software; you can redistribute it and/or @@ -61,6 +59,7 @@ int ipv6_rcv(struct sk_buff *skb, struct net_device *dev, struct packet_type *pt struct ipv6hdr *hdr; u32 pkt_len; struct inet6_dev *idev; + struct net *net = dev_net(skb->dev); if (skb->pkt_type == PACKET_OTHERHOST) { kfree_skb(skb); @@ -71,10 +70,11 @@ int ipv6_rcv(struct sk_buff *skb, struct net_device *dev, struct packet_type *pt idev = __in6_dev_get(skb->dev); - IP6_INC_STATS_BH(idev, IPSTATS_MIB_INRECEIVES); + IP6_INC_STATS_BH(net, idev, IPSTATS_MIB_INRECEIVES); - if ((skb = skb_share_check(skb, GFP_ATOMIC)) == NULL) { - IP6_INC_STATS_BH(idev, IPSTATS_MIB_INDISCARDS); + if ((skb = skb_share_check(skb, GFP_ATOMIC)) == NULL || + !idev || unlikely(idev->cnf.disable_ipv6)) { + IP6_INC_STATS_BH(net, idev, IPSTATS_MIB_INDISCARDS); rcu_read_unlock(); goto out; } @@ -119,11 +119,12 @@ int ipv6_rcv(struct sk_buff *skb, struct net_device *dev, struct packet_type *pt /* pkt_len may be zero if Jumbo payload option is present */ if (pkt_len || hdr->nexthdr != NEXTHDR_HOP) { if (pkt_len + sizeof(struct ipv6hdr) > skb->len) { - IP6_INC_STATS_BH(idev, IPSTATS_MIB_INTRUNCATEDPKTS); + IP6_INC_STATS_BH(net, + idev, IPSTATS_MIB_INTRUNCATEDPKTS); goto drop; } if (pskb_trim_rcsum(skb, pkt_len + sizeof(struct ipv6hdr))) { - IP6_INC_STATS_BH(idev, IPSTATS_MIB_INHDRERRORS); + IP6_INC_STATS_BH(net, idev, IPSTATS_MIB_INHDRERRORS); goto drop; } hdr = ipv6_hdr(skb); @@ -131,7 +132,7 @@ int ipv6_rcv(struct sk_buff *skb, struct net_device *dev, struct packet_type *pt if (hdr->nexthdr == NEXTHDR_HOP) { if (ipv6_parse_hopopts(skb) < 0) { - IP6_INC_STATS_BH(idev, IPSTATS_MIB_INHDRERRORS); + IP6_INC_STATS_BH(net, idev, IPSTATS_MIB_INHDRERRORS); rcu_read_unlock(); return 0; } @@ -142,7 +143,7 @@ int ipv6_rcv(struct sk_buff *skb, struct net_device *dev, struct packet_type *pt return NF_HOOK(PF_INET6, NF_INET_PRE_ROUTING, skb, dev, NULL, ip6_rcv_finish); err: - IP6_INC_STATS_BH(idev, IPSTATS_MIB_INHDRERRORS); + IP6_INC_STATS_BH(net, idev, IPSTATS_MIB_INHDRERRORS); drop: rcu_read_unlock(); kfree_skb(skb); @@ -162,6 +163,7 @@ static int ip6_input_finish(struct sk_buff *skb) int nexthdr, raw; u8 hash; struct inet6_dev *idev; + struct net *net = dev_net(skb->dst->dev); /* * Parse extension headers @@ -206,24 +208,25 @@ resubmit: if (ret > 0) goto resubmit; else if (ret == 0) - IP6_INC_STATS_BH(idev, IPSTATS_MIB_INDELIVERS); + IP6_INC_STATS_BH(net, idev, IPSTATS_MIB_INDELIVERS); } else { if (!raw) { if (xfrm6_policy_check(NULL, XFRM_POLICY_IN, skb)) { - IP6_INC_STATS_BH(idev, IPSTATS_MIB_INUNKNOWNPROTOS); + IP6_INC_STATS_BH(net, idev, + IPSTATS_MIB_INUNKNOWNPROTOS); icmpv6_send(skb, ICMPV6_PARAMPROB, ICMPV6_UNK_NEXTHDR, nhoff, skb->dev); } } else - IP6_INC_STATS_BH(idev, IPSTATS_MIB_INDELIVERS); + IP6_INC_STATS_BH(net, idev, IPSTATS_MIB_INDELIVERS); kfree_skb(skb); } rcu_read_unlock(); return 0; discard: - IP6_INC_STATS_BH(idev, IPSTATS_MIB_INDISCARDS); + IP6_INC_STATS_BH(net, idev, IPSTATS_MIB_INDISCARDS); rcu_read_unlock(); kfree_skb(skb); return 0; @@ -241,7 +244,8 @@ int ip6_mc_input(struct sk_buff *skb) struct ipv6hdr *hdr; int deliver; - IP6_INC_STATS_BH(ip6_dst_idev(skb->dst), IPSTATS_MIB_INMCASTPKTS); + IP6_INC_STATS_BH(dev_net(skb->dst->dev), + ip6_dst_idev(skb->dst), IPSTATS_MIB_INMCASTPKTS); hdr = ipv6_hdr(skb); deliver = ipv6_chk_mcast_addr(skb->dev, &hdr->daddr, NULL); @@ -250,7 +254,7 @@ int ip6_mc_input(struct sk_buff *skb) /* * IPv6 multicast router mode is now supported ;) */ - if (ipv6_devconf.mc_forwarding && + if (dev_net(skb->dev)->ipv6.devconf_all->mc_forwarding && likely(!(IP6CB(skb)->flags & IP6SKB_FORWARDED))) { /* * Okay, we try to forward - split and duplicate diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c index 48cdce9c696c..c77db0b95e26 100644 --- a/net/ipv6/ip6_output.c +++ b/net/ipv6/ip6_output.c @@ -5,8 +5,6 @@ * Authors: * Pedro Roque <roque@di.fc.ul.pt> * - * $Id: ip6_output.c,v 1.34 2002/02/01 22:01:04 davem Exp $ - * * Based on linux/net/ipv4/ip_output.c * * This program is free software; you can redistribute it and/or @@ -105,7 +103,8 @@ static int ip6_output_finish(struct sk_buff *skb) else if (dst->neighbour) return dst->neighbour->output(skb); - IP6_INC_STATS_BH(ip6_dst_idev(dst), IPSTATS_MIB_OUTNOROUTES); + IP6_INC_STATS_BH(dev_net(dst->dev), + ip6_dst_idev(dst), IPSTATS_MIB_OUTNOROUTES); kfree_skb(skb); return -EINVAL; @@ -118,7 +117,7 @@ static int ip6_dev_loopback_xmit(struct sk_buff *newskb) __skb_pull(newskb, skb_network_offset(newskb)); newskb->pkt_type = PACKET_LOOPBACK; newskb->ip_summed = CHECKSUM_UNNECESSARY; - BUG_TRAP(newskb->dst); + WARN_ON(!newskb->dst); netif_rx(newskb); return 0; @@ -152,13 +151,14 @@ static int ip6_output2(struct sk_buff *skb) ip6_dev_loopback_xmit); if (ipv6_hdr(skb)->hop_limit == 0) { - IP6_INC_STATS(idev, IPSTATS_MIB_OUTDISCARDS); + IP6_INC_STATS(dev_net(dev), idev, + IPSTATS_MIB_OUTDISCARDS); kfree_skb(skb); return 0; } } - IP6_INC_STATS(idev, IPSTATS_MIB_OUTMCASTPKTS); + IP6_INC_STATS(dev_net(dev), idev, IPSTATS_MIB_OUTMCASTPKTS); } return NF_HOOK(PF_INET6, NF_INET_POST_ROUTING, skb, NULL, skb->dev, @@ -175,6 +175,14 @@ static inline int ip6_skb_dst_mtu(struct sk_buff *skb) int ip6_output(struct sk_buff *skb) { + struct inet6_dev *idev = ip6_dst_idev(skb->dst); + if (unlikely(idev->cnf.disable_ipv6)) { + IP6_INC_STATS(dev_net(skb->dst->dev), idev, + IPSTATS_MIB_OUTDISCARDS); + kfree_skb(skb); + return 0; + } + if ((skb->len > ip6_skb_dst_mtu(skb) && !skb_is_gso(skb)) || dst_allfrag(skb->dst)) return ip6_fragment(skb, ip6_output2); @@ -189,6 +197,7 @@ int ip6_output(struct sk_buff *skb) int ip6_xmit(struct sock *sk, struct sk_buff *skb, struct flowi *fl, struct ipv6_txoptions *opt, int ipfragok) { + struct net *net = sock_net(sk); struct ipv6_pinfo *np = inet6_sk(sk); struct in6_addr *first_hop = &fl->fl6_dst; struct dst_entry *dst = skb->dst; @@ -211,7 +220,7 @@ int ip6_xmit(struct sock *sk, struct sk_buff *skb, struct flowi *fl, if (skb_headroom(skb) < head_room) { struct sk_buff *skb2 = skb_realloc_headroom(skb, head_room); if (skb2 == NULL) { - IP6_INC_STATS(ip6_dst_idev(skb->dst), + IP6_INC_STATS(net, ip6_dst_idev(skb->dst), IPSTATS_MIB_OUTDISCARDS); kfree_skb(skb); return -ENOBUFS; @@ -231,6 +240,10 @@ int ip6_xmit(struct sock *sk, struct sk_buff *skb, struct flowi *fl, skb_reset_network_header(skb); hdr = ipv6_hdr(skb); + /* Allow local fragmentation. */ + if (ipfragok) + skb->local_df = 1; + /* * Fill in the IPv6 header */ @@ -260,8 +273,8 @@ int ip6_xmit(struct sock *sk, struct sk_buff *skb, struct flowi *fl, skb->mark = sk->sk_mark; mtu = dst_mtu(dst); - if ((skb->len <= mtu) || ipfragok || skb_is_gso(skb)) { - IP6_INC_STATS(ip6_dst_idev(skb->dst), + if ((skb->len <= mtu) || skb->local_df || skb_is_gso(skb)) { + IP6_INC_STATS(net, ip6_dst_idev(skb->dst), IPSTATS_MIB_OUTREQUESTS); return NF_HOOK(PF_INET6, NF_INET_LOCAL_OUT, skb, NULL, dst->dev, dst_output); @@ -271,7 +284,7 @@ int ip6_xmit(struct sock *sk, struct sk_buff *skb, struct flowi *fl, printk(KERN_DEBUG "IPv6: sending pkt_too_big to self\n"); skb->dev = dst->dev; icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu, skb->dev); - IP6_INC_STATS(ip6_dst_idev(skb->dst), IPSTATS_MIB_FRAGFAILS); + IP6_INC_STATS(net, ip6_dst_idev(skb->dst), IPSTATS_MIB_FRAGFAILS); kfree_skb(skb); return -EMSGSIZE; } @@ -406,11 +419,14 @@ int ip6_forward(struct sk_buff *skb) struct inet6_skb_parm *opt = IP6CB(skb); struct net *net = dev_net(dst->dev); - if (ipv6_devconf.forwarding == 0) + if (net->ipv6.devconf_all->forwarding == 0) goto error; + if (skb_warn_if_lro(skb)) + goto drop; + if (!xfrm6_policy_check(NULL, XFRM_POLICY_FWD, skb)) { - IP6_INC_STATS(ip6_dst_idev(dst), IPSTATS_MIB_INDISCARDS); + IP6_INC_STATS(net, ip6_dst_idev(dst), IPSTATS_MIB_INDISCARDS); goto drop; } @@ -443,26 +459,28 @@ int ip6_forward(struct sk_buff *skb) skb->dev = dst->dev; icmpv6_send(skb, ICMPV6_TIME_EXCEED, ICMPV6_EXC_HOPLIMIT, 0, skb->dev); - IP6_INC_STATS_BH(ip6_dst_idev(dst), IPSTATS_MIB_INHDRERRORS); + IP6_INC_STATS_BH(net, + ip6_dst_idev(dst), IPSTATS_MIB_INHDRERRORS); kfree_skb(skb); return -ETIMEDOUT; } /* XXX: idev->cnf.proxy_ndp? */ - if (ipv6_devconf.proxy_ndp && + if (net->ipv6.devconf_all->proxy_ndp && pneigh_lookup(&nd_tbl, net, &hdr->daddr, skb->dev, 0)) { int proxied = ip6_forward_proxy_check(skb); if (proxied > 0) return ip6_input(skb); else if (proxied < 0) { - IP6_INC_STATS(ip6_dst_idev(dst), IPSTATS_MIB_INDISCARDS); + IP6_INC_STATS(net, ip6_dst_idev(dst), + IPSTATS_MIB_INDISCARDS); goto drop; } } if (!xfrm6_route_forward(skb)) { - IP6_INC_STATS(ip6_dst_idev(dst), IPSTATS_MIB_INDISCARDS); + IP6_INC_STATS(net, ip6_dst_idev(dst), IPSTATS_MIB_INDISCARDS); goto drop; } dst = skb->dst; @@ -497,7 +515,8 @@ int ip6_forward(struct sk_buff *skb) int addrtype = ipv6_addr_type(&hdr->saddr); /* This check is security critical. */ - if (addrtype & (IPV6_ADDR_MULTICAST|IPV6_ADDR_LOOPBACK)) + if (addrtype == IPV6_ADDR_ANY || + addrtype & (IPV6_ADDR_MULTICAST | IPV6_ADDR_LOOPBACK)) goto error; if (addrtype & IPV6_ADDR_LINKLOCAL) { icmpv6_send(skb, ICMPV6_DEST_UNREACH, @@ -510,14 +529,16 @@ int ip6_forward(struct sk_buff *skb) /* Again, force OUTPUT device used as source address */ skb->dev = dst->dev; icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, dst_mtu(dst), skb->dev); - IP6_INC_STATS_BH(ip6_dst_idev(dst), IPSTATS_MIB_INTOOBIGERRORS); - IP6_INC_STATS_BH(ip6_dst_idev(dst), IPSTATS_MIB_FRAGFAILS); + IP6_INC_STATS_BH(net, + ip6_dst_idev(dst), IPSTATS_MIB_INTOOBIGERRORS); + IP6_INC_STATS_BH(net, + ip6_dst_idev(dst), IPSTATS_MIB_FRAGFAILS); kfree_skb(skb); return -EMSGSIZE; } if (skb_cow(skb, dst->dev->hard_header_len)) { - IP6_INC_STATS(ip6_dst_idev(dst), IPSTATS_MIB_OUTDISCARDS); + IP6_INC_STATS(net, ip6_dst_idev(dst), IPSTATS_MIB_OUTDISCARDS); goto drop; } @@ -527,12 +548,12 @@ int ip6_forward(struct sk_buff *skb) hdr->hop_limit--; - IP6_INC_STATS_BH(ip6_dst_idev(dst), IPSTATS_MIB_OUTFORWDATAGRAMS); + IP6_INC_STATS_BH(net, ip6_dst_idev(dst), IPSTATS_MIB_OUTFORWDATAGRAMS); return NF_HOOK(PF_INET6, NF_INET_FORWARD, skb, skb->dev, dst->dev, ip6_forward_finish); error: - IP6_INC_STATS_BH(ip6_dst_idev(dst), IPSTATS_MIB_INADDRERRORS); + IP6_INC_STATS_BH(net, ip6_dst_idev(dst), IPSTATS_MIB_INADDRERRORS); drop: kfree_skb(skb); return -EINVAL; @@ -600,7 +621,6 @@ int ip6_find_1stfragopt(struct sk_buff *skb, u8 **nexthdr) static int ip6_fragment(struct sk_buff *skb, int (*output)(struct sk_buff *)) { - struct net_device *dev; struct sk_buff *frag; struct rt6_info *rt = (struct rt6_info*)skb->dst; struct ipv6_pinfo *np = skb->sk ? inet6_sk(skb->sk) : NULL; @@ -610,8 +630,8 @@ static int ip6_fragment(struct sk_buff *skb, int (*output)(struct sk_buff *)) __be32 frag_id = 0; int ptr, offset = 0, err=0; u8 *prevhdr, nexthdr = 0; + struct net *net = dev_net(skb->dst->dev); - dev = rt->u.dst.dev; hlen = ip6_find_1stfragopt(skb, &prevhdr); nexthdr = *prevhdr; @@ -624,7 +644,8 @@ static int ip6_fragment(struct sk_buff *skb, int (*output)(struct sk_buff *)) if (!skb->local_df) { skb->dev = skb->dst->dev; icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu, skb->dev); - IP6_INC_STATS(ip6_dst_idev(skb->dst), IPSTATS_MIB_FRAGFAILS); + IP6_INC_STATS(net, ip6_dst_idev(skb->dst), + IPSTATS_MIB_FRAGFAILS); kfree_skb(skb); return -EMSGSIZE; } @@ -673,7 +694,8 @@ static int ip6_fragment(struct sk_buff *skb, int (*output)(struct sk_buff *)) *prevhdr = NEXTHDR_FRAGMENT; tmp_hdr = kmemdup(skb_network_header(skb), hlen, GFP_ATOMIC); if (!tmp_hdr) { - IP6_INC_STATS(ip6_dst_idev(skb->dst), IPSTATS_MIB_FRAGFAILS); + IP6_INC_STATS(net, ip6_dst_idev(skb->dst), + IPSTATS_MIB_FRAGFAILS); return -ENOMEM; } @@ -724,7 +746,8 @@ static int ip6_fragment(struct sk_buff *skb, int (*output)(struct sk_buff *)) err = output(skb); if(!err) - IP6_INC_STATS(ip6_dst_idev(&rt->u.dst), IPSTATS_MIB_FRAGCREATES); + IP6_INC_STATS(net, ip6_dst_idev(&rt->u.dst), + IPSTATS_MIB_FRAGCREATES); if (err || !frag) break; @@ -737,7 +760,8 @@ static int ip6_fragment(struct sk_buff *skb, int (*output)(struct sk_buff *)) kfree(tmp_hdr); if (err == 0) { - IP6_INC_STATS(ip6_dst_idev(&rt->u.dst), IPSTATS_MIB_FRAGOKS); + IP6_INC_STATS(net, ip6_dst_idev(&rt->u.dst), + IPSTATS_MIB_FRAGOKS); dst_release(&rt->u.dst); return 0; } @@ -748,7 +772,8 @@ static int ip6_fragment(struct sk_buff *skb, int (*output)(struct sk_buff *)) frag = skb; } - IP6_INC_STATS(ip6_dst_idev(&rt->u.dst), IPSTATS_MIB_FRAGFAILS); + IP6_INC_STATS(net, ip6_dst_idev(&rt->u.dst), + IPSTATS_MIB_FRAGFAILS); dst_release(&rt->u.dst); return err; } @@ -782,7 +807,7 @@ slow_path: if ((frag = alloc_skb(len+hlen+sizeof(struct frag_hdr)+LL_ALLOCATED_SPACE(rt->u.dst.dev), GFP_ATOMIC)) == NULL) { NETDEBUG(KERN_INFO "IPv6: frag: no memory for new fragment!\n"); - IP6_INC_STATS(ip6_dst_idev(skb->dst), + IP6_INC_STATS(net, ip6_dst_idev(skb->dst), IPSTATS_MIB_FRAGFAILS); err = -ENOMEM; goto fail; @@ -846,15 +871,16 @@ slow_path: if (err) goto fail; - IP6_INC_STATS(ip6_dst_idev(skb->dst), IPSTATS_MIB_FRAGCREATES); + IP6_INC_STATS(net, ip6_dst_idev(skb->dst), + IPSTATS_MIB_FRAGCREATES); } - IP6_INC_STATS(ip6_dst_idev(skb->dst), + IP6_INC_STATS(net, ip6_dst_idev(skb->dst), IPSTATS_MIB_FRAGOKS); kfree_skb(skb); return err; fail: - IP6_INC_STATS(ip6_dst_idev(skb->dst), + IP6_INC_STATS(net, ip6_dst_idev(skb->dst), IPSTATS_MIB_FRAGFAILS); kfree_skb(skb); return err; @@ -921,7 +947,7 @@ static int ip6_dst_lookup_tail(struct sock *sk, goto out_err_release; if (ipv6_addr_any(&fl->fl6_src)) { - err = ipv6_dev_get_saddr(ip6_dst_idev(*dst)->dev, + err = ipv6_dev_get_saddr(net, ip6_dst_idev(*dst)->dev, &fl->fl6_dst, sk ? inet6_sk(sk)->srcprefs : 0, &fl->fl6_src); @@ -930,46 +956,46 @@ static int ip6_dst_lookup_tail(struct sock *sk, } #ifdef CONFIG_IPV6_OPTIMISTIC_DAD - /* - * Here if the dst entry we've looked up - * has a neighbour entry that is in the INCOMPLETE - * state and the src address from the flow is - * marked as OPTIMISTIC, we release the found - * dst entry and replace it instead with the - * dst entry of the nexthop router - */ - if (!((*dst)->neighbour->nud_state & NUD_VALID)) { - struct inet6_ifaddr *ifp; - struct flowi fl_gw; - int redirect; - - ifp = ipv6_get_ifaddr(net, &fl->fl6_src, - (*dst)->dev, 1); - - redirect = (ifp && ifp->flags & IFA_F_OPTIMISTIC); - if (ifp) - in6_ifa_put(ifp); - - if (redirect) { - /* - * We need to get the dst entry for the - * default router instead - */ - dst_release(*dst); - memcpy(&fl_gw, fl, sizeof(struct flowi)); - memset(&fl_gw.fl6_dst, 0, sizeof(struct in6_addr)); - *dst = ip6_route_output(net, sk, &fl_gw); - if ((err = (*dst)->error)) - goto out_err_release; - } + /* + * Here if the dst entry we've looked up + * has a neighbour entry that is in the INCOMPLETE + * state and the src address from the flow is + * marked as OPTIMISTIC, we release the found + * dst entry and replace it instead with the + * dst entry of the nexthop router + */ + if ((*dst)->neighbour && !((*dst)->neighbour->nud_state & NUD_VALID)) { + struct inet6_ifaddr *ifp; + struct flowi fl_gw; + int redirect; + + ifp = ipv6_get_ifaddr(net, &fl->fl6_src, + (*dst)->dev, 1); + + redirect = (ifp && ifp->flags & IFA_F_OPTIMISTIC); + if (ifp) + in6_ifa_put(ifp); + + if (redirect) { + /* + * We need to get the dst entry for the + * default router instead + */ + dst_release(*dst); + memcpy(&fl_gw, fl, sizeof(struct flowi)); + memset(&fl_gw.fl6_dst, 0, sizeof(struct in6_addr)); + *dst = ip6_route_output(net, sk, &fl_gw); + if ((err = (*dst)->error)) + goto out_err_release; } + } #endif return 0; out_err_release: if (err == -ENETUNREACH) - IP6_INC_STATS_BH(NULL, IPSTATS_MIB_OUTNOROUTES); + IP6_INC_STATS_BH(net, NULL, IPSTATS_MIB_OUTNOROUTES); dst_release(*dst); *dst = NULL; return err; @@ -1374,7 +1400,7 @@ alloc_new_skb: return 0; error: inet->cork.length -= length; - IP6_INC_STATS(rt->rt6i_idev, IPSTATS_MIB_OUTDISCARDS); + IP6_INC_STATS(sock_net(sk), rt->rt6i_idev, IPSTATS_MIB_OUTDISCARDS); return err; } @@ -1398,6 +1424,7 @@ int ip6_push_pending_frames(struct sock *sk) struct in6_addr final_dst_buf, *final_dst = &final_dst_buf; struct inet_sock *inet = inet_sk(sk); struct ipv6_pinfo *np = inet6_sk(sk); + struct net *net = sock_net(sk); struct ipv6hdr *hdr; struct ipv6_txoptions *opt = np->cork.opt; struct rt6_info *rt = (struct rt6_info *)inet->cork.dst; @@ -1451,12 +1478,12 @@ int ip6_push_pending_frames(struct sock *sk) skb->mark = sk->sk_mark; skb->dst = dst_clone(&rt->u.dst); - IP6_INC_STATS(rt->rt6i_idev, IPSTATS_MIB_OUTREQUESTS); + IP6_INC_STATS(net, rt->rt6i_idev, IPSTATS_MIB_OUTREQUESTS); if (proto == IPPROTO_ICMPV6) { struct inet6_dev *idev = ip6_dst_idev(skb->dst); - ICMP6MSGOUT_INC_STATS_BH(idev, icmp6_hdr(skb)->icmp6_type); - ICMP6_INC_STATS_BH(idev, ICMP6_MIB_OUTMSGS); + ICMP6MSGOUT_INC_STATS_BH(net, idev, icmp6_hdr(skb)->icmp6_type); + ICMP6_INC_STATS_BH(net, idev, ICMP6_MIB_OUTMSGS); } err = ip6_local_out(skb); @@ -1480,7 +1507,7 @@ void ip6_flush_pending_frames(struct sock *sk) while ((skb = __skb_dequeue_tail(&sk->sk_write_queue)) != NULL) { if (skb->dst) - IP6_INC_STATS(ip6_dst_idev(skb->dst), + IP6_INC_STATS(sock_net(sk), ip6_dst_idev(skb->dst), IPSTATS_MIB_OUTDISCARDS); kfree_skb(skb); } diff --git a/net/ipv6/ip6_tunnel.c b/net/ipv6/ip6_tunnel.c index 2bda3ba100b1..64ce3d33d9c6 100644 --- a/net/ipv6/ip6_tunnel.c +++ b/net/ipv6/ip6_tunnel.c @@ -6,8 +6,6 @@ * Ville Nuorvala <vnuorval@tcs.hut.fi> * Yasuyuki Kozakai <kozakai@linux-ipv6.org> * - * $Id$ - * * Based on: * linux/net/ipv6/sit.c and linux/net/ipv4/ipip.c * @@ -711,7 +709,7 @@ static int ip6_tnl_rcv(struct sk_buff *skb, __u16 protocol, } if (!ip6_tnl_rcv_ctl(t)) { - t->stat.rx_dropped++; + t->dev->stats.rx_dropped++; read_unlock(&ip6_tnl_lock); goto discard; } @@ -728,8 +726,8 @@ static int ip6_tnl_rcv(struct sk_buff *skb, __u16 protocol, dscp_ecn_decapsulate(t, ipv6h, skb); - t->stat.rx_packets++; - t->stat.rx_bytes += skb->len; + t->dev->stats.rx_packets++; + t->dev->stats.rx_bytes += skb->len; netif_rx(skb); read_unlock(&ip6_tnl_lock); return 0; @@ -849,7 +847,7 @@ static int ip6_tnl_xmit2(struct sk_buff *skb, __u32 *pmtu) { struct ip6_tnl *t = netdev_priv(dev); - struct net_device_stats *stats = &t->stat; + struct net_device_stats *stats = &t->dev->stats; struct ipv6hdr *ipv6h = ipv6_hdr(skb); struct ipv6_tel_txoption opt; struct dst_entry *dst; @@ -1043,19 +1041,19 @@ static int ip6_tnl_xmit(struct sk_buff *skb, struct net_device *dev) { struct ip6_tnl *t = netdev_priv(dev); - struct net_device_stats *stats = &t->stat; + struct net_device_stats *stats = &t->dev->stats; int ret; if (t->recursion++) { - t->stat.collisions++; + stats->collisions++; goto tx_err; } switch (skb->protocol) { - case __constant_htons(ETH_P_IP): + case htons(ETH_P_IP): ret = ip4ip6_tnl_xmit(skb, dev); break; - case __constant_htons(ETH_P_IPV6): + case htons(ETH_P_IPV6): ret = ip6ip6_tnl_xmit(skb, dev); break; default: @@ -1289,19 +1287,6 @@ ip6_tnl_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd) } /** - * ip6_tnl_get_stats - return the stats for tunnel device - * @dev: virtual device associated with tunnel - * - * Return: stats for device - **/ - -static struct net_device_stats * -ip6_tnl_get_stats(struct net_device *dev) -{ - return &(((struct ip6_tnl *)netdev_priv(dev))->stat); -} - -/** * ip6_tnl_change_mtu - change mtu manually for tunnel device * @dev: virtual device associated with tunnel * @new_mtu: the new mtu @@ -1334,7 +1319,6 @@ static void ip6_tnl_dev_setup(struct net_device *dev) dev->uninit = ip6_tnl_dev_uninit; dev->destructor = free_netdev; dev->hard_start_xmit = ip6_tnl_xmit; - dev->get_stats = ip6_tnl_get_stats; dev->do_ioctl = ip6_tnl_ioctl; dev->change_mtu = ip6_tnl_change_mtu; diff --git a/net/ipv6/ip6mr.c b/net/ipv6/ip6mr.c index 14796181e8b5..182f8a177e7f 100644 --- a/net/ipv6/ip6mr.c +++ b/net/ipv6/ip6mr.c @@ -388,8 +388,8 @@ static int pim6_rcv(struct sk_buff *skb) skb->ip_summed = 0; skb->pkt_type = PACKET_HOST; dst_release(skb->dst); - ((struct net_device_stats *)netdev_priv(reg_dev))->rx_bytes += skb->len; - ((struct net_device_stats *)netdev_priv(reg_dev))->rx_packets++; + reg_dev->stats.rx_bytes += skb->len; + reg_dev->stats.rx_packets++; skb->dst = NULL; nf_reset(skb); netif_rx(skb); @@ -409,26 +409,20 @@ static struct inet6_protocol pim6_protocol = { static int reg_vif_xmit(struct sk_buff *skb, struct net_device *dev) { read_lock(&mrt_lock); - ((struct net_device_stats *)netdev_priv(dev))->tx_bytes += skb->len; - ((struct net_device_stats *)netdev_priv(dev))->tx_packets++; + dev->stats.tx_bytes += skb->len; + dev->stats.tx_packets++; ip6mr_cache_report(skb, reg_vif_num, MRT6MSG_WHOLEPKT); read_unlock(&mrt_lock); kfree_skb(skb); return 0; } -static struct net_device_stats *reg_vif_get_stats(struct net_device *dev) -{ - return (struct net_device_stats *)netdev_priv(dev); -} - static void reg_vif_setup(struct net_device *dev) { dev->type = ARPHRD_PIMREG; dev->mtu = 1500 - sizeof(struct ipv6hdr) - 8; dev->flags = IFF_NOARP; dev->hard_start_xmit = reg_vif_xmit; - dev->get_stats = reg_vif_get_stats; dev->destructor = free_netdev; } @@ -436,9 +430,7 @@ static struct net_device *ip6mr_reg_vif(void) { struct net_device *dev; - dev = alloc_netdev(sizeof(struct net_device_stats), "pim6reg", - reg_vif_setup); - + dev = alloc_netdev(0, "pim6reg", reg_vif_setup); if (dev == NULL) return NULL; @@ -451,6 +443,7 @@ static struct net_device *ip6mr_reg_vif(void) if (dev_open(dev)) goto failure; + dev_hold(dev); return dev; failure: @@ -603,6 +596,7 @@ static int mif6_add(struct mif6ctl *vifc, int mrtsock) int vifi = vifc->mif6c_mifi; struct mif_device *v = &vif6_table[vifi]; struct net_device *dev; + int err; /* Is vif busy ? */ if (MIF_EXISTS(vifi)) @@ -620,20 +614,28 @@ static int mif6_add(struct mif6ctl *vifc, int mrtsock) dev = ip6mr_reg_vif(); if (!dev) return -ENOBUFS; + err = dev_set_allmulti(dev, 1); + if (err) { + unregister_netdevice(dev); + dev_put(dev); + return err; + } break; #endif case 0: dev = dev_get_by_index(&init_net, vifc->mif6c_pifi); if (!dev) return -EADDRNOTAVAIL; - dev_put(dev); + err = dev_set_allmulti(dev, 1); + if (err) { + dev_put(dev); + return err; + } break; default: return -EINVAL; } - dev_set_allmulti(dev, 1); - /* * Fill in the VIF structures */ @@ -652,7 +654,6 @@ static int mif6_add(struct mif6ctl *vifc, int mrtsock) /* And finish update writing critical data */ write_lock_bh(&mrt_lock); - dev_hold(dev); v->dev = dev; #ifdef CONFIG_IPV6_PIMSM_V2 if (v->flags & MIFF_REGISTER) @@ -934,7 +935,7 @@ static int ip6mr_device_event(struct notifier_block *this, struct mif_device *v; int ct; - if (dev_net(dev) != &init_net) + if (!net_eq(dev_net(dev), &init_net)) return NOTIFY_DONE; if (event != NETDEV_UNREGISTER) @@ -956,23 +957,51 @@ static struct notifier_block ip6_mr_notifier = { * Setup for IP multicast routing */ -void __init ip6_mr_init(void) +int __init ip6_mr_init(void) { + int err; + mrt_cachep = kmem_cache_create("ip6_mrt_cache", sizeof(struct mfc6_cache), 0, SLAB_HWCACHE_ALIGN, NULL); if (!mrt_cachep) - panic("cannot allocate ip6_mrt_cache"); + return -ENOMEM; setup_timer(&ipmr_expire_timer, ipmr_expire_process, 0); - register_netdevice_notifier(&ip6_mr_notifier); + err = register_netdevice_notifier(&ip6_mr_notifier); + if (err) + goto reg_notif_fail; +#ifdef CONFIG_PROC_FS + err = -ENOMEM; + if (!proc_net_fops_create(&init_net, "ip6_mr_vif", 0, &ip6mr_vif_fops)) + goto proc_vif_fail; + if (!proc_net_fops_create(&init_net, "ip6_mr_cache", + 0, &ip6mr_mfc_fops)) + goto proc_cache_fail; +#endif + return 0; +reg_notif_fail: + kmem_cache_destroy(mrt_cachep); #ifdef CONFIG_PROC_FS - proc_net_fops_create(&init_net, "ip6_mr_vif", 0, &ip6mr_vif_fops); - proc_net_fops_create(&init_net, "ip6_mr_cache", 0, &ip6mr_mfc_fops); +proc_vif_fail: + unregister_netdevice_notifier(&ip6_mr_notifier); +proc_cache_fail: + proc_net_remove(&init_net, "ip6_mr_vif"); #endif + return err; } +void ip6_mr_cleanup(void) +{ +#ifdef CONFIG_PROC_FS + proc_net_remove(&init_net, "ip6_mr_cache"); + proc_net_remove(&init_net, "ip6_mr_vif"); +#endif + unregister_netdevice_notifier(&ip6_mr_notifier); + del_timer(&ipmr_expire_timer); + kmem_cache_destroy(mrt_cachep); +} static int ip6mr_mfc_add(struct mf6cctl *mfc, int mrtsock) { @@ -1248,7 +1277,7 @@ int ip6_mroute_setsockopt(struct sock *sk, int optname, char __user *optval, int #endif /* - * Spurious command, or MRT_VERSION which you cannot + * Spurious command, or MRT6_VERSION which you cannot * set. */ default: @@ -1354,7 +1383,8 @@ int ip6mr_ioctl(struct sock *sk, int cmd, void __user *arg) static inline int ip6mr_forward2_finish(struct sk_buff *skb) { - IP6_INC_STATS_BH(ip6_dst_idev(skb->dst), IPSTATS_MIB_OUTFORWDATAGRAMS); + IP6_INC_STATS_BH(dev_net(skb->dst->dev), ip6_dst_idev(skb->dst), + IPSTATS_MIB_OUTFORWDATAGRAMS); return dst_output(skb); } @@ -1377,8 +1407,8 @@ static int ip6mr_forward2(struct sk_buff *skb, struct mfc6_cache *c, int vifi) if (vif->flags & MIFF_REGISTER) { vif->pkt_out++; vif->bytes_out += skb->len; - ((struct net_device_stats *)netdev_priv(vif->dev))->tx_bytes += skb->len; - ((struct net_device_stats *)netdev_priv(vif->dev))->tx_packets++; + vif->dev->stats.tx_bytes += skb->len; + vif->dev->stats.tx_packets++; ip6mr_cache_report(skb, vifi, MRT6MSG_WHOLEPKT); kfree_skb(skb); return 0; diff --git a/net/ipv6/ipcomp6.c b/net/ipv6/ipcomp6.c index ee6de425ce6b..4545e4306862 100644 --- a/net/ipv6/ipcomp6.c +++ b/net/ipv6/ipcomp6.c @@ -50,125 +50,6 @@ #include <linux/icmpv6.h> #include <linux/mutex.h> -struct ipcomp6_tfms { - struct list_head list; - struct crypto_comp **tfms; - int users; -}; - -static DEFINE_MUTEX(ipcomp6_resource_mutex); -static void **ipcomp6_scratches; -static int ipcomp6_scratch_users; -static LIST_HEAD(ipcomp6_tfms_list); - -static int ipcomp6_input(struct xfrm_state *x, struct sk_buff *skb) -{ - int nexthdr; - int err = -ENOMEM; - struct ip_comp_hdr *ipch; - int plen, dlen; - struct ipcomp_data *ipcd = x->data; - u8 *start, *scratch; - struct crypto_comp *tfm; - int cpu; - - if (skb_linearize_cow(skb)) - goto out; - - skb->ip_summed = CHECKSUM_NONE; - - /* Remove ipcomp header and decompress original payload */ - ipch = (void *)skb->data; - nexthdr = ipch->nexthdr; - - skb->transport_header = skb->network_header + sizeof(*ipch); - __skb_pull(skb, sizeof(*ipch)); - - /* decompression */ - plen = skb->len; - dlen = IPCOMP_SCRATCH_SIZE; - start = skb->data; - - cpu = get_cpu(); - scratch = *per_cpu_ptr(ipcomp6_scratches, cpu); - tfm = *per_cpu_ptr(ipcd->tfms, cpu); - - err = crypto_comp_decompress(tfm, start, plen, scratch, &dlen); - if (err) - goto out_put_cpu; - - if (dlen < (plen + sizeof(*ipch))) { - err = -EINVAL; - goto out_put_cpu; - } - - err = pskb_expand_head(skb, 0, dlen - plen, GFP_ATOMIC); - if (err) { - goto out_put_cpu; - } - - skb->truesize += dlen - plen; - __skb_put(skb, dlen - plen); - skb_copy_to_linear_data(skb, scratch, dlen); - err = nexthdr; - -out_put_cpu: - put_cpu(); -out: - return err; -} - -static int ipcomp6_output(struct xfrm_state *x, struct sk_buff *skb) -{ - int err; - struct ip_comp_hdr *ipch; - struct ipcomp_data *ipcd = x->data; - int plen, dlen; - u8 *start, *scratch; - struct crypto_comp *tfm; - int cpu; - - /* check whether datagram len is larger than threshold */ - if (skb->len < ipcd->threshold) { - goto out_ok; - } - - if (skb_linearize_cow(skb)) - goto out_ok; - - /* compression */ - plen = skb->len; - dlen = IPCOMP_SCRATCH_SIZE; - start = skb->data; - - cpu = get_cpu(); - scratch = *per_cpu_ptr(ipcomp6_scratches, cpu); - tfm = *per_cpu_ptr(ipcd->tfms, cpu); - - local_bh_disable(); - err = crypto_comp_compress(tfm, start, plen, scratch, &dlen); - local_bh_enable(); - if (err || (dlen + sizeof(*ipch)) >= plen) { - put_cpu(); - goto out_ok; - } - memcpy(start + sizeof(struct ip_comp_hdr), scratch, dlen); - put_cpu(); - pskb_trim(skb, dlen + sizeof(struct ip_comp_hdr)); - - /* insert ipcomp header and replace datagram */ - ipch = ip_comp_hdr(skb); - ipch->nexthdr = *skb_mac_header(skb); - ipch->flags = 0; - ipch->cpi = htons((u16 )ntohl(x->id.spi)); - *skb_mac_header(skb) = IPPROTO_COMP; - -out_ok: - skb_push(skb, -skb_network_offset(skb)); - - return 0; -} - static void ipcomp6_err(struct sk_buff *skb, struct inet6_skb_parm *opt, int type, int code, int offset, __be32 info) { @@ -251,160 +132,9 @@ out: return err; } -static void ipcomp6_free_scratches(void) -{ - int i; - void **scratches; - - if (--ipcomp6_scratch_users) - return; - - scratches = ipcomp6_scratches; - if (!scratches) - return; - - for_each_possible_cpu(i) { - void *scratch = *per_cpu_ptr(scratches, i); - - vfree(scratch); - } - - free_percpu(scratches); -} - -static void **ipcomp6_alloc_scratches(void) -{ - int i; - void **scratches; - - if (ipcomp6_scratch_users++) - return ipcomp6_scratches; - - scratches = alloc_percpu(void *); - if (!scratches) - return NULL; - - ipcomp6_scratches = scratches; - - for_each_possible_cpu(i) { - void *scratch = vmalloc(IPCOMP_SCRATCH_SIZE); - if (!scratch) - return NULL; - *per_cpu_ptr(scratches, i) = scratch; - } - - return scratches; -} - -static void ipcomp6_free_tfms(struct crypto_comp **tfms) -{ - struct ipcomp6_tfms *pos; - int cpu; - - list_for_each_entry(pos, &ipcomp6_tfms_list, list) { - if (pos->tfms == tfms) - break; - } - - BUG_TRAP(pos); - - if (--pos->users) - return; - - list_del(&pos->list); - kfree(pos); - - if (!tfms) - return; - - for_each_possible_cpu(cpu) { - struct crypto_comp *tfm = *per_cpu_ptr(tfms, cpu); - crypto_free_comp(tfm); - } - free_percpu(tfms); -} - -static struct crypto_comp **ipcomp6_alloc_tfms(const char *alg_name) -{ - struct ipcomp6_tfms *pos; - struct crypto_comp **tfms; - int cpu; - - /* This can be any valid CPU ID so we don't need locking. */ - cpu = raw_smp_processor_id(); - - list_for_each_entry(pos, &ipcomp6_tfms_list, list) { - struct crypto_comp *tfm; - - tfms = pos->tfms; - tfm = *per_cpu_ptr(tfms, cpu); - - if (!strcmp(crypto_comp_name(tfm), alg_name)) { - pos->users++; - return tfms; - } - } - - pos = kmalloc(sizeof(*pos), GFP_KERNEL); - if (!pos) - return NULL; - - pos->users = 1; - INIT_LIST_HEAD(&pos->list); - list_add(&pos->list, &ipcomp6_tfms_list); - - pos->tfms = tfms = alloc_percpu(struct crypto_comp *); - if (!tfms) - goto error; - - for_each_possible_cpu(cpu) { - struct crypto_comp *tfm = crypto_alloc_comp(alg_name, 0, - CRYPTO_ALG_ASYNC); - if (IS_ERR(tfm)) - goto error; - *per_cpu_ptr(tfms, cpu) = tfm; - } - - return tfms; - -error: - ipcomp6_free_tfms(tfms); - return NULL; -} - -static void ipcomp6_free_data(struct ipcomp_data *ipcd) -{ - if (ipcd->tfms) - ipcomp6_free_tfms(ipcd->tfms); - ipcomp6_free_scratches(); -} - -static void ipcomp6_destroy(struct xfrm_state *x) -{ - struct ipcomp_data *ipcd = x->data; - if (!ipcd) - return; - xfrm_state_delete_tunnel(x); - mutex_lock(&ipcomp6_resource_mutex); - ipcomp6_free_data(ipcd); - mutex_unlock(&ipcomp6_resource_mutex); - kfree(ipcd); - - xfrm6_tunnel_free_spi((xfrm_address_t *)&x->props.saddr); -} - static int ipcomp6_init_state(struct xfrm_state *x) { - int err; - struct ipcomp_data *ipcd; - struct xfrm_algo_desc *calg_desc; - - err = -EINVAL; - if (!x->calg) - goto out; - - if (x->encap) - goto out; + int err = -EINVAL; x->props.header_len = 0; switch (x->props.mode) { @@ -417,39 +147,21 @@ static int ipcomp6_init_state(struct xfrm_state *x) goto out; } - err = -ENOMEM; - ipcd = kzalloc(sizeof(*ipcd), GFP_KERNEL); - if (!ipcd) + err = ipcomp_init_state(x); + if (err) goto out; - mutex_lock(&ipcomp6_resource_mutex); - if (!ipcomp6_alloc_scratches()) - goto error; - - ipcd->tfms = ipcomp6_alloc_tfms(x->calg->alg_name); - if (!ipcd->tfms) - goto error; - mutex_unlock(&ipcomp6_resource_mutex); - if (x->props.mode == XFRM_MODE_TUNNEL) { err = ipcomp6_tunnel_attach(x); if (err) goto error_tunnel; } - calg_desc = xfrm_calg_get_byname(x->calg->alg_name, 0); - BUG_ON(!calg_desc); - ipcd->threshold = calg_desc->uinfo.comp.threshold; - x->data = ipcd; err = 0; out: return err; error_tunnel: - mutex_lock(&ipcomp6_resource_mutex); -error: - ipcomp6_free_data(ipcd); - mutex_unlock(&ipcomp6_resource_mutex); - kfree(ipcd); + ipcomp_destroy(x); goto out; } @@ -460,9 +172,9 @@ static const struct xfrm_type ipcomp6_type = .owner = THIS_MODULE, .proto = IPPROTO_COMP, .init_state = ipcomp6_init_state, - .destructor = ipcomp6_destroy, - .input = ipcomp6_input, - .output = ipcomp6_output, + .destructor = ipcomp_destroy, + .input = ipcomp_input, + .output = ipcomp_output, .hdr_offset = xfrm6_find_1stfragopt, }; diff --git a/net/ipv6/ipv6_sockglue.c b/net/ipv6/ipv6_sockglue.c index 86e28a75267f..4e5eac301f91 100644 --- a/net/ipv6/ipv6_sockglue.c +++ b/net/ipv6/ipv6_sockglue.c @@ -7,8 +7,6 @@ * * Based on linux/net/ipv4/ip_sockglue.c * - * $Id: ipv6_sockglue.c,v 1.41 2002/02/01 22:01:04 davem Exp $ - * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public License * as published by the Free Software Foundation; either version @@ -61,7 +59,7 @@ DEFINE_SNMP_STAT(struct ipstats_mib, ipv6_statistics) __read_mostly; struct ip6_ra_chain *ip6_ra_chain; DEFINE_RWLOCK(ip6_ra_lock); -int ip6_ra_control(struct sock *sk, int sel, void (*destructor)(struct sock *)) +int ip6_ra_control(struct sock *sk, int sel) { struct ip6_ra_chain *ra, *new_ra, **rap; @@ -83,8 +81,6 @@ int ip6_ra_control(struct sock *sk, int sel, void (*destructor)(struct sock *)) *rap = ra->next; write_unlock_bh(&ip6_ra_lock); - if (ra->destructor) - ra->destructor(sk); sock_put(sk); kfree(ra); return 0; @@ -96,7 +92,6 @@ int ip6_ra_control(struct sock *sk, int sel, void (*destructor)(struct sock *)) } new_ra->sk = sk; new_ra->sel = sel; - new_ra->destructor = destructor; new_ra->next = ra; *rap = new_ra; sock_hold(sk); @@ -351,6 +346,8 @@ static int do_ipv6_setsockopt(struct sock *sk, int level, int optname, */ if (optlen == 0) optval = NULL; + else if (optval == NULL) + goto e_inval; else if (optlen < sizeof(struct ipv6_opt_hdr) || optlen & 0x7 || optlen > 8 * 255) goto e_inval; @@ -634,7 +631,7 @@ done: case IPV6_ROUTER_ALERT: if (optlen < sizeof(int)) goto e_inval; - retv = ip6_ra_control(sk, val, NULL); + retv = ip6_ra_control(sk, val); break; case IPV6_MTU_DISCOVER: if (optlen < sizeof(int)) @@ -914,7 +911,7 @@ static int do_ipv6_getsockopt(struct sock *sk, int level, int optname, } else { if (np->rxopt.bits.rxinfo) { struct in6_pktinfo src_info; - src_info.ipi6_ifindex = np->mcast_oif; + src_info.ipi6_ifindex = np->mcast_oif ? np->mcast_oif : sk->sk_bound_dev_if; ipv6_addr_copy(&src_info.ipi6_addr, &np->daddr); put_cmsg(&msg, SOL_IPV6, IPV6_PKTINFO, sizeof(src_info), &src_info); } @@ -924,7 +921,7 @@ static int do_ipv6_getsockopt(struct sock *sk, int level, int optname, } if (np->rxopt.bits.rxoinfo) { struct in6_pktinfo src_info; - src_info.ipi6_ifindex = np->mcast_oif; + src_info.ipi6_ifindex = np->mcast_oif ? np->mcast_oif : sk->sk_bound_dev_if; ipv6_addr_copy(&src_info.ipi6_addr, &np->daddr); put_cmsg(&msg, SOL_IPV6, IPV6_2292PKTINFO, sizeof(src_info), &src_info); } @@ -1043,7 +1040,7 @@ static int do_ipv6_getsockopt(struct sock *sk, int level, int optname, dst_release(dst); } if (val < 0) - val = ipv6_devconf.hop_limit; + val = sock_net(sk)->ipv6.devconf_all->hop_limit; break; } diff --git a/net/ipv6/mcast.c b/net/ipv6/mcast.c index fd632dd7f98d..d7b3c6d398ae 100644 --- a/net/ipv6/mcast.c +++ b/net/ipv6/mcast.c @@ -5,8 +5,6 @@ * Authors: * Pedro Roque <roque@di.fc.ul.pt> * - * $Id: mcast.c,v 1.40 2002/02/08 03:57:19 davem Exp $ - * * Based on linux/ipv4/igmp.c and linux/ipv4/ip_sockglue.c * * This program is free software; you can redistribute it and/or @@ -153,7 +151,7 @@ static int ip6_mc_leave_src(struct sock *sk, struct ipv6_mc_socklist *iml, #define IGMP6_UNSOLICITED_IVAL (10*HZ) #define MLD_QRV_DEFAULT 2 -#define MLD_V1_SEEN(idev) (ipv6_devconf.force_mld_version == 1 || \ +#define MLD_V1_SEEN(idev) (dev_net((idev)->dev)->ipv6.devconf_all->force_mld_version == 1 || \ (idev)->cnf.force_mld_version == 1 || \ ((idev)->mc_v1_seen && \ time_before(jiffies, (idev)->mc_v1_seen))) @@ -164,7 +162,6 @@ static int ip6_mc_leave_src(struct sock *sk, struct ipv6_mc_socklist *iml, ((MLDV2_MASK(value, nbmant) | (1<<(nbmant))) << \ (MLDV2_MASK((value) >> (nbmant), nbexp) + (nbexp)))) -#define MLDV2_QQIC(value) MLDV2_EXP(0x80, 4, 3, value) #define MLDV2_MRC(value) MLDV2_EXP(0x8000, 12, 3, value) #define IPV6_MLD_MAX_MSF 64 @@ -370,10 +367,6 @@ int ip6_mc_source(int add, int omode, struct sock *sk, int pmclocked = 0; int err; - if (pgsr->gsr_group.ss_family != AF_INET6 || - pgsr->gsr_source.ss_family != AF_INET6) - return -EINVAL; - source = &((struct sockaddr_in6 *)&pgsr->gsr_source)->sin6_addr; group = &((struct sockaddr_in6 *)&pgsr->gsr_group)->sin6_addr; @@ -1453,7 +1446,7 @@ static void mld_sendpack(struct sk_buff *skb) int err; struct flowi fl; - IP6_INC_STATS(idev, IPSTATS_MIB_OUTREQUESTS); + IP6_INC_STATS(net, idev, IPSTATS_MIB_OUTREQUESTS); payload_len = (skb->tail - skb->network_header) - sizeof(*pip6); mldlen = skb->tail - skb->transport_header; pip6->payload_len = htons(payload_len); @@ -1481,11 +1474,11 @@ static void mld_sendpack(struct sk_buff *skb) dst_output); out: if (!err) { - ICMP6MSGOUT_INC_STATS_BH(idev, ICMPV6_MLD2_REPORT); - ICMP6_INC_STATS_BH(idev, ICMP6_MIB_OUTMSGS); - IP6_INC_STATS_BH(idev, IPSTATS_MIB_OUTMCASTPKTS); + ICMP6MSGOUT_INC_STATS_BH(net, idev, ICMPV6_MLD2_REPORT); + ICMP6_INC_STATS_BH(net, idev, ICMP6_MIB_OUTMSGS); + IP6_INC_STATS_BH(net, idev, IPSTATS_MIB_OUTMCASTPKTS); } else - IP6_INC_STATS_BH(idev, IPSTATS_MIB_OUTDISCARDS); + IP6_INC_STATS_BH(net, idev, IPSTATS_MIB_OUTDISCARDS); if (likely(idev != NULL)) in6_dev_put(idev); @@ -1778,7 +1771,7 @@ static void igmp6_send(struct in6_addr *addr, struct net_device *dev, int type) struct flowi fl; rcu_read_lock(); - IP6_INC_STATS(__in6_dev_get(dev), + IP6_INC_STATS(net, __in6_dev_get(dev), IPSTATS_MIB_OUTREQUESTS); rcu_read_unlock(); if (type == ICMPV6_MGM_REDUCTION) @@ -1794,7 +1787,7 @@ static void igmp6_send(struct in6_addr *addr, struct net_device *dev, int type) if (skb == NULL) { rcu_read_lock(); - IP6_INC_STATS(__in6_dev_get(dev), + IP6_INC_STATS(net, __in6_dev_get(dev), IPSTATS_MIB_OUTDISCARDS); rcu_read_unlock(); return; @@ -1846,11 +1839,11 @@ static void igmp6_send(struct in6_addr *addr, struct net_device *dev, int type) dst_output); out: if (!err) { - ICMP6MSGOUT_INC_STATS(idev, type); - ICMP6_INC_STATS(idev, ICMP6_MIB_OUTMSGS); - IP6_INC_STATS(idev, IPSTATS_MIB_OUTMCASTPKTS); + ICMP6MSGOUT_INC_STATS(net, idev, type); + ICMP6_INC_STATS(net, idev, ICMP6_MIB_OUTMSGS); + IP6_INC_STATS(net, idev, IPSTATS_MIB_OUTMCASTPKTS); } else - IP6_INC_STATS(idev, IPSTATS_MIB_OUTDISCARDS); + IP6_INC_STATS(net, idev, IPSTATS_MIB_OUTDISCARDS); if (likely(idev != NULL)) in6_dev_put(idev); diff --git a/net/ipv6/mip6.c b/net/ipv6/mip6.c index ad1cc5bbf977..31295c8f6196 100644 --- a/net/ipv6/mip6.c +++ b/net/ipv6/mip6.c @@ -164,8 +164,8 @@ static int mip6_destopt_output(struct xfrm_state *x, struct sk_buff *skb) calc_padlen(sizeof(*dstopt), 6)); hao->type = IPV6_TLV_HAO; + BUILD_BUG_ON(sizeof(*hao) != 18); hao->length = sizeof(*hao) - 2; - BUG_TRAP(hao->length == 16); len = ((char *)hao - (char *)dstopt) + sizeof(*hao); @@ -174,7 +174,7 @@ static int mip6_destopt_output(struct xfrm_state *x, struct sk_buff *skb) memcpy(&iph->saddr, x->coaddr, sizeof(iph->saddr)); spin_unlock_bh(&x->lock); - BUG_TRAP(len == x->props.header_len); + WARN_ON(len != x->props.header_len); dstopt->hdrlen = (x->props.header_len >> 3) - 1; return 0; @@ -317,7 +317,7 @@ static int mip6_destopt_init_state(struct xfrm_state *x) x->props.header_len = sizeof(struct ipv6_destopt_hdr) + calc_padlen(sizeof(struct ipv6_destopt_hdr), 6) + sizeof(struct ipv6_destopt_hao); - BUG_TRAP(x->props.header_len == 24); + WARN_ON(x->props.header_len != 24); return 0; } @@ -380,7 +380,7 @@ static int mip6_rthdr_output(struct xfrm_state *x, struct sk_buff *skb) rt2->rt_hdr.segments_left = 1; memset(&rt2->reserved, 0, sizeof(rt2->reserved)); - BUG_TRAP(rt2->rt_hdr.hdrlen == 2); + WARN_ON(rt2->rt_hdr.hdrlen != 2); memcpy(&rt2->addr, &iph->daddr, sizeof(rt2->addr)); spin_lock_bh(&x->lock); diff --git a/net/ipv6/ndisc.c b/net/ipv6/ndisc.c index 282fdb31f8ed..840b15780a36 100644 --- a/net/ipv6/ndisc.c +++ b/net/ipv6/ndisc.c @@ -516,13 +516,13 @@ static void __ndisc_send(struct net_device *dev, skb->dst = dst; idev = in6_dev_get(dst->dev); - IP6_INC_STATS(idev, IPSTATS_MIB_OUTREQUESTS); + IP6_INC_STATS(net, idev, IPSTATS_MIB_OUTREQUESTS); err = NF_HOOK(PF_INET6, NF_INET_LOCAL_OUT, skb, NULL, dst->dev, dst_output); if (!err) { - ICMP6MSGOUT_INC_STATS(idev, type); - ICMP6_INC_STATS(idev, ICMP6_MIB_OUTMSGS); + ICMP6MSGOUT_INC_STATS(net, idev, type); + ICMP6_INC_STATS(net, idev, ICMP6_MIB_OUTMSGS); } if (likely(idev != NULL)) @@ -549,7 +549,7 @@ static void ndisc_send_na(struct net_device *dev, struct neighbour *neigh, override = 0; in6_ifa_put(ifp); } else { - if (ipv6_dev_get_saddr(dev, daddr, + if (ipv6_dev_get_saddr(dev_net(dev), dev, daddr, inet6_sk(dev_net(dev)->ipv6.ndisc_sk)->srcprefs, &tmpaddr)) return; @@ -784,15 +784,17 @@ static void ndisc_recv_ns(struct sk_buff *skb) idev = ifp->idev; } else { + struct net *net = dev_net(dev); + idev = in6_dev_get(dev); if (!idev) { /* XXX: count this drop? */ return; } - if (ipv6_chk_acast_addr(dev_net(dev), dev, &msg->target) || + if (ipv6_chk_acast_addr(net, dev, &msg->target) || (idev->cnf.forwarding && - (ipv6_devconf.proxy_ndp || idev->cnf.proxy_ndp) && + (net->ipv6.devconf_all->proxy_ndp || idev->cnf.proxy_ndp) && (is_router = pndisc_is_router(&msg->target, dev)) >= 0)) { if (!(NEIGH_CB(skb)->flags & LOCALLY_ENQUEUED) && skb->pkt_type != PACKET_HOST && @@ -921,6 +923,7 @@ static void ndisc_recv_na(struct sk_buff *skb) if (neigh) { u8 old_flags = neigh->flags; + struct net *net = dev_net(dev); if (neigh->nud_state & NUD_FAILED) goto out; @@ -931,8 +934,8 @@ static void ndisc_recv_na(struct sk_buff *skb) * has already sent a NA to us. */ if (lladdr && !memcmp(lladdr, dev->dev_addr, dev->addr_len) && - ipv6_devconf.forwarding && ipv6_devconf.proxy_ndp && - pneigh_lookup(&nd_tbl, dev_net(dev), &msg->target, dev, 0)) { + net->ipv6.devconf_all->forwarding && net->ipv6.devconf_all->proxy_ndp && + pneigh_lookup(&nd_tbl, net, &msg->target, dev, 0)) { /* XXX: idev->cnf.prixy_ndp */ goto out; } @@ -1578,12 +1581,12 @@ void ndisc_send_redirect(struct sk_buff *skb, struct neighbour *neigh, buff->dst = dst; idev = in6_dev_get(dst->dev); - IP6_INC_STATS(idev, IPSTATS_MIB_OUTREQUESTS); + IP6_INC_STATS(net, idev, IPSTATS_MIB_OUTREQUESTS); err = NF_HOOK(PF_INET6, NF_INET_LOCAL_OUT, buff, NULL, dst->dev, dst_output); if (!err) { - ICMP6MSGOUT_INC_STATS(idev, NDISC_REDIRECT); - ICMP6_INC_STATS(idev, ICMP6_MIB_OUTMSGS); + ICMP6MSGOUT_INC_STATS(net, idev, NDISC_REDIRECT); + ICMP6_INC_STATS(net, idev, ICMP6_MIB_OUTMSGS); } if (likely(idev != NULL)) diff --git a/net/ipv6/netfilter.c b/net/ipv6/netfilter.c index 8c6c5e71f210..6b29b03925f1 100644 --- a/net/ipv6/netfilter.c +++ b/net/ipv6/netfilter.c @@ -23,7 +23,7 @@ int ip6_route_me_harder(struct sk_buff *skb) .saddr = iph->saddr, } }, }; - dst = ip6_route_output(&init_net, skb->sk, &fl); + dst = ip6_route_output(dev_net(skb->dst->dev), skb->sk, &fl); #ifdef CONFIG_XFRM if (!(IP6CB(skb)->flags & IP6SKB_XFRM_TRANSFORMED) && @@ -33,7 +33,8 @@ int ip6_route_me_harder(struct sk_buff *skb) #endif if (dst->error) { - IP6_INC_STATS(ip6_dst_idev(dst), IPSTATS_MIB_OUTNOROUTES); + IP6_INC_STATS(&init_net, ip6_dst_idev(dst), + IPSTATS_MIB_OUTNOROUTES); LIMIT_NETDEBUG(KERN_DEBUG "ip6_route_me_harder: No more route.\n"); dst_release(dst); return -EINVAL; diff --git a/net/ipv6/netfilter/Kconfig b/net/ipv6/netfilter/Kconfig index 6cae5475737e..53ea512c4608 100644 --- a/net/ipv6/netfilter/Kconfig +++ b/net/ipv6/netfilter/Kconfig @@ -55,30 +55,29 @@ config IP6_NF_IPTABLES To compile it as a module, choose M here. If unsure, say N. +if IP6_NF_IPTABLES + # The simple matches. -config IP6_NF_MATCH_RT - tristate '"rt" Routing header match support' - depends on IP6_NF_IPTABLES +config IP6_NF_MATCH_AH + tristate '"ah" match support' depends on NETFILTER_ADVANCED help - rt matching allows you to match packets based on the routing - header of the packet. + This module allows one to match AH packets. To compile it as a module, choose M here. If unsure, say N. -config IP6_NF_MATCH_OPTS - tristate '"hopbyhop" and "dst" opts header match support' - depends on IP6_NF_IPTABLES +config IP6_NF_MATCH_EUI64 + tristate '"eui64" address check' depends on NETFILTER_ADVANCED help - This allows one to match packets based on the hop-by-hop - and destination options headers of a packet. + This module performs checking on the IPv6 source address + Compares the last 64 bits with the EUI64 (delivered + from the MAC address) address To compile it as a module, choose M here. If unsure, say N. config IP6_NF_MATCH_FRAG tristate '"frag" Fragmentation header match support' - depends on IP6_NF_IPTABLES depends on NETFILTER_ADVANCED help frag matching allows you to match packets based on the fragmentation @@ -86,9 +85,17 @@ config IP6_NF_MATCH_FRAG To compile it as a module, choose M here. If unsure, say N. +config IP6_NF_MATCH_OPTS + tristate '"hbh" hop-by-hop and "dst" opts header match support' + depends on NETFILTER_ADVANCED + help + This allows one to match packets based on the hop-by-hop + and destination options headers of a packet. + + To compile it as a module, choose M here. If unsure, say N. + config IP6_NF_MATCH_HL tristate '"hl" match support' - depends on IP6_NF_IPTABLES depends on NETFILTER_ADVANCED help HL matching allows you to match packets based on the hop @@ -98,7 +105,6 @@ config IP6_NF_MATCH_HL config IP6_NF_MATCH_IPV6HEADER tristate '"ipv6header" IPv6 Extension Headers Match' - depends on IP6_NF_IPTABLES default m if NETFILTER_ADVANCED=n help This module allows one to match packets based upon @@ -106,54 +112,40 @@ config IP6_NF_MATCH_IPV6HEADER To compile it as a module, choose M here. If unsure, say N. -config IP6_NF_MATCH_AH - tristate '"ah" match support' - depends on IP6_NF_IPTABLES - depends on NETFILTER_ADVANCED - help - This module allows one to match AH packets. - - To compile it as a module, choose M here. If unsure, say N. - config IP6_NF_MATCH_MH tristate '"mh" match support' - depends on IP6_NF_IPTABLES depends on NETFILTER_ADVANCED help This module allows one to match MH packets. To compile it as a module, choose M here. If unsure, say N. -config IP6_NF_MATCH_EUI64 - tristate '"eui64" address check' - depends on IP6_NF_IPTABLES +config IP6_NF_MATCH_RT + tristate '"rt" Routing header match support' depends on NETFILTER_ADVANCED help - This module performs checking on the IPv6 source address - Compares the last 64 bits with the EUI64 (delivered - from the MAC address) address + rt matching allows you to match packets based on the routing + header of the packet. To compile it as a module, choose M here. If unsure, say N. # The targets -config IP6_NF_FILTER - tristate "Packet filtering" - depends on IP6_NF_IPTABLES +config IP6_NF_TARGET_LOG + tristate "LOG target support" default m if NETFILTER_ADVANCED=n help - Packet filtering defines a table `filter', which has a series of - rules for simple packet filtering at local input, forwarding and - local output. See the man page for iptables(8). + This option adds a `LOG' target, which allows you to create rules in + any iptables table which records the packet header to the syslog. To compile it as a module, choose M here. If unsure, say N. -config IP6_NF_TARGET_LOG - tristate "LOG target support" - depends on IP6_NF_FILTER +config IP6_NF_FILTER + tristate "Packet filtering" default m if NETFILTER_ADVANCED=n help - This option adds a `LOG' target, which allows you to create rules in - any iptables table which records the packet header to the syslog. + Packet filtering defines a table `filter', which has a series of + rules for simple packet filtering at local input, forwarding and + local output. See the man page for iptables(8). To compile it as a module, choose M here. If unsure, say N. @@ -170,7 +162,6 @@ config IP6_NF_TARGET_REJECT config IP6_NF_MANGLE tristate "Packet mangling" - depends on IP6_NF_IPTABLES default m if NETFILTER_ADVANCED=n help This option adds a `mangle' table to iptables: see the man page for @@ -198,7 +189,6 @@ config IP6_NF_TARGET_HL config IP6_NF_RAW tristate 'raw table support (required for TRACE)' - depends on IP6_NF_IPTABLES depends on NETFILTER_ADVANCED help This option adds a `raw' table to ip6tables. This table is the very @@ -208,5 +198,18 @@ config IP6_NF_RAW If you want to compile it as a module, say M here and read <file:Documentation/kbuild/modules.txt>. If unsure, say `N'. +# security table for MAC policy +config IP6_NF_SECURITY + tristate "Security table" + depends on SECURITY + depends on NETFILTER_ADVANCED + help + This option adds a `security' table to iptables, for use + with Mandatory Access Control (MAC) policy. + + If unsure, say N. + +endif # IP6_NF_IPTABLES + endmenu diff --git a/net/ipv6/netfilter/Makefile b/net/ipv6/netfilter/Makefile index fbf2c14ed887..3f17c948eefb 100644 --- a/net/ipv6/netfilter/Makefile +++ b/net/ipv6/netfilter/Makefile @@ -8,6 +8,7 @@ obj-$(CONFIG_IP6_NF_FILTER) += ip6table_filter.o obj-$(CONFIG_IP6_NF_MANGLE) += ip6table_mangle.o obj-$(CONFIG_IP6_NF_QUEUE) += ip6_queue.o obj-$(CONFIG_IP6_NF_RAW) += ip6table_raw.o +obj-$(CONFIG_IP6_NF_SECURITY) += ip6table_security.o # objects for l3 independent conntrack nf_conntrack_ipv6-objs := nf_conntrack_l3proto_ipv6.o nf_conntrack_proto_icmpv6.o nf_conntrack_reasm.o diff --git a/net/ipv6/netfilter/ip6_queue.c b/net/ipv6/netfilter/ip6_queue.c index 2eff3ae8977d..5859c046cbc4 100644 --- a/net/ipv6/netfilter/ip6_queue.c +++ b/net/ipv6/netfilter/ip6_queue.c @@ -159,7 +159,6 @@ ipq_build_packet_message(struct nf_queue_entry *entry, int *errp) case IPQ_COPY_META: case IPQ_COPY_NONE: size = NLMSG_SPACE(sizeof(*pmsg)); - data_len = 0; break; case IPQ_COPY_PACKET: @@ -226,8 +225,6 @@ ipq_build_packet_message(struct nf_queue_entry *entry, int *errp) return skb; nlmsg_failure: - if (skb) - kfree_skb(skb); *errp = -EINVAL; printk(KERN_ERR "ip6_queue: error creating packet message\n"); return NULL; @@ -483,7 +480,7 @@ ipq_rcv_dev_event(struct notifier_block *this, { struct net_device *dev = ptr; - if (dev_net(dev) != &init_net) + if (!net_eq(dev_net(dev), &init_net)) return NOTIFY_DONE; /* Drop any packets associated with the downed device */ diff --git a/net/ipv6/netfilter/ip6_tables.c b/net/ipv6/netfilter/ip6_tables.c index 0b4557e03431..a33485dc81cb 100644 --- a/net/ipv6/netfilter/ip6_tables.c +++ b/net/ipv6/netfilter/ip6_tables.c @@ -200,32 +200,25 @@ ip6_checkentry(const struct ip6t_ip6 *ipv6) } static unsigned int -ip6t_error(struct sk_buff *skb, - const struct net_device *in, - const struct net_device *out, - unsigned int hooknum, - const struct xt_target *target, - const void *targinfo) +ip6t_error(struct sk_buff *skb, const struct xt_target_param *par) { if (net_ratelimit()) - printk("ip6_tables: error: `%s'\n", (char *)targinfo); + printk("ip6_tables: error: `%s'\n", + (const char *)par->targinfo); return NF_DROP; } /* Performance critical - called for every packet */ static inline bool -do_match(struct ip6t_entry_match *m, - const struct sk_buff *skb, - const struct net_device *in, - const struct net_device *out, - int offset, - unsigned int protoff, - bool *hotdrop) +do_match(struct ip6t_entry_match *m, const struct sk_buff *skb, + struct xt_match_param *par) { + par->match = m->u.kernel.match; + par->matchinfo = m->data; + /* Stop iteration if it doesn't match */ - if (!m->u.kernel.match->match(skb, in, out, m->u.kernel.match, m->data, - offset, protoff, hotdrop)) + if (!m->u.kernel.match->match(skb, par)) return true; else return false; @@ -355,8 +348,6 @@ ip6t_do_table(struct sk_buff *skb, struct xt_table *table) { static const char nulldevname[IFNAMSIZ] __attribute__((aligned(sizeof(long)))); - int offset = 0; - unsigned int protoff = 0; bool hotdrop = false; /* Initializing verdict to NF_DROP keeps gcc happy. */ unsigned int verdict = NF_DROP; @@ -364,6 +355,8 @@ ip6t_do_table(struct sk_buff *skb, void *table_base; struct ip6t_entry *e, *back; struct xt_table_info *private; + struct xt_match_param mtpar; + struct xt_target_param tgpar; /* Initialization */ indev = in ? in->name : nulldevname; @@ -374,6 +367,11 @@ ip6t_do_table(struct sk_buff *skb, * things we don't know, ie. tcp syn flag or ports). If the * rule is also a fragment-specific rule, non-fragments won't * match it. */ + mtpar.hotdrop = &hotdrop; + mtpar.in = tgpar.in = in; + mtpar.out = tgpar.out = out; + mtpar.family = tgpar.family = NFPROTO_IPV6; + tgpar.hooknum = hook; read_lock_bh(&table->lock); IP_NF_ASSERT(table->valid_hooks & (1 << hook)); @@ -388,12 +386,10 @@ ip6t_do_table(struct sk_buff *skb, IP_NF_ASSERT(e); IP_NF_ASSERT(back); if (ip6_packet_match(skb, indev, outdev, &e->ipv6, - &protoff, &offset, &hotdrop)) { + &mtpar.thoff, &mtpar.fragoff, &hotdrop)) { struct ip6t_entry_target *t; - if (IP6T_MATCH_ITERATE(e, do_match, - skb, in, out, - offset, protoff, &hotdrop) != 0) + if (IP6T_MATCH_ITERATE(e, do_match, skb, &mtpar) != 0) goto no_match; ADD_COUNTER(e->counters, @@ -441,15 +437,15 @@ ip6t_do_table(struct sk_buff *skb, } else { /* Targets which reenter must return abs. verdicts */ + tgpar.target = t->u.kernel.target; + tgpar.targinfo = t->data; + #ifdef CONFIG_NETFILTER_DEBUG ((struct ip6t_entry *)table_base)->comefrom = 0xeeeeeeec; #endif verdict = t->u.kernel.target->target(skb, - in, out, - hook, - t->u.kernel.target, - t->data); + &tgpar); #ifdef CONFIG_NETFILTER_DEBUG if (((struct ip6t_entry *)table_base)->comefrom @@ -602,12 +598,17 @@ mark_source_chains(struct xt_table_info *newinfo, static int cleanup_match(struct ip6t_entry_match *m, unsigned int *i) { + struct xt_mtdtor_param par; + if (i && (*i)-- == 0) return 1; - if (m->u.kernel.match->destroy) - m->u.kernel.match->destroy(m->u.kernel.match, m->data); - module_put(m->u.kernel.match->me); + par.match = m->u.kernel.match; + par.matchinfo = m->data; + par.family = NFPROTO_IPV6; + if (par.match->destroy != NULL) + par.match->destroy(&par); + module_put(par.match->me); return 0; } @@ -632,34 +633,28 @@ check_entry(struct ip6t_entry *e, const char *name) return 0; } -static int check_match(struct ip6t_entry_match *m, const char *name, - const struct ip6t_ip6 *ipv6, - unsigned int hookmask, unsigned int *i) +static int check_match(struct ip6t_entry_match *m, struct xt_mtchk_param *par, + unsigned int *i) { - struct xt_match *match; + const struct ip6t_ip6 *ipv6 = par->entryinfo; int ret; - match = m->u.kernel.match; - ret = xt_check_match(match, AF_INET6, m->u.match_size - sizeof(*m), - name, hookmask, ipv6->proto, - ipv6->invflags & IP6T_INV_PROTO); - if (!ret && m->u.kernel.match->checkentry - && !m->u.kernel.match->checkentry(name, ipv6, match, m->data, - hookmask)) { + par->match = m->u.kernel.match; + par->matchinfo = m->data; + + ret = xt_check_match(par, m->u.match_size - sizeof(*m), + ipv6->proto, ipv6->invflags & IP6T_INV_PROTO); + if (ret < 0) { duprintf("ip_tables: check failed for `%s'.\n", - m->u.kernel.match->name); - ret = -EINVAL; + par.match->name); + return ret; } - if (!ret) - (*i)++; - return ret; + ++*i; + return 0; } static int -find_check_match(struct ip6t_entry_match *m, - const char *name, - const struct ip6t_ip6 *ipv6, - unsigned int hookmask, +find_check_match(struct ip6t_entry_match *m, struct xt_mtchk_param *par, unsigned int *i) { struct xt_match *match; @@ -674,7 +669,7 @@ find_check_match(struct ip6t_entry_match *m, } m->u.kernel.match = match; - ret = check_match(m, name, ipv6, hookmask, i); + ret = check_match(m, par, i); if (ret) goto err; @@ -686,23 +681,26 @@ err: static int check_target(struct ip6t_entry *e, const char *name) { - struct ip6t_entry_target *t; - struct xt_target *target; + struct ip6t_entry_target *t = ip6t_get_target(e); + struct xt_tgchk_param par = { + .table = name, + .entryinfo = e, + .target = t->u.kernel.target, + .targinfo = t->data, + .hook_mask = e->comefrom, + .family = NFPROTO_IPV6, + }; int ret; t = ip6t_get_target(e); - target = t->u.kernel.target; - ret = xt_check_target(target, AF_INET6, t->u.target_size - sizeof(*t), - name, e->comefrom, e->ipv6.proto, - e->ipv6.invflags & IP6T_INV_PROTO); - if (!ret && t->u.kernel.target->checkentry - && !t->u.kernel.target->checkentry(name, e, target, t->data, - e->comefrom)) { + ret = xt_check_target(&par, t->u.target_size - sizeof(*t), + e->ipv6.proto, e->ipv6.invflags & IP6T_INV_PROTO); + if (ret < 0) { duprintf("ip_tables: check failed for `%s'.\n", t->u.kernel.target->name); - ret = -EINVAL; + return ret; } - return ret; + return 0; } static int @@ -713,14 +711,18 @@ find_check_entry(struct ip6t_entry *e, const char *name, unsigned int size, struct xt_target *target; int ret; unsigned int j; + struct xt_mtchk_param mtpar; ret = check_entry(e, name); if (ret) return ret; j = 0; - ret = IP6T_MATCH_ITERATE(e, find_check_match, name, &e->ipv6, - e->comefrom, &j); + mtpar.table = name; + mtpar.entryinfo = &e->ipv6; + mtpar.hook_mask = e->comefrom; + mtpar.family = NFPROTO_IPV6; + ret = IP6T_MATCH_ITERATE(e, find_check_match, &mtpar, &j); if (ret != 0) goto cleanup_matches; @@ -795,6 +797,7 @@ check_entry_size_and_hooks(struct ip6t_entry *e, static int cleanup_entry(struct ip6t_entry *e, unsigned int *i) { + struct xt_tgdtor_param par; struct ip6t_entry_target *t; if (i && (*i)-- == 0) @@ -803,9 +806,13 @@ cleanup_entry(struct ip6t_entry *e, unsigned int *i) /* Cleanup all matches */ IP6T_MATCH_ITERATE(e, cleanup_match, NULL); t = ip6t_get_target(e); - if (t->u.kernel.target->destroy) - t->u.kernel.target->destroy(t->u.kernel.target, t->data); - module_put(t->u.kernel.target->me); + + par.target = t->u.kernel.target; + par.targinfo = t->data; + par.family = NFPROTO_IPV6; + if (par.target->destroy != NULL) + par.target->destroy(&par); + module_put(par.target->me); return 0; } @@ -1677,10 +1684,14 @@ static int compat_check_entry(struct ip6t_entry *e, const char *name, { unsigned int j; int ret; + struct xt_mtchk_param mtpar; j = 0; - ret = IP6T_MATCH_ITERATE(e, check_match, name, &e->ipv6, - e->comefrom, &j); + mtpar.table = name; + mtpar.entryinfo = &e->ipv6; + mtpar.hook_mask = e->comefrom; + mtpar.family = NFPROTO_IPV6; + ret = IP6T_MATCH_ITERATE(e, check_match, &mtpar, &j); if (ret) goto cleanup_matches; @@ -2146,30 +2157,23 @@ icmp6_type_code_match(u_int8_t test_type, u_int8_t min_code, u_int8_t max_code, } static bool -icmp6_match(const struct sk_buff *skb, - const struct net_device *in, - const struct net_device *out, - const struct xt_match *match, - const void *matchinfo, - int offset, - unsigned int protoff, - bool *hotdrop) +icmp6_match(const struct sk_buff *skb, const struct xt_match_param *par) { const struct icmp6hdr *ic; struct icmp6hdr _icmph; - const struct ip6t_icmp *icmpinfo = matchinfo; + const struct ip6t_icmp *icmpinfo = par->matchinfo; /* Must not be a fragment. */ - if (offset) + if (par->fragoff != 0) return false; - ic = skb_header_pointer(skb, protoff, sizeof(_icmph), &_icmph); + ic = skb_header_pointer(skb, par->thoff, sizeof(_icmph), &_icmph); if (ic == NULL) { /* We've been asked to examine this packet, and we * can't. Hence, no choice but to drop. */ duprintf("Dropping evil ICMP tinygram.\n"); - *hotdrop = true; + *par->hotdrop = true; return false; } @@ -2181,14 +2185,9 @@ icmp6_match(const struct sk_buff *skb, } /* Called when user tries to insert an entry of this type. */ -static bool -icmp6_checkentry(const char *tablename, - const void *entry, - const struct xt_match *match, - void *matchinfo, - unsigned int hook_mask) +static bool icmp6_checkentry(const struct xt_mtchk_param *par) { - const struct ip6t_icmp *icmpinfo = matchinfo; + const struct ip6t_icmp *icmpinfo = par->matchinfo; /* Must specify no unknown invflags */ return !(icmpinfo->invflags & ~IP6T_ICMP_INV); diff --git a/net/ipv6/netfilter/ip6t_HL.c b/net/ipv6/netfilter/ip6t_HL.c index d5f8fd5f29d3..27b5adf670a2 100644 --- a/net/ipv6/netfilter/ip6t_HL.c +++ b/net/ipv6/netfilter/ip6t_HL.c @@ -19,12 +19,10 @@ MODULE_DESCRIPTION("Xtables: IPv6 Hop Limit field modification target"); MODULE_LICENSE("GPL"); static unsigned int -hl_tg6(struct sk_buff *skb, const struct net_device *in, - const struct net_device *out, unsigned int hooknum, - const struct xt_target *target, const void *targinfo) +hl_tg6(struct sk_buff *skb, const struct xt_target_param *par) { struct ipv6hdr *ip6h; - const struct ip6t_HL_info *info = targinfo; + const struct ip6t_HL_info *info = par->targinfo; int new_hl; if (!skb_make_writable(skb, skb->len)) @@ -56,12 +54,9 @@ hl_tg6(struct sk_buff *skb, const struct net_device *in, return XT_CONTINUE; } -static bool -hl_tg6_check(const char *tablename, const void *entry, - const struct xt_target *target, void *targinfo, - unsigned int hook_mask) +static bool hl_tg6_check(const struct xt_tgchk_param *par) { - const struct ip6t_HL_info *info = targinfo; + const struct ip6t_HL_info *info = par->targinfo; if (info->mode > IP6T_HL_MAXMODE) { printk(KERN_WARNING "ip6t_HL: invalid or unknown Mode %u\n", @@ -78,7 +73,7 @@ hl_tg6_check(const char *tablename, const void *entry, static struct xt_target hl_tg6_reg __read_mostly = { .name = "HL", - .family = AF_INET6, + .family = NFPROTO_IPV6, .target = hl_tg6, .targetsize = sizeof(struct ip6t_HL_info), .table = "mangle", diff --git a/net/ipv6/netfilter/ip6t_LOG.c b/net/ipv6/netfilter/ip6t_LOG.c index 3a2316974f83..caa441d09567 100644 --- a/net/ipv6/netfilter/ip6t_LOG.c +++ b/net/ipv6/netfilter/ip6t_LOG.c @@ -385,7 +385,7 @@ static struct nf_loginfo default_loginfo = { }; static void -ip6t_log_packet(unsigned int pf, +ip6t_log_packet(u_int8_t pf, unsigned int hooknum, const struct sk_buff *skb, const struct net_device *in, @@ -438,28 +438,24 @@ ip6t_log_packet(unsigned int pf, } static unsigned int -log_tg6(struct sk_buff *skb, const struct net_device *in, - const struct net_device *out, unsigned int hooknum, - const struct xt_target *target, const void *targinfo) +log_tg6(struct sk_buff *skb, const struct xt_target_param *par) { - const struct ip6t_log_info *loginfo = targinfo; + const struct ip6t_log_info *loginfo = par->targinfo; struct nf_loginfo li; li.type = NF_LOG_TYPE_LOG; li.u.log.level = loginfo->level; li.u.log.logflags = loginfo->logflags; - ip6t_log_packet(PF_INET6, hooknum, skb, in, out, &li, loginfo->prefix); + ip6t_log_packet(NFPROTO_IPV6, par->hooknum, skb, par->in, par->out, + &li, loginfo->prefix); return XT_CONTINUE; } -static bool -log_tg6_check(const char *tablename, const void *entry, - const struct xt_target *target, void *targinfo, - unsigned int hook_mask) +static bool log_tg6_check(const struct xt_tgchk_param *par) { - const struct ip6t_log_info *loginfo = targinfo; + const struct ip6t_log_info *loginfo = par->targinfo; if (loginfo->level >= 8) { pr_debug("LOG: level %u >= 8\n", loginfo->level); @@ -475,7 +471,7 @@ log_tg6_check(const char *tablename, const void *entry, static struct xt_target log_tg6_reg __read_mostly = { .name = "LOG", - .family = AF_INET6, + .family = NFPROTO_IPV6, .target = log_tg6, .targetsize = sizeof(struct ip6t_log_info), .checkentry = log_tg6_check, @@ -495,7 +491,7 @@ static int __init log_tg6_init(void) ret = xt_register_target(&log_tg6_reg); if (ret < 0) return ret; - nf_log_register(PF_INET6, &ip6t_logger); + nf_log_register(NFPROTO_IPV6, &ip6t_logger); return 0; } diff --git a/net/ipv6/netfilter/ip6t_REJECT.c b/net/ipv6/netfilter/ip6t_REJECT.c index 44c8d65a2431..0981b4ccb8b1 100644 --- a/net/ipv6/netfilter/ip6t_REJECT.c +++ b/net/ipv6/netfilter/ip6t_REJECT.c @@ -35,7 +35,7 @@ MODULE_DESCRIPTION("Xtables: packet \"rejection\" target for IPv6"); MODULE_LICENSE("GPL"); /* Send RST reply */ -static void send_reset(struct sk_buff *oldskb) +static void send_reset(struct net *net, struct sk_buff *oldskb) { struct sk_buff *nskb; struct tcphdr otcph, *tcph; @@ -94,7 +94,7 @@ static void send_reset(struct sk_buff *oldskb) fl.fl_ip_sport = otcph.dest; fl.fl_ip_dport = otcph.source; security_skb_classify_flow(oldskb, &fl); - dst = ip6_route_output(&init_net, NULL, &fl); + dst = ip6_route_output(net, NULL, &fl); if (dst == NULL) return; if (dst->error || xfrm_lookup(&dst, &fl, NULL, 0)) @@ -163,20 +163,20 @@ static void send_reset(struct sk_buff *oldskb) } static inline void -send_unreach(struct sk_buff *skb_in, unsigned char code, unsigned int hooknum) +send_unreach(struct net *net, struct sk_buff *skb_in, unsigned char code, + unsigned int hooknum) { if (hooknum == NF_INET_LOCAL_OUT && skb_in->dev == NULL) - skb_in->dev = init_net.loopback_dev; + skb_in->dev = net->loopback_dev; icmpv6_send(skb_in, ICMPV6_DEST_UNREACH, code, 0, NULL); } static unsigned int -reject_tg6(struct sk_buff *skb, const struct net_device *in, - const struct net_device *out, unsigned int hooknum, - const struct xt_target *target, const void *targinfo) +reject_tg6(struct sk_buff *skb, const struct xt_target_param *par) { - const struct ip6t_reject_info *reject = targinfo; + const struct ip6t_reject_info *reject = par->targinfo; + struct net *net = dev_net((par->in != NULL) ? par->in : par->out); pr_debug("%s: medium point\n", __func__); /* WARNING: This code causes reentry within ip6tables. @@ -184,25 +184,25 @@ reject_tg6(struct sk_buff *skb, const struct net_device *in, must return an absolute verdict. --RR */ switch (reject->with) { case IP6T_ICMP6_NO_ROUTE: - send_unreach(skb, ICMPV6_NOROUTE, hooknum); + send_unreach(net, skb, ICMPV6_NOROUTE, par->hooknum); break; case IP6T_ICMP6_ADM_PROHIBITED: - send_unreach(skb, ICMPV6_ADM_PROHIBITED, hooknum); + send_unreach(net, skb, ICMPV6_ADM_PROHIBITED, par->hooknum); break; case IP6T_ICMP6_NOT_NEIGHBOUR: - send_unreach(skb, ICMPV6_NOT_NEIGHBOUR, hooknum); + send_unreach(net, skb, ICMPV6_NOT_NEIGHBOUR, par->hooknum); break; case IP6T_ICMP6_ADDR_UNREACH: - send_unreach(skb, ICMPV6_ADDR_UNREACH, hooknum); + send_unreach(net, skb, ICMPV6_ADDR_UNREACH, par->hooknum); break; case IP6T_ICMP6_PORT_UNREACH: - send_unreach(skb, ICMPV6_PORT_UNREACH, hooknum); + send_unreach(net, skb, ICMPV6_PORT_UNREACH, par->hooknum); break; case IP6T_ICMP6_ECHOREPLY: /* Do nothing */ break; case IP6T_TCP_RESET: - send_reset(skb); + send_reset(net, skb); break; default: if (net_ratelimit()) @@ -213,13 +213,10 @@ reject_tg6(struct sk_buff *skb, const struct net_device *in, return NF_DROP; } -static bool -reject_tg6_check(const char *tablename, const void *entry, - const struct xt_target *target, void *targinfo, - unsigned int hook_mask) +static bool reject_tg6_check(const struct xt_tgchk_param *par) { - const struct ip6t_reject_info *rejinfo = targinfo; - const struct ip6t_entry *e = entry; + const struct ip6t_reject_info *rejinfo = par->targinfo; + const struct ip6t_entry *e = par->entryinfo; if (rejinfo->with == IP6T_ICMP6_ECHOREPLY) { printk("ip6t_REJECT: ECHOREPLY is not supported.\n"); @@ -237,7 +234,7 @@ reject_tg6_check(const char *tablename, const void *entry, static struct xt_target reject_tg6_reg __read_mostly = { .name = "REJECT", - .family = AF_INET6, + .family = NFPROTO_IPV6, .target = reject_tg6, .targetsize = sizeof(struct ip6t_reject_info), .table = "filter", diff --git a/net/ipv6/netfilter/ip6t_ah.c b/net/ipv6/netfilter/ip6t_ah.c index 429629fd63b6..3a82f24746b9 100644 --- a/net/ipv6/netfilter/ip6t_ah.c +++ b/net/ipv6/netfilter/ip6t_ah.c @@ -36,14 +36,11 @@ spi_match(u_int32_t min, u_int32_t max, u_int32_t spi, bool invert) return r; } -static bool -ah_mt6(const struct sk_buff *skb, const struct net_device *in, - const struct net_device *out, const struct xt_match *match, - const void *matchinfo, int offset, unsigned int protoff, bool *hotdrop) +static bool ah_mt6(const struct sk_buff *skb, const struct xt_match_param *par) { struct ip_auth_hdr _ah; const struct ip_auth_hdr *ah; - const struct ip6t_ah *ahinfo = matchinfo; + const struct ip6t_ah *ahinfo = par->matchinfo; unsigned int ptr; unsigned int hdrlen = 0; int err; @@ -51,13 +48,13 @@ ah_mt6(const struct sk_buff *skb, const struct net_device *in, err = ipv6_find_hdr(skb, &ptr, NEXTHDR_AUTH, NULL); if (err < 0) { if (err != -ENOENT) - *hotdrop = true; + *par->hotdrop = true; return false; } ah = skb_header_pointer(skb, ptr, sizeof(_ah), &_ah); if (ah == NULL) { - *hotdrop = true; + *par->hotdrop = true; return false; } @@ -93,13 +90,9 @@ ah_mt6(const struct sk_buff *skb, const struct net_device *in, !(ahinfo->hdrres && ah->reserved); } -/* Called when user tries to insert an entry of this type. */ -static bool -ah_mt6_check(const char *tablename, const void *entry, - const struct xt_match *match, void *matchinfo, - unsigned int hook_mask) +static bool ah_mt6_check(const struct xt_mtchk_param *par) { - const struct ip6t_ah *ahinfo = matchinfo; + const struct ip6t_ah *ahinfo = par->matchinfo; if (ahinfo->invflags & ~IP6T_AH_INV_MASK) { pr_debug("ip6t_ah: unknown flags %X\n", ahinfo->invflags); @@ -110,7 +103,7 @@ ah_mt6_check(const char *tablename, const void *entry, static struct xt_match ah_mt6_reg __read_mostly = { .name = "ah", - .family = AF_INET6, + .family = NFPROTO_IPV6, .match = ah_mt6, .matchsize = sizeof(struct ip6t_ah), .checkentry = ah_mt6_check, diff --git a/net/ipv6/netfilter/ip6t_eui64.c b/net/ipv6/netfilter/ip6t_eui64.c index 8f331f12b2ec..db610bacbcce 100644 --- a/net/ipv6/netfilter/ip6t_eui64.c +++ b/net/ipv6/netfilter/ip6t_eui64.c @@ -20,18 +20,15 @@ MODULE_LICENSE("GPL"); MODULE_AUTHOR("Andras Kis-Szabo <kisza@sch.bme.hu>"); static bool -eui64_mt6(const struct sk_buff *skb, const struct net_device *in, - const struct net_device *out, const struct xt_match *match, - const void *matchinfo, int offset, unsigned int protoff, - bool *hotdrop) +eui64_mt6(const struct sk_buff *skb, const struct xt_match_param *par) { unsigned char eui64[8]; int i = 0; if (!(skb_mac_header(skb) >= skb->head && skb_mac_header(skb) + ETH_HLEN <= skb->data) && - offset != 0) { - *hotdrop = true; + par->fragoff != 0) { + *par->hotdrop = true; return false; } @@ -60,7 +57,7 @@ eui64_mt6(const struct sk_buff *skb, const struct net_device *in, static struct xt_match eui64_mt6_reg __read_mostly = { .name = "eui64", - .family = AF_INET6, + .family = NFPROTO_IPV6, .match = eui64_mt6, .matchsize = sizeof(int), .hooks = (1 << NF_INET_PRE_ROUTING) | (1 << NF_INET_LOCAL_IN) | diff --git a/net/ipv6/netfilter/ip6t_frag.c b/net/ipv6/netfilter/ip6t_frag.c index e2bbc63dba5b..673aa0a5084e 100644 --- a/net/ipv6/netfilter/ip6t_frag.c +++ b/net/ipv6/netfilter/ip6t_frag.c @@ -35,27 +35,24 @@ id_match(u_int32_t min, u_int32_t max, u_int32_t id, bool invert) } static bool -frag_mt6(const struct sk_buff *skb, const struct net_device *in, - const struct net_device *out, const struct xt_match *match, - const void *matchinfo, int offset, unsigned int protoff, - bool *hotdrop) +frag_mt6(const struct sk_buff *skb, const struct xt_match_param *par) { struct frag_hdr _frag; const struct frag_hdr *fh; - const struct ip6t_frag *fraginfo = matchinfo; + const struct ip6t_frag *fraginfo = par->matchinfo; unsigned int ptr; int err; err = ipv6_find_hdr(skb, &ptr, NEXTHDR_FRAGMENT, NULL); if (err < 0) { if (err != -ENOENT) - *hotdrop = true; + *par->hotdrop = true; return false; } fh = skb_header_pointer(skb, ptr, sizeof(_frag), &_frag); if (fh == NULL) { - *hotdrop = true; + *par->hotdrop = true; return false; } @@ -110,13 +107,9 @@ frag_mt6(const struct sk_buff *skb, const struct net_device *in, && (ntohs(fh->frag_off) & IP6_MF)); } -/* Called when user tries to insert an entry of this type. */ -static bool -frag_mt6_check(const char *tablename, const void *ip, - const struct xt_match *match, void *matchinfo, - unsigned int hook_mask) +static bool frag_mt6_check(const struct xt_mtchk_param *par) { - const struct ip6t_frag *fraginfo = matchinfo; + const struct ip6t_frag *fraginfo = par->matchinfo; if (fraginfo->invflags & ~IP6T_FRAG_INV_MASK) { pr_debug("ip6t_frag: unknown flags %X\n", fraginfo->invflags); @@ -127,7 +120,7 @@ frag_mt6_check(const char *tablename, const void *ip, static struct xt_match frag_mt6_reg __read_mostly = { .name = "frag", - .family = AF_INET6, + .family = NFPROTO_IPV6, .match = frag_mt6, .matchsize = sizeof(struct ip6t_frag), .checkentry = frag_mt6_check, diff --git a/net/ipv6/netfilter/ip6t_hbh.c b/net/ipv6/netfilter/ip6t_hbh.c index 62e39ace0588..cbe8dec9744b 100644 --- a/net/ipv6/netfilter/ip6t_hbh.c +++ b/net/ipv6/netfilter/ip6t_hbh.c @@ -42,14 +42,11 @@ MODULE_ALIAS("ip6t_dst"); */ static bool -hbh_mt6(const struct sk_buff *skb, const struct net_device *in, - const struct net_device *out, const struct xt_match *match, - const void *matchinfo, int offset, unsigned int protoff, - bool *hotdrop) +hbh_mt6(const struct sk_buff *skb, const struct xt_match_param *par) { struct ipv6_opt_hdr _optsh; const struct ipv6_opt_hdr *oh; - const struct ip6t_opts *optinfo = matchinfo; + const struct ip6t_opts *optinfo = par->matchinfo; unsigned int temp; unsigned int ptr; unsigned int hdrlen = 0; @@ -61,16 +58,16 @@ hbh_mt6(const struct sk_buff *skb, const struct net_device *in, unsigned int optlen; int err; - err = ipv6_find_hdr(skb, &ptr, match->data, NULL); + err = ipv6_find_hdr(skb, &ptr, par->match->data, NULL); if (err < 0) { if (err != -ENOENT) - *hotdrop = true; + *par->hotdrop = true; return false; } oh = skb_header_pointer(skb, ptr, sizeof(_optsh), &_optsh); if (oh == NULL) { - *hotdrop = true; + *par->hotdrop = true; return false; } @@ -97,8 +94,6 @@ hbh_mt6(const struct sk_buff *skb, const struct net_device *in, hdrlen -= 2; if (!(optinfo->flags & IP6T_OPTS_OPTS)) { return ret; - } else if (optinfo->flags & IP6T_OPTS_NSTRICT) { - pr_debug("Not strict - not implemented"); } else { pr_debug("Strict "); pr_debug("#%d ", optinfo->optsnr); @@ -165,25 +160,27 @@ hbh_mt6(const struct sk_buff *skb, const struct net_device *in, return false; } -/* Called when user tries to insert an entry of this type. */ -static bool -hbh_mt6_check(const char *tablename, const void *entry, - const struct xt_match *match, void *matchinfo, - unsigned int hook_mask) +static bool hbh_mt6_check(const struct xt_mtchk_param *par) { - const struct ip6t_opts *optsinfo = matchinfo; + const struct ip6t_opts *optsinfo = par->matchinfo; if (optsinfo->invflags & ~IP6T_OPTS_INV_MASK) { pr_debug("ip6t_opts: unknown flags %X\n", optsinfo->invflags); return false; } + + if (optsinfo->flags & IP6T_OPTS_NSTRICT) { + pr_debug("ip6t_opts: Not strict - not implemented"); + return false; + } + return true; } static struct xt_match hbh_mt6_reg[] __read_mostly = { { .name = "hbh", - .family = AF_INET6, + .family = NFPROTO_IPV6, .match = hbh_mt6, .matchsize = sizeof(struct ip6t_opts), .checkentry = hbh_mt6_check, @@ -192,7 +189,7 @@ static struct xt_match hbh_mt6_reg[] __read_mostly = { }, { .name = "dst", - .family = AF_INET6, + .family = NFPROTO_IPV6, .match = hbh_mt6, .matchsize = sizeof(struct ip6t_opts), .checkentry = hbh_mt6_check, diff --git a/net/ipv6/netfilter/ip6t_hl.c b/net/ipv6/netfilter/ip6t_hl.c index 345671673845..c964dca1132d 100644 --- a/net/ipv6/netfilter/ip6t_hl.c +++ b/net/ipv6/netfilter/ip6t_hl.c @@ -19,12 +19,9 @@ MODULE_AUTHOR("Maciej Soltysiak <solt@dns.toxicfilms.tv>"); MODULE_DESCRIPTION("Xtables: IPv6 Hop Limit field match"); MODULE_LICENSE("GPL"); -static bool -hl_mt6(const struct sk_buff *skb, const struct net_device *in, - const struct net_device *out, const struct xt_match *match, - const void *matchinfo, int offset, unsigned int protoff, bool *hotdrop) +static bool hl_mt6(const struct sk_buff *skb, const struct xt_match_param *par) { - const struct ip6t_hl_info *info = matchinfo; + const struct ip6t_hl_info *info = par->matchinfo; const struct ipv6hdr *ip6h = ipv6_hdr(skb); switch (info->mode) { @@ -51,7 +48,7 @@ hl_mt6(const struct sk_buff *skb, const struct net_device *in, static struct xt_match hl_mt6_reg __read_mostly = { .name = "hl", - .family = AF_INET6, + .family = NFPROTO_IPV6, .match = hl_mt6, .matchsize = sizeof(struct ip6t_hl_info), .me = THIS_MODULE, diff --git a/net/ipv6/netfilter/ip6t_ipv6header.c b/net/ipv6/netfilter/ip6t_ipv6header.c index 317a8960a757..14e6724d5672 100644 --- a/net/ipv6/netfilter/ip6t_ipv6header.c +++ b/net/ipv6/netfilter/ip6t_ipv6header.c @@ -27,12 +27,9 @@ MODULE_DESCRIPTION("Xtables: IPv6 header types match"); MODULE_AUTHOR("Andras Kis-Szabo <kisza@sch.bme.hu>"); static bool -ipv6header_mt6(const struct sk_buff *skb, const struct net_device *in, - const struct net_device *out, const struct xt_match *match, - const void *matchinfo, int offset, unsigned int protoff, - bool *hotdrop) +ipv6header_mt6(const struct sk_buff *skb, const struct xt_match_param *par) { - const struct ip6t_ipv6header_info *info = matchinfo; + const struct ip6t_ipv6header_info *info = par->matchinfo; unsigned int temp; int len; u8 nexthdr; @@ -121,12 +118,9 @@ ipv6header_mt6(const struct sk_buff *skb, const struct net_device *in, } } -static bool -ipv6header_mt6_check(const char *tablename, const void *ip, - const struct xt_match *match, void *matchinfo, - unsigned int hook_mask) +static bool ipv6header_mt6_check(const struct xt_mtchk_param *par) { - const struct ip6t_ipv6header_info *info = matchinfo; + const struct ip6t_ipv6header_info *info = par->matchinfo; /* invflags is 0 or 0xff in hard mode */ if ((!info->modeflag) && info->invflags != 0x00 && @@ -138,7 +132,7 @@ ipv6header_mt6_check(const char *tablename, const void *ip, static struct xt_match ipv6header_mt6_reg __read_mostly = { .name = "ipv6header", - .family = AF_INET6, + .family = NFPROTO_IPV6, .match = ipv6header_mt6, .matchsize = sizeof(struct ip6t_ipv6header_info), .checkentry = ipv6header_mt6_check, diff --git a/net/ipv6/netfilter/ip6t_mh.c b/net/ipv6/netfilter/ip6t_mh.c index e06678d07ec8..aafe4e66577b 100644 --- a/net/ipv6/netfilter/ip6t_mh.c +++ b/net/ipv6/netfilter/ip6t_mh.c @@ -37,32 +37,29 @@ type_match(u_int8_t min, u_int8_t max, u_int8_t type, bool invert) return (type >= min && type <= max) ^ invert; } -static bool -mh_mt6(const struct sk_buff *skb, const struct net_device *in, - const struct net_device *out, const struct xt_match *match, - const void *matchinfo, int offset, unsigned int protoff, bool *hotdrop) +static bool mh_mt6(const struct sk_buff *skb, const struct xt_match_param *par) { struct ip6_mh _mh; const struct ip6_mh *mh; - const struct ip6t_mh *mhinfo = matchinfo; + const struct ip6t_mh *mhinfo = par->matchinfo; /* Must not be a fragment. */ - if (offset) + if (par->fragoff != 0) return false; - mh = skb_header_pointer(skb, protoff, sizeof(_mh), &_mh); + mh = skb_header_pointer(skb, par->thoff, sizeof(_mh), &_mh); if (mh == NULL) { /* We've been asked to examine this packet, and we can't. Hence, no choice but to drop. */ duprintf("Dropping evil MH tinygram.\n"); - *hotdrop = true; + *par->hotdrop = true; return false; } if (mh->ip6mh_proto != IPPROTO_NONE) { duprintf("Dropping invalid MH Payload Proto: %u\n", mh->ip6mh_proto); - *hotdrop = true; + *par->hotdrop = true; return false; } @@ -70,13 +67,9 @@ mh_mt6(const struct sk_buff *skb, const struct net_device *in, !!(mhinfo->invflags & IP6T_MH_INV_TYPE)); } -/* Called when user tries to insert an entry of this type. */ -static bool -mh_mt6_check(const char *tablename, const void *entry, - const struct xt_match *match, void *matchinfo, - unsigned int hook_mask) +static bool mh_mt6_check(const struct xt_mtchk_param *par) { - const struct ip6t_mh *mhinfo = matchinfo; + const struct ip6t_mh *mhinfo = par->matchinfo; /* Must specify no unknown invflags */ return !(mhinfo->invflags & ~IP6T_MH_INV_MASK); @@ -84,7 +77,7 @@ mh_mt6_check(const char *tablename, const void *entry, static struct xt_match mh_mt6_reg __read_mostly = { .name = "mh", - .family = AF_INET6, + .family = NFPROTO_IPV6, .checkentry = mh_mt6_check, .match = mh_mt6, .matchsize = sizeof(struct ip6t_mh), diff --git a/net/ipv6/netfilter/ip6t_rt.c b/net/ipv6/netfilter/ip6t_rt.c index 81aaf7aaaabf..356b8d6f6baa 100644 --- a/net/ipv6/netfilter/ip6t_rt.c +++ b/net/ipv6/netfilter/ip6t_rt.c @@ -36,14 +36,11 @@ segsleft_match(u_int32_t min, u_int32_t max, u_int32_t id, bool invert) return r; } -static bool -rt_mt6(const struct sk_buff *skb, const struct net_device *in, - const struct net_device *out, const struct xt_match *match, - const void *matchinfo, int offset, unsigned int protoff, bool *hotdrop) +static bool rt_mt6(const struct sk_buff *skb, const struct xt_match_param *par) { struct ipv6_rt_hdr _route; const struct ipv6_rt_hdr *rh; - const struct ip6t_rt *rtinfo = matchinfo; + const struct ip6t_rt *rtinfo = par->matchinfo; unsigned int temp; unsigned int ptr; unsigned int hdrlen = 0; @@ -55,13 +52,13 @@ rt_mt6(const struct sk_buff *skb, const struct net_device *in, err = ipv6_find_hdr(skb, &ptr, NEXTHDR_ROUTING, NULL); if (err < 0) { if (err != -ENOENT) - *hotdrop = true; + *par->hotdrop = true; return false; } rh = skb_header_pointer(skb, ptr, sizeof(_route), &_route); if (rh == NULL) { - *hotdrop = true; + *par->hotdrop = true; return false; } @@ -189,13 +186,9 @@ rt_mt6(const struct sk_buff *skb, const struct net_device *in, return false; } -/* Called when user tries to insert an entry of this type. */ -static bool -rt_mt6_check(const char *tablename, const void *entry, - const struct xt_match *match, void *matchinfo, - unsigned int hook_mask) +static bool rt_mt6_check(const struct xt_mtchk_param *par) { - const struct ip6t_rt *rtinfo = matchinfo; + const struct ip6t_rt *rtinfo = par->matchinfo; if (rtinfo->invflags & ~IP6T_RT_INV_MASK) { pr_debug("ip6t_rt: unknown flags %X\n", rtinfo->invflags); @@ -214,7 +207,7 @@ rt_mt6_check(const char *tablename, const void *entry, static struct xt_match rt_mt6_reg __read_mostly = { .name = "rt", - .family = AF_INET6, + .family = NFPROTO_IPV6, .match = rt_mt6, .matchsize = sizeof(struct ip6t_rt), .checkentry = rt_mt6_check, diff --git a/net/ipv6/netfilter/ip6table_filter.c b/net/ipv6/netfilter/ip6table_filter.c index f979e48b469b..b110a8a85a14 100644 --- a/net/ipv6/netfilter/ip6table_filter.c +++ b/net/ipv6/netfilter/ip6table_filter.c @@ -61,13 +61,25 @@ static struct xt_table packet_filter = { /* The work comes in here from netfilter.c. */ static unsigned int -ip6t_hook(unsigned int hook, - struct sk_buff *skb, - const struct net_device *in, - const struct net_device *out, - int (*okfn)(struct sk_buff *)) +ip6t_local_in_hook(unsigned int hook, + struct sk_buff *skb, + const struct net_device *in, + const struct net_device *out, + int (*okfn)(struct sk_buff *)) +{ + return ip6t_do_table(skb, hook, in, out, + dev_net(in)->ipv6.ip6table_filter); +} + +static unsigned int +ip6t_forward_hook(unsigned int hook, + struct sk_buff *skb, + const struct net_device *in, + const struct net_device *out, + int (*okfn)(struct sk_buff *)) { - return ip6t_do_table(skb, hook, in, out, init_net.ipv6.ip6table_filter); + return ip6t_do_table(skb, hook, in, out, + dev_net(in)->ipv6.ip6table_filter); } static unsigned int @@ -87,19 +99,20 @@ ip6t_local_out_hook(unsigned int hook, } #endif - return ip6t_do_table(skb, hook, in, out, init_net.ipv6.ip6table_filter); + return ip6t_do_table(skb, hook, in, out, + dev_net(out)->ipv6.ip6table_filter); } static struct nf_hook_ops ip6t_ops[] __read_mostly = { { - .hook = ip6t_hook, + .hook = ip6t_local_in_hook, .owner = THIS_MODULE, .pf = PF_INET6, .hooknum = NF_INET_LOCAL_IN, .priority = NF_IP6_PRI_FILTER, }, { - .hook = ip6t_hook, + .hook = ip6t_forward_hook, .owner = THIS_MODULE, .pf = PF_INET6, .hooknum = NF_INET_FORWARD, diff --git a/net/ipv6/netfilter/ip6table_mangle.c b/net/ipv6/netfilter/ip6table_mangle.c index 27a5e8b48d93..d0b31b259d4d 100644 --- a/net/ipv6/netfilter/ip6table_mangle.c +++ b/net/ipv6/netfilter/ip6table_mangle.c @@ -67,17 +67,29 @@ static struct xt_table packet_mangler = { /* The work comes in here from netfilter.c. */ static unsigned int -ip6t_route_hook(unsigned int hook, +ip6t_in_hook(unsigned int hook, struct sk_buff *skb, const struct net_device *in, const struct net_device *out, int (*okfn)(struct sk_buff *)) { - return ip6t_do_table(skb, hook, in, out, init_net.ipv6.ip6table_mangle); + return ip6t_do_table(skb, hook, in, out, + dev_net(in)->ipv6.ip6table_mangle); } static unsigned int -ip6t_local_hook(unsigned int hook, +ip6t_post_routing_hook(unsigned int hook, + struct sk_buff *skb, + const struct net_device *in, + const struct net_device *out, + int (*okfn)(struct sk_buff *)) +{ + return ip6t_do_table(skb, hook, in, out, + dev_net(out)->ipv6.ip6table_mangle); +} + +static unsigned int +ip6t_local_out_hook(unsigned int hook, struct sk_buff *skb, const struct net_device *in, const struct net_device *out, @@ -108,7 +120,8 @@ ip6t_local_hook(unsigned int hook, /* flowlabel and prio (includes version, which shouldn't change either */ flowlabel = *((u_int32_t *)ipv6_hdr(skb)); - ret = ip6t_do_table(skb, hook, in, out, init_net.ipv6.ip6table_mangle); + ret = ip6t_do_table(skb, hook, in, out, + dev_net(out)->ipv6.ip6table_mangle); if (ret != NF_DROP && ret != NF_STOLEN && (memcmp(&ipv6_hdr(skb)->saddr, &saddr, sizeof(saddr)) @@ -122,35 +135,35 @@ ip6t_local_hook(unsigned int hook, static struct nf_hook_ops ip6t_ops[] __read_mostly = { { - .hook = ip6t_route_hook, + .hook = ip6t_in_hook, .owner = THIS_MODULE, .pf = PF_INET6, .hooknum = NF_INET_PRE_ROUTING, .priority = NF_IP6_PRI_MANGLE, }, { - .hook = ip6t_local_hook, + .hook = ip6t_in_hook, .owner = THIS_MODULE, .pf = PF_INET6, .hooknum = NF_INET_LOCAL_IN, .priority = NF_IP6_PRI_MANGLE, }, { - .hook = ip6t_route_hook, + .hook = ip6t_in_hook, .owner = THIS_MODULE, .pf = PF_INET6, .hooknum = NF_INET_FORWARD, .priority = NF_IP6_PRI_MANGLE, }, { - .hook = ip6t_local_hook, + .hook = ip6t_local_out_hook, .owner = THIS_MODULE, .pf = PF_INET6, .hooknum = NF_INET_LOCAL_OUT, .priority = NF_IP6_PRI_MANGLE, }, { - .hook = ip6t_route_hook, + .hook = ip6t_post_routing_hook, .owner = THIS_MODULE, .pf = PF_INET6, .hooknum = NF_INET_POST_ROUTING, diff --git a/net/ipv6/netfilter/ip6table_raw.c b/net/ipv6/netfilter/ip6table_raw.c index 92b91077ac29..109fab6f831a 100644 --- a/net/ipv6/netfilter/ip6table_raw.c +++ b/net/ipv6/netfilter/ip6table_raw.c @@ -45,25 +45,37 @@ static struct xt_table packet_raw = { /* The work comes in here from netfilter.c. */ static unsigned int -ip6t_hook(unsigned int hook, +ip6t_pre_routing_hook(unsigned int hook, struct sk_buff *skb, const struct net_device *in, const struct net_device *out, int (*okfn)(struct sk_buff *)) { - return ip6t_do_table(skb, hook, in, out, init_net.ipv6.ip6table_raw); + return ip6t_do_table(skb, hook, in, out, + dev_net(in)->ipv6.ip6table_raw); +} + +static unsigned int +ip6t_local_out_hook(unsigned int hook, + struct sk_buff *skb, + const struct net_device *in, + const struct net_device *out, + int (*okfn)(struct sk_buff *)) +{ + return ip6t_do_table(skb, hook, in, out, + dev_net(out)->ipv6.ip6table_raw); } static struct nf_hook_ops ip6t_ops[] __read_mostly = { { - .hook = ip6t_hook, + .hook = ip6t_pre_routing_hook, .pf = PF_INET6, .hooknum = NF_INET_PRE_ROUTING, .priority = NF_IP6_PRI_FIRST, .owner = THIS_MODULE, }, { - .hook = ip6t_hook, + .hook = ip6t_local_out_hook, .pf = PF_INET6, .hooknum = NF_INET_LOCAL_OUT, .priority = NF_IP6_PRI_FIRST, diff --git a/net/ipv6/netfilter/ip6table_security.c b/net/ipv6/netfilter/ip6table_security.c new file mode 100644 index 000000000000..20bc52f13e43 --- /dev/null +++ b/net/ipv6/netfilter/ip6table_security.c @@ -0,0 +1,172 @@ +/* + * "security" table for IPv6 + * + * This is for use by Mandatory Access Control (MAC) security models, + * which need to be able to manage security policy in separate context + * to DAC. + * + * Based on iptable_mangle.c + * + * Copyright (C) 1999 Paul `Rusty' Russell & Michael J. Neuling + * Copyright (C) 2000-2004 Netfilter Core Team <coreteam <at> netfilter.org> + * Copyright (C) 2008 Red Hat, Inc., James Morris <jmorris <at> redhat.com> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ +#include <linux/module.h> +#include <linux/netfilter_ipv6/ip6_tables.h> + +MODULE_LICENSE("GPL"); +MODULE_AUTHOR("James Morris <jmorris <at> redhat.com>"); +MODULE_DESCRIPTION("ip6tables security table, for MAC rules"); + +#define SECURITY_VALID_HOOKS (1 << NF_INET_LOCAL_IN) | \ + (1 << NF_INET_FORWARD) | \ + (1 << NF_INET_LOCAL_OUT) + +static struct +{ + struct ip6t_replace repl; + struct ip6t_standard entries[3]; + struct ip6t_error term; +} initial_table __net_initdata = { + .repl = { + .name = "security", + .valid_hooks = SECURITY_VALID_HOOKS, + .num_entries = 4, + .size = sizeof(struct ip6t_standard) * 3 + sizeof(struct ip6t_error), + .hook_entry = { + [NF_INET_LOCAL_IN] = 0, + [NF_INET_FORWARD] = sizeof(struct ip6t_standard), + [NF_INET_LOCAL_OUT] = sizeof(struct ip6t_standard) * 2, + }, + .underflow = { + [NF_INET_LOCAL_IN] = 0, + [NF_INET_FORWARD] = sizeof(struct ip6t_standard), + [NF_INET_LOCAL_OUT] = sizeof(struct ip6t_standard) * 2, + }, + }, + .entries = { + IP6T_STANDARD_INIT(NF_ACCEPT), /* LOCAL_IN */ + IP6T_STANDARD_INIT(NF_ACCEPT), /* FORWARD */ + IP6T_STANDARD_INIT(NF_ACCEPT), /* LOCAL_OUT */ + }, + .term = IP6T_ERROR_INIT, /* ERROR */ +}; + +static struct xt_table security_table = { + .name = "security", + .valid_hooks = SECURITY_VALID_HOOKS, + .lock = __RW_LOCK_UNLOCKED(security_table.lock), + .me = THIS_MODULE, + .af = AF_INET6, +}; + +static unsigned int +ip6t_local_in_hook(unsigned int hook, + struct sk_buff *skb, + const struct net_device *in, + const struct net_device *out, + int (*okfn)(struct sk_buff *)) +{ + return ip6t_do_table(skb, hook, in, out, + dev_net(in)->ipv6.ip6table_security); +} + +static unsigned int +ip6t_forward_hook(unsigned int hook, + struct sk_buff *skb, + const struct net_device *in, + const struct net_device *out, + int (*okfn)(struct sk_buff *)) +{ + return ip6t_do_table(skb, hook, in, out, + dev_net(in)->ipv6.ip6table_security); +} + +static unsigned int +ip6t_local_out_hook(unsigned int hook, + struct sk_buff *skb, + const struct net_device *in, + const struct net_device *out, + int (*okfn)(struct sk_buff *)) +{ + /* TBD: handle short packets via raw socket */ + return ip6t_do_table(skb, hook, in, out, + dev_net(out)->ipv6.ip6table_security); +} + +static struct nf_hook_ops ip6t_ops[] __read_mostly = { + { + .hook = ip6t_local_in_hook, + .owner = THIS_MODULE, + .pf = PF_INET6, + .hooknum = NF_INET_LOCAL_IN, + .priority = NF_IP6_PRI_SECURITY, + }, + { + .hook = ip6t_forward_hook, + .owner = THIS_MODULE, + .pf = PF_INET6, + .hooknum = NF_INET_FORWARD, + .priority = NF_IP6_PRI_SECURITY, + }, + { + .hook = ip6t_local_out_hook, + .owner = THIS_MODULE, + .pf = PF_INET6, + .hooknum = NF_INET_LOCAL_OUT, + .priority = NF_IP6_PRI_SECURITY, + }, +}; + +static int __net_init ip6table_security_net_init(struct net *net) +{ + net->ipv6.ip6table_security = + ip6t_register_table(net, &security_table, &initial_table.repl); + + if (IS_ERR(net->ipv6.ip6table_security)) + return PTR_ERR(net->ipv6.ip6table_security); + + return 0; +} + +static void __net_exit ip6table_security_net_exit(struct net *net) +{ + ip6t_unregister_table(net->ipv6.ip6table_security); +} + +static struct pernet_operations ip6table_security_net_ops = { + .init = ip6table_security_net_init, + .exit = ip6table_security_net_exit, +}; + +static int __init ip6table_security_init(void) +{ + int ret; + + ret = register_pernet_subsys(&ip6table_security_net_ops); + if (ret < 0) + return ret; + + ret = nf_register_hooks(ip6t_ops, ARRAY_SIZE(ip6t_ops)); + if (ret < 0) + goto cleanup_table; + + return ret; + +cleanup_table: + unregister_pernet_subsys(&ip6table_security_net_ops); + return ret; +} + +static void __exit ip6table_security_fini(void) +{ + nf_unregister_hooks(ip6t_ops, ARRAY_SIZE(ip6t_ops)); + unregister_pernet_subsys(&ip6table_security_net_ops); +} + +module_init(ip6table_security_init); +module_exit(ip6table_security_fini); diff --git a/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c b/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c index 85050c072abd..e91db16611d9 100644 --- a/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c +++ b/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c @@ -211,11 +211,10 @@ static unsigned int ipv6_defrag(unsigned int hooknum, return NF_STOLEN; } -static unsigned int ipv6_conntrack_in(unsigned int hooknum, - struct sk_buff *skb, - const struct net_device *in, - const struct net_device *out, - int (*okfn)(struct sk_buff *)) +static unsigned int __ipv6_conntrack_in(struct net *net, + unsigned int hooknum, + struct sk_buff *skb, + int (*okfn)(struct sk_buff *)) { struct sk_buff *reasm = skb->nfct_reasm; @@ -225,7 +224,7 @@ static unsigned int ipv6_conntrack_in(unsigned int hooknum, if (!reasm->nfct) { unsigned int ret; - ret = nf_conntrack_in(PF_INET6, hooknum, reasm); + ret = nf_conntrack_in(net, PF_INET6, hooknum, reasm); if (ret != NF_ACCEPT) return ret; } @@ -235,7 +234,16 @@ static unsigned int ipv6_conntrack_in(unsigned int hooknum, return NF_ACCEPT; } - return nf_conntrack_in(PF_INET6, hooknum, skb); + return nf_conntrack_in(net, PF_INET6, hooknum, skb); +} + +static unsigned int ipv6_conntrack_in(unsigned int hooknum, + struct sk_buff *skb, + const struct net_device *in, + const struct net_device *out, + int (*okfn)(struct sk_buff *)) +{ + return __ipv6_conntrack_in(dev_net(in), hooknum, skb, okfn); } static unsigned int ipv6_conntrack_local(unsigned int hooknum, @@ -250,7 +258,7 @@ static unsigned int ipv6_conntrack_local(unsigned int hooknum, printk("ipv6_conntrack_local: packet too short\n"); return NF_ACCEPT; } - return ipv6_conntrack_in(hooknum, skb, in, out, okfn); + return __ipv6_conntrack_in(dev_net(out), hooknum, skb, okfn); } static struct nf_hook_ops ipv6_conntrack_ops[] __read_mostly = { diff --git a/net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c b/net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c index ee713b03e9ec..05726177903f 100644 --- a/net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c +++ b/net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c @@ -81,7 +81,7 @@ static int icmpv6_packet(struct nf_conn *ct, const struct sk_buff *skb, unsigned int dataoff, enum ip_conntrack_info ctinfo, - int pf, + u_int8_t pf, unsigned int hooknum) { /* Try to delete connection immediately after all replies: @@ -89,12 +89,11 @@ static int icmpv6_packet(struct nf_conn *ct, means this will only run once even if count hits zero twice (theoretically possible with SMP) */ if (CTINFO2DIR(ctinfo) == IP_CT_DIR_REPLY) { - if (atomic_dec_and_test(&ct->proto.icmp.count) - && del_timer(&ct->timeout)) - ct->timeout.function((unsigned long)ct); + if (atomic_dec_and_test(&ct->proto.icmp.count)) + nf_ct_kill_acct(ct, ctinfo, skb); } else { atomic_inc(&ct->proto.icmp.count); - nf_conntrack_event_cache(IPCT_PROTOINFO_VOLATILE, skb); + nf_conntrack_event_cache(IPCT_PROTOINFO_VOLATILE, ct); nf_ct_refresh_acct(ct, ctinfo, skb, nf_ct_icmpv6_timeout); } @@ -123,7 +122,8 @@ static bool icmpv6_new(struct nf_conn *ct, const struct sk_buff *skb, } static int -icmpv6_error_message(struct sk_buff *skb, +icmpv6_error_message(struct net *net, + struct sk_buff *skb, unsigned int icmp6off, enum ip_conntrack_info *ctinfo, unsigned int hooknum) @@ -157,7 +157,7 @@ icmpv6_error_message(struct sk_buff *skb, *ctinfo = IP_CT_RELATED; - h = nf_conntrack_find_get(&intuple); + h = nf_conntrack_find_get(net, &intuple); if (!h) { pr_debug("icmpv6_error: no match\n"); return -NF_ACCEPT; @@ -173,21 +173,21 @@ icmpv6_error_message(struct sk_buff *skb, } static int -icmpv6_error(struct sk_buff *skb, unsigned int dataoff, - enum ip_conntrack_info *ctinfo, int pf, unsigned int hooknum) +icmpv6_error(struct net *net, struct sk_buff *skb, unsigned int dataoff, + enum ip_conntrack_info *ctinfo, u_int8_t pf, unsigned int hooknum) { const struct icmp6hdr *icmp6h; struct icmp6hdr _ih; icmp6h = skb_header_pointer(skb, dataoff, sizeof(_ih), &_ih); if (icmp6h == NULL) { - if (LOG_INVALID(IPPROTO_ICMPV6)) + if (LOG_INVALID(net, IPPROTO_ICMPV6)) nf_log_packet(PF_INET6, 0, skb, NULL, NULL, NULL, "nf_ct_icmpv6: short packet "); return -NF_ACCEPT; } - if (nf_conntrack_checksum && hooknum == NF_INET_PRE_ROUTING && + if (net->ct.sysctl_checksum && hooknum == NF_INET_PRE_ROUTING && nf_ip6_checksum(skb, hooknum, dataoff, IPPROTO_ICMPV6)) { nf_log_packet(PF_INET6, 0, skb, NULL, NULL, NULL, "nf_ct_icmpv6: ICMPv6 checksum failed\n"); @@ -198,7 +198,7 @@ icmpv6_error(struct sk_buff *skb, unsigned int dataoff, if (icmp6h->icmp6_type >= 128) return NF_ACCEPT; - return icmpv6_error_message(skb, dataoff, ctinfo, hooknum); + return icmpv6_error_message(net, skb, dataoff, ctinfo, hooknum); } #if defined(CONFIG_NF_CT_NETLINK) || defined(CONFIG_NF_CT_NETLINK_MODULE) diff --git a/net/ipv6/netfilter/nf_conntrack_reasm.c b/net/ipv6/netfilter/nf_conntrack_reasm.c index e65e26e210ee..9967ac7a01a8 100644 --- a/net/ipv6/netfilter/nf_conntrack_reasm.c +++ b/net/ipv6/netfilter/nf_conntrack_reasm.c @@ -27,7 +27,6 @@ #include <linux/ipv6.h> #include <linux/icmpv6.h> #include <linux/random.h> -#include <linux/jhash.h> #include <net/sock.h> #include <net/snmp.h> @@ -103,39 +102,12 @@ struct ctl_table nf_ct_ipv6_sysctl_table[] = { }; #endif -static unsigned int ip6qhashfn(__be32 id, const struct in6_addr *saddr, - const struct in6_addr *daddr) -{ - u32 a, b, c; - - a = (__force u32)saddr->s6_addr32[0]; - b = (__force u32)saddr->s6_addr32[1]; - c = (__force u32)saddr->s6_addr32[2]; - - a += JHASH_GOLDEN_RATIO; - b += JHASH_GOLDEN_RATIO; - c += nf_frags.rnd; - __jhash_mix(a, b, c); - - a += (__force u32)saddr->s6_addr32[3]; - b += (__force u32)daddr->s6_addr32[0]; - c += (__force u32)daddr->s6_addr32[1]; - __jhash_mix(a, b, c); - - a += (__force u32)daddr->s6_addr32[2]; - b += (__force u32)daddr->s6_addr32[3]; - c += (__force u32)id; - __jhash_mix(a, b, c); - - return c & (INETFRAGS_HASHSZ - 1); -} - static unsigned int nf_hashfn(struct inet_frag_queue *q) { const struct nf_ct_frag6_queue *nq; nq = container_of(q, struct nf_ct_frag6_queue, q); - return ip6qhashfn(nq->id, &nq->saddr, &nq->daddr); + return inet6_hash_frag(nq->id, &nq->saddr, &nq->daddr, nf_frags.rnd); } static void nf_skb_free(struct sk_buff *skb) @@ -207,9 +179,10 @@ fq_find(__be32 id, struct in6_addr *src, struct in6_addr *dst) arg.id = id; arg.src = src; arg.dst = dst; - hash = ip6qhashfn(id, src, dst); - local_bh_disable(); + read_lock_bh(&nf_frags.lock); + hash = inet6_hash_frag(id, src, dst, nf_frags.rnd); + q = inet_frag_find(&nf_init_frags, &nf_frags, &arg, hash); local_bh_enable(); if (q == NULL) @@ -415,8 +388,8 @@ nf_ct_frag6_reasm(struct nf_ct_frag6_queue *fq, struct net_device *dev) fq_kill(fq); - BUG_TRAP(head != NULL); - BUG_TRAP(NFCT_FRAG6_CB(head)->offset == 0); + WARN_ON(head == NULL); + WARN_ON(NFCT_FRAG6_CB(head)->offset != 0); /* Unfragmented part is taken from the first segment. */ payload_len = ((head->data - skb_network_header(head)) - diff --git a/net/ipv6/proc.c b/net/ipv6/proc.c index df0736a4cafa..07f0b76e7427 100644 --- a/net/ipv6/proc.c +++ b/net/ipv6/proc.c @@ -7,8 +7,6 @@ * PROC file system. This is very similar to the IPv4 version, * except it reports the sockets in the INET6 address family. * - * Version: $Id: proc.c,v 1.17 2002/02/01 22:01:04 davem Exp $ - * * Authors: David S. Miller (davem@caip.rutgers.edu) * YOSHIFUJI Hideaki <yoshfuji@linux-ipv6.org> * @@ -31,8 +29,6 @@ #include <net/transp_v6.h> #include <net/ipv6.h> -static struct proc_dir_entry *proc_net_devsnmp6; - static int sockstat6_seq_show(struct seq_file *seq, void *v) { struct net *net = seq->private; @@ -50,6 +46,19 @@ static int sockstat6_seq_show(struct seq_file *seq, void *v) return 0; } +static int sockstat6_seq_open(struct inode *inode, struct file *file) +{ + return single_open_net(inode, file, sockstat6_seq_show); +} + +static const struct file_operations sockstat6_seq_fops = { + .owner = THIS_MODULE, + .open = sockstat6_seq_open, + .read = seq_read, + .llseek = seq_lseek, + .release = single_release_net, +}; + static struct snmp_mib snmp6_ipstats_list[] = { /* ipv6 mib according to RFC 2465 */ SNMP_MIB_ITEM("Ip6InReceives", IPSTATS_MIB_INRECEIVES), @@ -166,69 +175,52 @@ snmp6_seq_show_item(struct seq_file *seq, void **mib, struct snmp_mib *itemlist) static int snmp6_seq_show(struct seq_file *seq, void *v) { - struct inet6_dev *idev = (struct inet6_dev *)seq->private; - - if (idev) { - seq_printf(seq, "%-32s\t%u\n", "ifIndex", idev->dev->ifindex); - snmp6_seq_show_item(seq, (void **)idev->stats.ipv6, snmp6_ipstats_list); - snmp6_seq_show_item(seq, (void **)idev->stats.icmpv6, snmp6_icmp6_list); - snmp6_seq_show_icmpv6msg(seq, (void **)idev->stats.icmpv6msg); - } else { - snmp6_seq_show_item(seq, (void **)ipv6_statistics, snmp6_ipstats_list); - snmp6_seq_show_item(seq, (void **)icmpv6_statistics, snmp6_icmp6_list); - snmp6_seq_show_icmpv6msg(seq, (void **)icmpv6msg_statistics); - snmp6_seq_show_item(seq, (void **)udp_stats_in6, snmp6_udp6_list); - snmp6_seq_show_item(seq, (void **)udplite_stats_in6, snmp6_udplite6_list); - } - return 0; -} - -static int sockstat6_seq_open(struct inode *inode, struct file *file) -{ - int err; - struct net *net; - - err = -ENXIO; - net = get_proc_net(inode); - if (net == NULL) - goto err_net; - - err = single_open(file, sockstat6_seq_show, net); - if (err < 0) - goto err_open; - + struct net *net = (struct net *)seq->private; + + snmp6_seq_show_item(seq, (void **)net->mib.ipv6_statistics, + snmp6_ipstats_list); + snmp6_seq_show_item(seq, (void **)net->mib.icmpv6_statistics, + snmp6_icmp6_list); + snmp6_seq_show_icmpv6msg(seq, (void **)net->mib.icmpv6msg_statistics); + snmp6_seq_show_item(seq, (void **)net->mib.udp_stats_in6, + snmp6_udp6_list); + snmp6_seq_show_item(seq, (void **)net->mib.udplite_stats_in6, + snmp6_udplite6_list); return 0; - -err_open: - put_net(net); -err_net: - return err; } -static int sockstat6_seq_release(struct inode *inode, struct file *file) +static int snmp6_seq_open(struct inode *inode, struct file *file) { - struct net *net = ((struct seq_file *)file->private_data)->private; - - put_net(net); - return single_release(inode, file); + return single_open_net(inode, file, snmp6_seq_show); } -static const struct file_operations sockstat6_seq_fops = { +static const struct file_operations snmp6_seq_fops = { .owner = THIS_MODULE, - .open = sockstat6_seq_open, + .open = snmp6_seq_open, .read = seq_read, .llseek = seq_lseek, - .release = sockstat6_seq_release, + .release = single_release_net, }; -static int snmp6_seq_open(struct inode *inode, struct file *file) +static int snmp6_dev_seq_show(struct seq_file *seq, void *v) { - return single_open(file, snmp6_seq_show, PDE(inode)->data); + struct inet6_dev *idev = (struct inet6_dev *)seq->private; + + seq_printf(seq, "%-32s\t%u\n", "ifIndex", idev->dev->ifindex); + snmp6_seq_show_item(seq, (void **)idev->stats.ipv6, snmp6_ipstats_list); + snmp6_seq_show_item(seq, (void **)idev->stats.icmpv6, snmp6_icmp6_list); + snmp6_seq_show_icmpv6msg(seq, (void **)idev->stats.icmpv6msg); + return 0; } -static const struct file_operations snmp6_seq_fops = { +static int snmp6_dev_seq_open(struct inode *inode, struct file *file) +{ + return single_open(file, snmp6_dev_seq_show, PDE(inode)->data); +} + +static const struct file_operations snmp6_dev_seq_fops = { .owner = THIS_MODULE, - .open = snmp6_seq_open, + .open = snmp6_dev_seq_open, .read = seq_read, .llseek = seq_lseek, .release = single_release, @@ -237,18 +229,18 @@ static const struct file_operations snmp6_seq_fops = { int snmp6_register_dev(struct inet6_dev *idev) { struct proc_dir_entry *p; + struct net *net; if (!idev || !idev->dev) return -EINVAL; - if (dev_net(idev->dev) != &init_net) - return 0; - - if (!proc_net_devsnmp6) + net = dev_net(idev->dev); + if (!net->mib.proc_net_devsnmp6) return -ENOENT; p = proc_create_data(idev->dev->name, S_IRUGO, - proc_net_devsnmp6, &snmp6_seq_fops, idev); + net->mib.proc_net_devsnmp6, + &snmp6_dev_seq_fops, idev); if (!p) return -ENOMEM; @@ -258,12 +250,13 @@ int snmp6_register_dev(struct inet6_dev *idev) int snmp6_unregister_dev(struct inet6_dev *idev) { - if (!proc_net_devsnmp6) + struct net *net = dev_net(idev->dev); + if (!net->mib.proc_net_devsnmp6) return -ENOENT; if (!idev || !idev->stats.proc_dir_entry) return -EINVAL; remove_proc_entry(idev->stats.proc_dir_entry->name, - proc_net_devsnmp6); + net->mib.proc_net_devsnmp6); idev->stats.proc_dir_entry = NULL; return 0; } @@ -273,12 +266,27 @@ static int ipv6_proc_init_net(struct net *net) if (!proc_net_fops_create(net, "sockstat6", S_IRUGO, &sockstat6_seq_fops)) return -ENOMEM; + + if (!proc_net_fops_create(net, "snmp6", S_IRUGO, &snmp6_seq_fops)) + goto proc_snmp6_fail; + + net->mib.proc_net_devsnmp6 = proc_mkdir("dev_snmp6", net->proc_net); + if (!net->mib.proc_net_devsnmp6) + goto proc_dev_snmp6_fail; return 0; + +proc_snmp6_fail: + proc_net_remove(net, "sockstat6"); +proc_dev_snmp6_fail: + proc_net_remove(net, "dev_snmp6"); + return -ENOMEM; } static void ipv6_proc_exit_net(struct net *net) { proc_net_remove(net, "sockstat6"); + proc_net_remove(net, "dev_snmp6"); + proc_net_remove(net, "snmp6"); } static struct pernet_operations ipv6_proc_ops = { @@ -288,34 +296,11 @@ static struct pernet_operations ipv6_proc_ops = { int __init ipv6_misc_proc_init(void) { - int rc = 0; - - if (register_pernet_subsys(&ipv6_proc_ops)) - goto proc_net_fail; - - if (!proc_net_fops_create(&init_net, "snmp6", S_IRUGO, &snmp6_seq_fops)) - goto proc_snmp6_fail; - - proc_net_devsnmp6 = proc_mkdir("dev_snmp6", init_net.proc_net); - if (!proc_net_devsnmp6) - goto proc_dev_snmp6_fail; -out: - return rc; - -proc_dev_snmp6_fail: - proc_net_remove(&init_net, "snmp6"); -proc_snmp6_fail: - unregister_pernet_subsys(&ipv6_proc_ops); -proc_net_fail: - rc = -ENOMEM; - goto out; + return register_pernet_subsys(&ipv6_proc_ops); } void ipv6_misc_proc_exit(void) { - proc_net_remove(&init_net, "sockstat6"); - proc_net_remove(&init_net, "dev_snmp6"); - proc_net_remove(&init_net, "snmp6"); unregister_pernet_subsys(&ipv6_proc_ops); } diff --git a/net/ipv6/protocol.c b/net/ipv6/protocol.c index f929f47b925e..9ab789159913 100644 --- a/net/ipv6/protocol.c +++ b/net/ipv6/protocol.c @@ -5,8 +5,6 @@ * * PF_INET6 protocol dispatch tables. * - * Version: $Id: protocol.c,v 1.10 2001/05/18 02:25:49 davem Exp $ - * * Authors: Pedro Roque <roque@di.fc.ul.pt> * * This program is free software; you can redistribute it and/or diff --git a/net/ipv6/raw.c b/net/ipv6/raw.c index 3aee12310d94..2ba04d41dc25 100644 --- a/net/ipv6/raw.c +++ b/net/ipv6/raw.c @@ -7,8 +7,6 @@ * * Adapted from linux/net/ipv4/raw.c * - * $Id: raw.c,v 1.51 2002/02/01 22:01:04 davem Exp $ - * * Fixes: * Hideaki YOSHIFUJI : sin6_scope_id support * YOSHIFUJI,H.@USAGI : raw checksum (RFC2292(bis) compliance) @@ -379,14 +377,14 @@ static inline int rawv6_rcv_skb(struct sock * sk, struct sk_buff * skb) skb_checksum_complete(skb)) { atomic_inc(&sk->sk_drops); kfree_skb(skb); - return 0; + return NET_RX_DROP; } /* Charge it to the socket. */ if (sock_queue_rcv_skb(sk,skb)<0) { atomic_inc(&sk->sk_drops); kfree_skb(skb); - return 0; + return NET_RX_DROP; } return 0; @@ -431,7 +429,7 @@ int rawv6_rcv(struct sock *sk, struct sk_buff *skb) if (skb_checksum_complete(skb)) { atomic_inc(&sk->sk_drops); kfree_skb(skb); - return 0; + return NET_RX_DROP; } } @@ -640,7 +638,7 @@ static int rawv6_send_hdrinc(struct sock *sk, void *from, int length, if (err) goto error_fault; - IP6_INC_STATS(rt->rt6i_idev, IPSTATS_MIB_OUTREQUESTS); + IP6_INC_STATS(sock_net(sk), rt->rt6i_idev, IPSTATS_MIB_OUTREQUESTS); err = NF_HOOK(PF_INET6, NF_INET_LOCAL_OUT, skb, NULL, rt->u.dst.dev, dst_output); if (err > 0) @@ -654,7 +652,7 @@ error_fault: err = -EFAULT; kfree_skb(skb); error: - IP6_INC_STATS(rt->rt6i_idev, IPSTATS_MIB_OUTDISCARDS); + IP6_INC_STATS(sock_net(sk), rt->rt6i_idev, IPSTATS_MIB_OUTDISCARDS); return err; } @@ -1159,18 +1157,18 @@ static int rawv6_ioctl(struct sock *sk, int cmd, unsigned long arg) static void rawv6_close(struct sock *sk, long timeout) { if (inet_sk(sk)->num == IPPROTO_RAW) - ip6_ra_control(sk, -1, NULL); + ip6_ra_control(sk, -1); ip6mr_sk_done(sk); sk_common_release(sk); } -static int raw6_destroy(struct sock *sk) +static void raw6_destroy(struct sock *sk) { lock_sock(sk); ip6_flush_pending_frames(sk); release_sock(sk); - return inet6_destroy_sock(sk); + inet6_destroy_sock(sk); } static int rawv6_init_sk(struct sock *sk) @@ -1253,7 +1251,7 @@ static int raw6_seq_show(struct seq_file *seq, void *v) "local_address " "remote_address " "st tx_queue rx_queue tr tm->when retrnsmt" - " uid timeout inode drops\n"); + " uid timeout inode ref pointer drops\n"); else raw6_sock_seq_show(seq, v, raw_seq_private(seq)->bucket); return 0; diff --git a/net/ipv6/reassembly.c b/net/ipv6/reassembly.c index 798cabc7535b..af12de071f4c 100644 --- a/net/ipv6/reassembly.c +++ b/net/ipv6/reassembly.c @@ -5,8 +5,6 @@ * Authors: * Pedro Roque <roque@di.fc.ul.pt> * - * $Id: reassembly.c,v 1.26 2001/03/07 22:00:57 davem Exp $ - * * Based on: net/ipv4/ip_fragment.c * * This program is free software; you can redistribute it and/or @@ -101,8 +99,8 @@ static int ip6_frag_reasm(struct frag_queue *fq, struct sk_buff *prev, * callers should be careful not to use the hash value outside the ipfrag_lock * as doing so could race with ipfrag_hash_rnd being recalculated. */ -static unsigned int ip6qhashfn(__be32 id, struct in6_addr *saddr, - struct in6_addr *daddr) +unsigned int inet6_hash_frag(__be32 id, const struct in6_addr *saddr, + const struct in6_addr *daddr, u32 rnd) { u32 a, b, c; @@ -112,7 +110,7 @@ static unsigned int ip6qhashfn(__be32 id, struct in6_addr *saddr, a += JHASH_GOLDEN_RATIO; b += JHASH_GOLDEN_RATIO; - c += ip6_frags.rnd; + c += rnd; __jhash_mix(a, b, c); a += (__force u32)saddr->s6_addr32[3]; @@ -127,13 +125,14 @@ static unsigned int ip6qhashfn(__be32 id, struct in6_addr *saddr, return c & (INETFRAGS_HASHSZ - 1); } +EXPORT_SYMBOL_GPL(inet6_hash_frag); static unsigned int ip6_hashfn(struct inet_frag_queue *q) { struct frag_queue *fq; fq = container_of(q, struct frag_queue, q); - return ip6qhashfn(fq->id, &fq->saddr, &fq->daddr); + return inet6_hash_frag(fq->id, &fq->saddr, &fq->daddr, ip6_frags.rnd); } int ip6_frag_match(struct inet_frag_queue *q, void *a) @@ -190,7 +189,7 @@ static void ip6_evictor(struct net *net, struct inet6_dev *idev) evicted = inet_frag_evictor(&net->ipv6.frags, &ip6_frags); if (evicted) - IP6_ADD_STATS_BH(idev, IPSTATS_MIB_REASMFAILS, evicted); + IP6_ADD_STATS_BH(net, idev, IPSTATS_MIB_REASMFAILS, evicted); } static void ip6_frag_expire(unsigned long data) @@ -214,8 +213,8 @@ static void ip6_frag_expire(unsigned long data) goto out; rcu_read_lock(); - IP6_INC_STATS_BH(__in6_dev_get(dev), IPSTATS_MIB_REASMTIMEOUT); - IP6_INC_STATS_BH(__in6_dev_get(dev), IPSTATS_MIB_REASMFAILS); + IP6_INC_STATS_BH(net, __in6_dev_get(dev), IPSTATS_MIB_REASMTIMEOUT); + IP6_INC_STATS_BH(net, __in6_dev_get(dev), IPSTATS_MIB_REASMFAILS); rcu_read_unlock(); /* Don't send error if the first segment did not arrive. */ @@ -247,7 +246,9 @@ fq_find(struct net *net, __be32 id, struct in6_addr *src, struct in6_addr *dst, arg.id = id; arg.src = src; arg.dst = dst; - hash = ip6qhashfn(id, src, dst); + + read_lock(&ip6_frags.lock); + hash = inet6_hash_frag(id, src, dst, ip6_frags.rnd); q = inet_frag_find(&net->ipv6.frags, &ip6_frags, &arg, hash); if (q == NULL) @@ -256,7 +257,7 @@ fq_find(struct net *net, __be32 id, struct in6_addr *src, struct in6_addr *dst, return container_of(q, struct frag_queue, q); oom: - IP6_INC_STATS_BH(idev, IPSTATS_MIB_REASMFAILS); + IP6_INC_STATS_BH(net, idev, IPSTATS_MIB_REASMFAILS); return NULL; } @@ -266,6 +267,7 @@ static int ip6_frag_queue(struct frag_queue *fq, struct sk_buff *skb, struct sk_buff *prev, *next; struct net_device *dev; int offset, end; + struct net *net = dev_net(skb->dst->dev); if (fq->q.last_in & INET_FRAG_COMPLETE) goto err; @@ -275,7 +277,7 @@ static int ip6_frag_queue(struct frag_queue *fq, struct sk_buff *skb, ((u8 *)(fhdr + 1) - (u8 *)(ipv6_hdr(skb) + 1))); if ((unsigned int)end > IPV6_MAXPLEN) { - IP6_INC_STATS_BH(ip6_dst_idev(skb->dst), + IP6_INC_STATS_BH(net, ip6_dst_idev(skb->dst), IPSTATS_MIB_INHDRERRORS); icmpv6_param_prob(skb, ICMPV6_HDR_FIELD, ((u8 *)&fhdr->frag_off - @@ -308,7 +310,7 @@ static int ip6_frag_queue(struct frag_queue *fq, struct sk_buff *skb, /* RFC2460 says always send parameter problem in * this case. -DaveM */ - IP6_INC_STATS_BH(ip6_dst_idev(skb->dst), + IP6_INC_STATS_BH(net, ip6_dst_idev(skb->dst), IPSTATS_MIB_INHDRERRORS); icmpv6_param_prob(skb, ICMPV6_HDR_FIELD, offsetof(struct ipv6hdr, payload_len)); @@ -432,7 +434,8 @@ static int ip6_frag_queue(struct frag_queue *fq, struct sk_buff *skb, return -1; err: - IP6_INC_STATS(ip6_dst_idev(skb->dst), IPSTATS_MIB_REASMFAILS); + IP6_INC_STATS(net, ip6_dst_idev(skb->dst), + IPSTATS_MIB_REASMFAILS); kfree_skb(skb); return -1; } @@ -473,8 +476,8 @@ static int ip6_frag_reasm(struct frag_queue *fq, struct sk_buff *prev, fq->q.fragments = head; } - BUG_TRAP(head != NULL); - BUG_TRAP(FRAG6_CB(head)->offset == 0); + WARN_ON(head == NULL); + WARN_ON(FRAG6_CB(head)->offset != 0); /* Unfragmented part is taken from the first segment. */ payload_len = ((head->data - skb_network_header(head)) - @@ -548,7 +551,8 @@ static int ip6_frag_reasm(struct frag_queue *fq, struct sk_buff *prev, head->csum); rcu_read_lock(); - IP6_INC_STATS_BH(__in6_dev_get(dev), IPSTATS_MIB_REASMOKS); + IP6_INC_STATS_BH(dev_net(dev), + __in6_dev_get(dev), IPSTATS_MIB_REASMOKS); rcu_read_unlock(); fq->q.fragments = NULL; return 1; @@ -562,7 +566,8 @@ out_oom: printk(KERN_DEBUG "ip6_frag_reasm: no memory for reassembly\n"); out_fail: rcu_read_lock(); - IP6_INC_STATS_BH(__in6_dev_get(dev), IPSTATS_MIB_REASMFAILS); + IP6_INC_STATS_BH(dev_net(dev), + __in6_dev_get(dev), IPSTATS_MIB_REASMFAILS); rcu_read_unlock(); return -1; } @@ -572,24 +577,17 @@ static int ipv6_frag_rcv(struct sk_buff *skb) struct frag_hdr *fhdr; struct frag_queue *fq; struct ipv6hdr *hdr = ipv6_hdr(skb); - struct net *net; + struct net *net = dev_net(skb->dst->dev); - IP6_INC_STATS_BH(ip6_dst_idev(skb->dst), IPSTATS_MIB_REASMREQDS); + IP6_INC_STATS_BH(net, ip6_dst_idev(skb->dst), IPSTATS_MIB_REASMREQDS); /* Jumbo payload inhibits frag. header */ - if (hdr->payload_len==0) { - IP6_INC_STATS(ip6_dst_idev(skb->dst), IPSTATS_MIB_INHDRERRORS); - icmpv6_param_prob(skb, ICMPV6_HDR_FIELD, - skb_network_header_len(skb)); - return -1; - } + if (hdr->payload_len==0) + goto fail_hdr; + if (!pskb_may_pull(skb, (skb_transport_offset(skb) + - sizeof(struct frag_hdr)))) { - IP6_INC_STATS(ip6_dst_idev(skb->dst), IPSTATS_MIB_INHDRERRORS); - icmpv6_param_prob(skb, ICMPV6_HDR_FIELD, - skb_network_header_len(skb)); - return -1; - } + sizeof(struct frag_hdr)))) + goto fail_hdr; hdr = ipv6_hdr(skb); fhdr = (struct frag_hdr *)skb_transport_header(skb); @@ -597,13 +595,13 @@ static int ipv6_frag_rcv(struct sk_buff *skb) if (!(fhdr->frag_off & htons(0xFFF9))) { /* It is not a fragmented frame */ skb->transport_header += sizeof(struct frag_hdr); - IP6_INC_STATS_BH(ip6_dst_idev(skb->dst), IPSTATS_MIB_REASMOKS); + IP6_INC_STATS_BH(net, + ip6_dst_idev(skb->dst), IPSTATS_MIB_REASMOKS); IP6CB(skb)->nhoff = (u8 *)fhdr - skb_network_header(skb); return 1; } - net = dev_net(skb->dev); if (atomic_read(&net->ipv6.frags.mem) > net->ipv6.frags.high_thresh) ip6_evictor(net, ip6_dst_idev(skb->dst)); @@ -620,9 +618,14 @@ static int ipv6_frag_rcv(struct sk_buff *skb) return ret; } - IP6_INC_STATS_BH(ip6_dst_idev(skb->dst), IPSTATS_MIB_REASMFAILS); + IP6_INC_STATS_BH(net, ip6_dst_idev(skb->dst), IPSTATS_MIB_REASMFAILS); kfree_skb(skb); return -1; + +fail_hdr: + IP6_INC_STATS(net, ip6_dst_idev(skb->dst), IPSTATS_MIB_INHDRERRORS); + icmpv6_param_prob(skb, ICMPV6_HDR_FIELD, skb_network_header_len(skb)); + return -1; } static struct inet6_protocol frag_protocol = @@ -632,7 +635,7 @@ static struct inet6_protocol frag_protocol = }; #ifdef CONFIG_SYSCTL -static struct ctl_table ip6_frags_ctl_table[] = { +static struct ctl_table ip6_frags_ns_ctl_table[] = { { .ctl_name = NET_IPV6_IP6FRAG_HIGH_THRESH, .procname = "ip6frag_high_thresh", @@ -658,6 +661,10 @@ static struct ctl_table ip6_frags_ctl_table[] = { .proc_handler = &proc_dointvec_jiffies, .strategy = &sysctl_jiffies, }, + { } +}; + +static struct ctl_table ip6_frags_ctl_table[] = { { .ctl_name = NET_IPV6_IP6FRAG_SECRET_INTERVAL, .procname = "ip6frag_secret_interval", @@ -670,21 +677,20 @@ static struct ctl_table ip6_frags_ctl_table[] = { { } }; -static int ip6_frags_sysctl_register(struct net *net) +static int ip6_frags_ns_sysctl_register(struct net *net) { struct ctl_table *table; struct ctl_table_header *hdr; - table = ip6_frags_ctl_table; + table = ip6_frags_ns_ctl_table; if (net != &init_net) { - table = kmemdup(table, sizeof(ip6_frags_ctl_table), GFP_KERNEL); + table = kmemdup(table, sizeof(ip6_frags_ns_ctl_table), GFP_KERNEL); if (table == NULL) goto err_alloc; table[0].data = &net->ipv6.frags.high_thresh; table[1].data = &net->ipv6.frags.low_thresh; table[2].data = &net->ipv6.frags.timeout; - table[3].mode &= ~0222; } hdr = register_net_sysctl_table(net, net_ipv6_ctl_path, table); @@ -701,7 +707,7 @@ err_alloc: return -ENOMEM; } -static void ip6_frags_sysctl_unregister(struct net *net) +static void ip6_frags_ns_sysctl_unregister(struct net *net) { struct ctl_table *table; @@ -709,13 +715,36 @@ static void ip6_frags_sysctl_unregister(struct net *net) unregister_net_sysctl_table(net->ipv6.sysctl.frags_hdr); kfree(table); } + +static struct ctl_table_header *ip6_ctl_header; + +static int ip6_frags_sysctl_register(void) +{ + ip6_ctl_header = register_net_sysctl_rotable(net_ipv6_ctl_path, + ip6_frags_ctl_table); + return ip6_ctl_header == NULL ? -ENOMEM : 0; +} + +static void ip6_frags_sysctl_unregister(void) +{ + unregister_net_sysctl_table(ip6_ctl_header); +} #else -static inline int ip6_frags_sysctl_register(struct net *net) +static inline int ip6_frags_ns_sysctl_register(struct net *net) +{ + return 0; +} + +static inline void ip6_frags_ns_sysctl_unregister(struct net *net) +{ +} + +static inline int ip6_frags_sysctl_register(void) { return 0; } -static inline void ip6_frags_sysctl_unregister(struct net *net) +static inline void ip6_frags_sysctl_unregister(void) { } #endif @@ -728,12 +757,12 @@ static int ipv6_frags_init_net(struct net *net) inet_frags_init_net(&net->ipv6.frags); - return ip6_frags_sysctl_register(net); + return ip6_frags_ns_sysctl_register(net); } static void ipv6_frags_exit_net(struct net *net) { - ip6_frags_sysctl_unregister(net); + ip6_frags_ns_sysctl_unregister(net); inet_frags_exit_net(&net->ipv6.frags, &ip6_frags); } @@ -750,7 +779,13 @@ int __init ipv6_frag_init(void) if (ret) goto out; - register_pernet_subsys(&ip6_frags_ops); + ret = ip6_frags_sysctl_register(); + if (ret) + goto err_sysctl; + + ret = register_pernet_subsys(&ip6_frags_ops); + if (ret) + goto err_pernet; ip6_frags.hashfn = ip6_hashfn; ip6_frags.constructor = ip6_frag_init; @@ -763,11 +798,18 @@ int __init ipv6_frag_init(void) inet_frags_init(&ip6_frags); out: return ret; + +err_pernet: + ip6_frags_sysctl_unregister(); +err_sysctl: + inet6_del_protocol(&frag_protocol, IPPROTO_FRAGMENT); + goto out; } void ipv6_frag_exit(void) { inet_frags_fini(&ip6_frags); + ip6_frags_sysctl_unregister(); unregister_pernet_subsys(&ip6_frags_ops); inet6_del_protocol(&frag_protocol, IPPROTO_FRAGMENT); } diff --git a/net/ipv6/route.c b/net/ipv6/route.c index d1f3e19b06c7..89dc69924340 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -5,8 +5,6 @@ * Authors: * Pedro Roque <roque@di.fc.ul.pt> * - * $Id: route.c,v 1.56 2001/10/31 21:55:55 davem Exp $ - * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public License * as published by the Free Software Foundation; either version @@ -230,7 +228,7 @@ static __inline__ int rt6_check_expired(const struct rt6_info *rt) static inline int rt6_need_strict(struct in6_addr *daddr) { return (ipv6_addr_type(daddr) & - (IPV6_ADDR_MULTICAST | IPV6_ADDR_LINKLOCAL)); + (IPV6_ADDR_MULTICAST | IPV6_ADDR_LINKLOCAL | IPV6_ADDR_LOOPBACK)); } /* @@ -239,21 +237,26 @@ static inline int rt6_need_strict(struct in6_addr *daddr) static inline struct rt6_info *rt6_device_match(struct net *net, struct rt6_info *rt, + struct in6_addr *saddr, int oif, - int strict) + int flags) { struct rt6_info *local = NULL; struct rt6_info *sprt; - if (oif) { - for (sprt = rt; sprt; sprt = sprt->u.dst.rt6_next) { - struct net_device *dev = sprt->rt6i_dev; + if (!oif && ipv6_addr_any(saddr)) + goto out; + + for (sprt = rt; sprt; sprt = sprt->u.dst.rt6_next) { + struct net_device *dev = sprt->rt6i_dev; + + if (oif) { if (dev->ifindex == oif) return sprt; if (dev->flags & IFF_LOOPBACK) { if (sprt->rt6i_idev == NULL || sprt->rt6i_idev->dev->ifindex != oif) { - if (strict && oif) + if (flags & RT6_LOOKUP_F_IFACE && oif) continue; if (local && (!oif || local->rt6i_idev->dev->ifindex == oif)) @@ -261,14 +264,21 @@ static inline struct rt6_info *rt6_device_match(struct net *net, } local = sprt; } + } else { + if (ipv6_chk_addr(net, saddr, dev, + flags & RT6_LOOKUP_F_IFACE)) + return sprt; } + } + if (oif) { if (local) return local; - if (strict) + if (flags & RT6_LOOKUP_F_IFACE) return net->ipv6.ip6_null_entry; } +out: return rt; } @@ -541,7 +551,7 @@ static struct rt6_info *ip6_pol_route_lookup(struct net *net, fn = fib6_lookup(&table->tb6_root, &fl->fl6_dst, &fl->fl6_src); restart: rt = fn->leaf; - rt = rt6_device_match(net, rt, fl->oif, flags); + rt = rt6_device_match(net, rt, &fl->fl6_src, fl->oif, flags); BACKTRACK(net, &fl->fl6_src); out: dst_use(&rt->u.dst, jiffies); @@ -666,7 +676,7 @@ static struct rt6_info *ip6_pol_route(struct net *net, struct fib6_table *table, int strict = 0; int attempts = 3; int err; - int reachable = ipv6_devconf.forwarding ? 0 : RT6_LOOKUP_F_REACHABLE; + int reachable = net->ipv6.devconf_all->forwarding ? 0 : RT6_LOOKUP_F_REACHABLE; strict |= flags & RT6_LOOKUP_F_IFACE; @@ -968,13 +978,12 @@ out: return &rt->u.dst; } -int icmp6_dst_gc(int *more) +int icmp6_dst_gc(void) { struct dst_entry *dst, *next, **pprev; - int freed; + int more = 0; next = NULL; - freed = 0; spin_lock_bh(&icmp6_dst_lock); pprev = &icmp6_dst_gc_list; @@ -983,16 +992,34 @@ int icmp6_dst_gc(int *more) if (!atomic_read(&dst->__refcnt)) { *pprev = dst->next; dst_free(dst); - freed++; } else { pprev = &dst->next; - (*more)++; + ++more; } } spin_unlock_bh(&icmp6_dst_lock); - return freed; + return more; +} + +static void icmp6_clean_all(int (*func)(struct rt6_info *rt, void *arg), + void *arg) +{ + struct dst_entry *dst, **pprev; + + spin_lock_bh(&icmp6_dst_lock); + pprev = &icmp6_dst_gc_list; + while ((dst = *pprev) != NULL) { + struct rt6_info *rt = (struct rt6_info *) dst; + if (func(rt, arg)) { + *pprev = dst->next; + dst_free(dst); + } else { + pprev = &dst->next; + } + } + spin_unlock_bh(&icmp6_dst_lock); } static int ip6_dst_gc(struct dst_ops *ops) @@ -1048,7 +1075,7 @@ int ip6_dst_hoplimit(struct dst_entry *dst) hoplimit = idev->cnf.hop_limit; in6_dev_put(idev); } else - hoplimit = ipv6_devconf.hop_limit; + hoplimit = dev_net(dev)->ipv6.devconf_all->hop_limit; } return hoplimit; } @@ -1241,7 +1268,7 @@ install_route: if (dst_metric(&rt->u.dst, RTAX_HOPLIMIT) == 0) rt->u.dst.metrics[RTAX_HOPLIMIT-1] = -1; - if (!dst_metric(&rt->u.dst, RTAX_MTU)) + if (!dst_mtu(&rt->u.dst)) rt->u.dst.metrics[RTAX_MTU-1] = ipv6_get_mtu(dev); if (!dst_metric(&rt->u.dst, RTAX_ADVMSS)) rt->u.dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(net, dst_mtu(&rt->u.dst)); @@ -1806,16 +1833,19 @@ int ipv6_route_ioctl(struct net *net, unsigned int cmd, void __user *arg) static int ip6_pkt_drop(struct sk_buff *skb, int code, int ipstats_mib_noroutes) { int type; + struct dst_entry *dst = skb->dst; switch (ipstats_mib_noroutes) { case IPSTATS_MIB_INNOROUTES: type = ipv6_addr_type(&ipv6_hdr(skb)->daddr); if (type == IPV6_ADDR_ANY || type == IPV6_ADDR_RESERVED) { - IP6_INC_STATS(ip6_dst_idev(skb->dst), IPSTATS_MIB_INADDRERRORS); + IP6_INC_STATS(dev_net(dst->dev), ip6_dst_idev(dst), + IPSTATS_MIB_INADDRERRORS); break; } /* FALLTHROUGH */ case IPSTATS_MIB_OUTNOROUTES: - IP6_INC_STATS(ip6_dst_idev(skb->dst), ipstats_mib_noroutes); + IP6_INC_STATS(dev_net(dst->dev), ip6_dst_idev(dst), + ipstats_mib_noroutes); break; } icmpv6_send(skb, ICMPV6_DEST_UNREACH, code, 0, skb->dev); @@ -1922,6 +1952,7 @@ void rt6_ifdown(struct net *net, struct net_device *dev) }; fib6_clean_all(net, fib6_ifdown, 0, &adn); + icmp6_clean_all(fib6_ifdown, &adn); } struct rt6_mtu_change_arg @@ -2098,7 +2129,8 @@ static inline size_t rt6_nlmsg_size(void) + nla_total_size(sizeof(struct rta_cacheinfo)); } -static int rt6_fill_node(struct sk_buff *skb, struct rt6_info *rt, +static int rt6_fill_node(struct net *net, + struct sk_buff *skb, struct rt6_info *rt, struct in6_addr *dst, struct in6_addr *src, int iif, int type, u32 pid, u32 seq, int prefix, int nowait, unsigned int flags) @@ -2179,8 +2211,9 @@ static int rt6_fill_node(struct sk_buff *skb, struct rt6_info *rt, #endif NLA_PUT_U32(skb, RTA_IIF, iif); } else if (dst) { + struct inet6_dev *idev = ip6_dst_idev(&rt->u.dst); struct in6_addr saddr_buf; - if (ipv6_dev_get_saddr(ip6_dst_idev(&rt->u.dst)->dev, + if (ipv6_dev_get_saddr(net, idev ? idev->dev : NULL, dst, 0, &saddr_buf) == 0) NLA_PUT(skb, RTA_PREFSRC, 16, &saddr_buf); } @@ -2225,7 +2258,8 @@ int rt6_dump_route(struct rt6_info *rt, void *p_arg) } else prefix = 0; - return rt6_fill_node(arg->skb, rt, NULL, NULL, 0, RTM_NEWROUTE, + return rt6_fill_node(arg->net, + arg->skb, rt, NULL, NULL, 0, RTM_NEWROUTE, NETLINK_CB(arg->cb->skb).pid, arg->cb->nlh->nlmsg_seq, prefix, 0, NLM_F_MULTI); } @@ -2291,7 +2325,7 @@ static int inet6_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr* nlh, void rt = (struct rt6_info*) ip6_route_output(net, NULL, &fl); skb->dst = &rt->u.dst; - err = rt6_fill_node(skb, rt, &fl.fl6_dst, &fl.fl6_src, iif, + err = rt6_fill_node(net, skb, rt, &fl.fl6_dst, &fl.fl6_src, iif, RTM_NEWROUTE, NETLINK_CB(in_skb).pid, nlh->nlmsg_seq, 0, 0, 0); if (err < 0) { @@ -2318,7 +2352,7 @@ void inet6_rt_notify(int event, struct rt6_info *rt, struct nl_info *info) if (skb == NULL) goto errout; - err = rt6_fill_node(skb, rt, NULL, NULL, 0, + err = rt6_fill_node(net, skb, rt, NULL, NULL, 0, event, info->pid, seq, 0, 0, 0); if (err < 0) { /* -EMSGSIZE implies BUG in rt6_nlmsg_size() */ @@ -2406,26 +2440,7 @@ static int ipv6_route_show(struct seq_file *m, void *v) static int ipv6_route_open(struct inode *inode, struct file *file) { - int err; - struct net *net = get_proc_net(inode); - if (!net) - return -ENXIO; - - err = single_open(file, ipv6_route_show, net); - if (err < 0) { - put_net(net); - return err; - } - - return 0; -} - -static int ipv6_route_release(struct inode *inode, struct file *file) -{ - struct seq_file *seq = file->private_data; - struct net *net = seq->private; - put_net(net); - return single_release(inode, file); + return single_open_net(inode, file, ipv6_route_show); } static const struct file_operations ipv6_route_proc_fops = { @@ -2433,7 +2448,7 @@ static const struct file_operations ipv6_route_proc_fops = { .open = ipv6_route_open, .read = seq_read, .llseek = seq_lseek, - .release = ipv6_route_release, + .release = single_release_net, }; static int rt6_stats_seq_show(struct seq_file *seq, void *v) @@ -2453,26 +2468,7 @@ static int rt6_stats_seq_show(struct seq_file *seq, void *v) static int rt6_stats_seq_open(struct inode *inode, struct file *file) { - int err; - struct net *net = get_proc_net(inode); - if (!net) - return -ENXIO; - - err = single_open(file, rt6_stats_seq_show, net); - if (err < 0) { - put_net(net); - return err; - } - - return 0; -} - -static int rt6_stats_seq_release(struct inode *inode, struct file *file) -{ - struct seq_file *seq = file->private_data; - struct net *net = (struct net *)seq->private; - put_net(net); - return single_release(inode, file); + return single_open_net(inode, file, rt6_stats_seq_show); } static const struct file_operations rt6_stats_seq_fops = { @@ -2480,7 +2476,7 @@ static const struct file_operations rt6_stats_seq_fops = { .open = rt6_stats_seq_open, .read = seq_read, .llseek = seq_lseek, - .release = rt6_stats_seq_release, + .release = single_release_net, }; #endif /* CONFIG_PROC_FS */ @@ -2638,10 +2634,8 @@ static int ip6_route_net_init(struct net *net) net->ipv6.ip6_prohibit_entry = kmemdup(&ip6_prohibit_entry_template, sizeof(*net->ipv6.ip6_prohibit_entry), GFP_KERNEL); - if (!net->ipv6.ip6_prohibit_entry) { - kfree(net->ipv6.ip6_null_entry); - goto out; - } + if (!net->ipv6.ip6_prohibit_entry) + goto out_ip6_null_entry; net->ipv6.ip6_prohibit_entry->u.dst.path = (struct dst_entry *)net->ipv6.ip6_prohibit_entry; net->ipv6.ip6_prohibit_entry->u.dst.ops = net->ipv6.ip6_dst_ops; @@ -2649,16 +2643,22 @@ static int ip6_route_net_init(struct net *net) net->ipv6.ip6_blk_hole_entry = kmemdup(&ip6_blk_hole_entry_template, sizeof(*net->ipv6.ip6_blk_hole_entry), GFP_KERNEL); - if (!net->ipv6.ip6_blk_hole_entry) { - kfree(net->ipv6.ip6_null_entry); - kfree(net->ipv6.ip6_prohibit_entry); - goto out; - } + if (!net->ipv6.ip6_blk_hole_entry) + goto out_ip6_prohibit_entry; net->ipv6.ip6_blk_hole_entry->u.dst.path = (struct dst_entry *)net->ipv6.ip6_blk_hole_entry; net->ipv6.ip6_blk_hole_entry->u.dst.ops = net->ipv6.ip6_dst_ops; #endif + net->ipv6.sysctl.flush_delay = 0; + net->ipv6.sysctl.ip6_rt_max_size = 4096; + net->ipv6.sysctl.ip6_rt_gc_min_interval = HZ / 2; + net->ipv6.sysctl.ip6_rt_gc_timeout = 60*HZ; + net->ipv6.sysctl.ip6_rt_gc_interval = 30*HZ; + net->ipv6.sysctl.ip6_rt_gc_elasticity = 9; + net->ipv6.sysctl.ip6_rt_mtu_expires = 10*60*HZ; + net->ipv6.sysctl.ip6_rt_min_advmss = IPV6_MIN_MTU - 20 - 40; + #ifdef CONFIG_PROC_FS proc_net_fops_create(net, "ipv6_route", 0, &ipv6_route_proc_fops); proc_net_fops_create(net, "rt6_stats", S_IRUGO, &rt6_stats_seq_fops); @@ -2669,6 +2669,12 @@ static int ip6_route_net_init(struct net *net) out: return ret; +#ifdef CONFIG_IPV6_MULTIPLE_TABLES +out_ip6_prohibit_entry: + kfree(net->ipv6.ip6_prohibit_entry); +out_ip6_null_entry: + kfree(net->ipv6.ip6_null_entry); +#endif out_ip6_dst_ops: release_net(net->ipv6.ip6_dst_ops->dst_net); kfree(net->ipv6.ip6_dst_ops); @@ -2715,6 +2721,8 @@ int __init ip6_route_init(void) if (ret) goto out_kmem_cache; + ip6_dst_blackhole_ops.kmem_cachep = ip6_dst_ops_template.kmem_cachep; + /* Registering of the loopback is done before this portion of code, * the loopback reference in rt6_info will not be taken, do it * manually for init_net */ diff --git a/net/ipv6/sit.c b/net/ipv6/sit.c index 32e871a6c25a..b7a50e968506 100644 --- a/net/ipv6/sit.c +++ b/net/ipv6/sit.c @@ -6,8 +6,6 @@ * Pedro Roque <roque@di.fc.ul.pt> * Alexey Kuznetsov <kuznet@ms2.inr.ac.ru> * - * $Id: sit.c,v 1.53 2001/09/25 05:09:53 davem Exp $ - * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public License * as published by the Free Software Foundation; either version @@ -493,13 +491,13 @@ static int ipip6_rcv(struct sk_buff *skb) if ((tunnel->dev->priv_flags & IFF_ISATAP) && !isatap_chksrc(skb, iph, tunnel)) { - tunnel->stat.rx_errors++; + tunnel->dev->stats.rx_errors++; read_unlock(&ipip6_lock); kfree_skb(skb); return 0; } - tunnel->stat.rx_packets++; - tunnel->stat.rx_bytes += skb->len; + tunnel->dev->stats.rx_packets++; + tunnel->dev->stats.rx_bytes += skb->len; skb->dev = tunnel->dev; dst_release(skb->dst); skb->dst = NULL; @@ -539,7 +537,7 @@ static inline __be32 try_6to4(struct in6_addr *v6dst) static int ipip6_tunnel_xmit(struct sk_buff *skb, struct net_device *dev) { struct ip_tunnel *tunnel = netdev_priv(dev); - struct net_device_stats *stats = &tunnel->stat; + struct net_device_stats *stats = &tunnel->dev->stats; struct iphdr *tiph = &tunnel->parms.iph; struct ipv6hdr *iph6 = ipv6_hdr(skb); u8 tos = tunnel->parms.iph.tos; @@ -553,7 +551,7 @@ static int ipip6_tunnel_xmit(struct sk_buff *skb, struct net_device *dev) int addr_type; if (tunnel->recursion++) { - tunnel->stat.collisions++; + stats->collisions++; goto tx_error; } @@ -620,20 +618,20 @@ static int ipip6_tunnel_xmit(struct sk_buff *skb, struct net_device *dev) .oif = tunnel->parms.link, .proto = IPPROTO_IPV6 }; if (ip_route_output_key(dev_net(dev), &rt, &fl)) { - tunnel->stat.tx_carrier_errors++; + stats->tx_carrier_errors++; goto tx_error_icmp; } } if (rt->rt_type != RTN_UNICAST) { ip_rt_put(rt); - tunnel->stat.tx_carrier_errors++; + stats->tx_carrier_errors++; goto tx_error_icmp; } tdev = rt->u.dst.dev; if (tdev == dev) { ip_rt_put(rt); - tunnel->stat.collisions++; + stats->collisions++; goto tx_error; } @@ -643,7 +641,7 @@ static int ipip6_tunnel_xmit(struct sk_buff *skb, struct net_device *dev) mtu = skb->dst ? dst_mtu(skb->dst) : dev->mtu; if (mtu < 68) { - tunnel->stat.collisions++; + stats->collisions++; ip_rt_put(rt); goto tx_error; } @@ -920,11 +918,6 @@ done: return err; } -static struct net_device_stats *ipip6_tunnel_get_stats(struct net_device *dev) -{ - return &(((struct ip_tunnel*)netdev_priv(dev))->stat); -} - static int ipip6_tunnel_change_mtu(struct net_device *dev, int new_mtu) { if (new_mtu < IPV6_MIN_MTU || new_mtu > 0xFFF8 - sizeof(struct iphdr)) @@ -938,7 +931,6 @@ static void ipip6_tunnel_setup(struct net_device *dev) dev->uninit = ipip6_tunnel_uninit; dev->destructor = free_netdev; dev->hard_start_xmit = ipip6_tunnel_xmit; - dev->get_stats = ipip6_tunnel_get_stats; dev->do_ioctl = ipip6_tunnel_ioctl; dev->change_mtu = ipip6_tunnel_change_mtu; diff --git a/net/ipv6/syncookies.c b/net/ipv6/syncookies.c index 3ecc1157994e..ec394cf5a19b 100644 --- a/net/ipv6/syncookies.c +++ b/net/ipv6/syncookies.c @@ -137,7 +137,7 @@ __u32 cookie_v6_init_sequence(struct sock *sk, struct sk_buff *skb, __u16 *mssp) ; *mssp = msstab[mssind] + 1; - NET_INC_STATS_BH(LINUX_MIB_SYNCOOKIESSENT); + NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_SYNCOOKIESSENT); return secure_tcp_syn_cookie(&iph->saddr, &iph->daddr, th->source, th->dest, ntohl(th->seq), @@ -177,11 +177,11 @@ struct sock *cookie_v6_check(struct sock *sk, struct sk_buff *skb) if (time_after(jiffies, tp->last_synq_overflow + TCP_TIMEOUT_INIT) || (mss = cookie_check(skb, cookie)) == 0) { - NET_INC_STATS_BH(LINUX_MIB_SYNCOOKIESFAILED); + NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_SYNCOOKIESFAILED); goto out; } - NET_INC_STATS_BH(LINUX_MIB_SYNCOOKIESRECV); + NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_SYNCOOKIESRECV); /* check for timestamp cookie support */ memset(&tcp_opt, 0, sizeof(tcp_opt)); @@ -199,10 +199,8 @@ struct sock *cookie_v6_check(struct sock *sk, struct sk_buff *skb) ireq6 = inet6_rsk(req); treq = tcp_rsk(req); - if (security_inet_conn_request(sk, skb, req)) { - reqsk_free(req); - goto out; - } + if (security_inet_conn_request(sk, skb, req)) + goto out_free; req->mss = mss; ireq->rmt_port = th->source; @@ -223,6 +221,7 @@ struct sock *cookie_v6_check(struct sock *sk, struct sk_buff *skb) req->expires = 0UL; req->retrans = 0; + ireq->ecn_ok = 0; ireq->snd_wscale = tcp_opt.snd_wscale; ireq->rcv_wscale = tcp_opt.rcv_wscale; ireq->sack_ok = tcp_opt.sack_ok; @@ -254,14 +253,13 @@ struct sock *cookie_v6_check(struct sock *sk, struct sk_buff *skb) fl.fl_ip_dport = inet_rsk(req)->rmt_port; fl.fl_ip_sport = inet_sk(sk)->sport; security_req_classify_flow(req, &fl); - if (ip6_dst_lookup(sk, &dst, &fl)) { - reqsk_free(req); - goto out; - } + if (ip6_dst_lookup(sk, &dst, &fl)) + goto out_free; + if (final_p) ipv6_addr_copy(&fl.fl6_dst, final_p); if ((xfrm_lookup(&dst, &fl, sk, 0)) < 0) - goto out; + goto out_free; } req->window_clamp = tp->window_clamp ? :dst_metric(dst, RTAX_WINDOW); @@ -272,7 +270,10 @@ struct sock *cookie_v6_check(struct sock *sk, struct sk_buff *skb) ireq->rcv_wscale = rcv_wscale; ret = get_cookie_sock(sk, skb, req, dst); - -out: return ret; +out: + return ret; +out_free: + reqsk_free(req); + return NULL; } diff --git a/net/ipv6/sysctl_net_ipv6.c b/net/ipv6/sysctl_net_ipv6.c index 3804dcbbfab0..587f8f60c489 100644 --- a/net/ipv6/sysctl_net_ipv6.c +++ b/net/ipv6/sysctl_net_ipv6.c @@ -37,6 +37,10 @@ static ctl_table ipv6_table_template[] = { .mode = 0644, .proc_handler = &proc_dointvec }, + { .ctl_name = 0 } +}; + +static ctl_table ipv6_table[] = { { .ctl_name = NET_IPV6_MLD_MAX_MSF, .procname = "mld_max_msf", @@ -80,12 +84,6 @@ static int ipv6_sysctl_net_init(struct net *net) ipv6_table[2].data = &net->ipv6.sysctl.bindv6only; - /* We don't want this value to be per namespace, it should be global - to all namespaces, so make it read-only when we are not in the - init network namespace */ - if (net != &init_net) - ipv6_table[3].mode = 0444; - net->ipv6.sysctl.table = register_net_sysctl_table(net, net_ipv6_ctl_path, ipv6_table); if (!net->ipv6.sysctl.table) @@ -126,12 +124,45 @@ static struct pernet_operations ipv6_sysctl_net_ops = { .exit = ipv6_sysctl_net_exit, }; +static struct ctl_table_header *ip6_header; + int ipv6_sysctl_register(void) { - return register_pernet_subsys(&ipv6_sysctl_net_ops); + int err = -ENOMEM;; + + ip6_header = register_net_sysctl_rotable(net_ipv6_ctl_path, ipv6_table); + if (ip6_header == NULL) + goto out; + + err = register_pernet_subsys(&ipv6_sysctl_net_ops); + if (err) + goto err_pernet; +out: + return err; + +err_pernet: + unregister_net_sysctl_table(ip6_header); + goto out; } void ipv6_sysctl_unregister(void) { + unregister_net_sysctl_table(ip6_header); unregister_pernet_subsys(&ipv6_sysctl_net_ops); } + +static struct ctl_table_header *ip6_base; + +int ipv6_static_sysctl_register(void) +{ + static struct ctl_table empty[1]; + ip6_base = register_sysctl_paths(net_ipv6_ctl_path, empty); + if (ip6_base == NULL) + return -ENOMEM; + return 0; +} + +void ipv6_static_sysctl_unregister(void) +{ + unregister_net_sysctl_table(ip6_base); +} diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c index cb46749d4c32..e5310c9b84dc 100644 --- a/net/ipv6/tcp_ipv6.c +++ b/net/ipv6/tcp_ipv6.c @@ -5,8 +5,6 @@ * Authors: * Pedro Roque <roque@di.fc.ul.pt> * - * $Id: tcp_ipv6.c,v 1.144 2002/02/01 22:01:04 davem Exp $ - * * Based on: * linux/net/ipv4/tcp.c * linux/net/ipv4/tcp_input.c @@ -71,9 +69,8 @@ #include <linux/scatterlist.h> static void tcp_v6_send_reset(struct sock *sk, struct sk_buff *skb); -static void tcp_v6_reqsk_send_ack(struct sk_buff *skb, struct request_sock *req); -static void tcp_v6_send_check(struct sock *sk, int len, - struct sk_buff *skb); +static void tcp_v6_reqsk_send_ack(struct sock *sk, struct sk_buff *skb, + struct request_sock *req); static int tcp_v6_do_rcv(struct sock *sk, struct sk_buff *skb); @@ -82,6 +79,12 @@ static struct inet_connection_sock_af_ops ipv6_specific; #ifdef CONFIG_TCP_MD5SIG static struct tcp_sock_af_ops tcp_sock_ipv6_specific; static struct tcp_sock_af_ops tcp_sock_ipv6_mapped_specific; +#else +static struct tcp_md5sig_key *tcp_v6_md5_do_lookup(struct sock *sk, + struct in6_addr *addr) +{ + return NULL; +} #endif static void tcp_v6_hash(struct sock *sk) @@ -321,12 +324,14 @@ static void tcp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt, int err; struct tcp_sock *tp; __u32 seq; + struct net *net = dev_net(skb->dev); - sk = inet6_lookup(dev_net(skb->dev), &tcp_hashinfo, &hdr->daddr, + sk = inet6_lookup(net, &tcp_hashinfo, &hdr->daddr, th->dest, &hdr->saddr, th->source, skb->dev->ifindex); if (sk == NULL) { - ICMP6_INC_STATS_BH(__in6_dev_get(skb->dev), ICMP6_MIB_INERRORS); + ICMP6_INC_STATS_BH(net, __in6_dev_get(skb->dev), + ICMP6_MIB_INERRORS); return; } @@ -337,7 +342,7 @@ static void tcp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt, bh_lock_sock(sk); if (sock_owned_by_user(sk)) - NET_INC_STATS_BH(LINUX_MIB_LOCKDROPPEDICMPS); + NET_INC_STATS_BH(net, LINUX_MIB_LOCKDROPPEDICMPS); if (sk->sk_state == TCP_CLOSE) goto out; @@ -346,7 +351,7 @@ static void tcp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt, seq = ntohl(th->seq); if (sk->sk_state != TCP_LISTEN && !between(seq, tp->snd_una, tp->snd_nxt)) { - NET_INC_STATS_BH(LINUX_MIB_OUTOFWINDOWICMPS); + NET_INC_STATS_BH(net, LINUX_MIB_OUTOFWINDOWICMPS); goto out; } @@ -418,10 +423,10 @@ static void tcp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt, /* ICMPs are not backlogged, hence we cannot get * an established socket here. */ - BUG_TRAP(req->sk == NULL); + WARN_ON(req->sk != NULL); if (seq != tcp_rsk(req)->snt_isn) { - NET_INC_STATS_BH(LINUX_MIB_OUTOFWINDOWICMPS); + NET_INC_STATS_BH(net, LINUX_MIB_OUTOFWINDOWICMPS); goto out; } @@ -733,109 +738,105 @@ static int tcp_v6_parse_md5_keys (struct sock *sk, char __user *optval, return tcp_v6_md5_do_add(sk, &sin6->sin6_addr, newkey, cmd.tcpm_keylen); } -static int tcp_v6_do_calc_md5_hash(char *md5_hash, struct tcp_md5sig_key *key, - struct in6_addr *saddr, - struct in6_addr *daddr, - struct tcphdr *th, int protocol, - unsigned int tcplen) +static int tcp_v6_md5_hash_pseudoheader(struct tcp_md5sig_pool *hp, + struct in6_addr *daddr, + struct in6_addr *saddr, int nbytes) { - struct scatterlist sg[4]; - __u16 data_len; - int block = 0; - __sum16 cksum; - struct tcp_md5sig_pool *hp; struct tcp6_pseudohdr *bp; - struct hash_desc *desc; - int err; - unsigned int nbytes = 0; + struct scatterlist sg; - hp = tcp_get_md5sig_pool(); - if (!hp) { - printk(KERN_WARNING "%s(): hash pool not found...\n", __func__); - goto clear_hash_noput; - } bp = &hp->md5_blk.ip6; - desc = &hp->md5_desc; - /* 1. TCP pseudo-header (RFC2460) */ ipv6_addr_copy(&bp->saddr, saddr); ipv6_addr_copy(&bp->daddr, daddr); - bp->len = htonl(tcplen); - bp->protocol = htonl(protocol); - - sg_init_table(sg, 4); - - sg_set_buf(&sg[block++], bp, sizeof(*bp)); - nbytes += sizeof(*bp); + bp->protocol = cpu_to_be32(IPPROTO_TCP); + bp->len = cpu_to_be32(nbytes); - /* 2. TCP header, excluding options */ - cksum = th->check; - th->check = 0; - sg_set_buf(&sg[block++], th, sizeof(*th)); - nbytes += sizeof(*th); - - /* 3. TCP segment data (if any) */ - data_len = tcplen - (th->doff << 2); - if (data_len > 0) { - u8 *data = (u8 *)th + (th->doff << 2); - sg_set_buf(&sg[block++], data, data_len); - nbytes += data_len; - } + sg_init_one(&sg, bp, sizeof(*bp)); + return crypto_hash_update(&hp->md5_desc, &sg, sizeof(*bp)); +} - /* 4. shared key */ - sg_set_buf(&sg[block++], key->key, key->keylen); - nbytes += key->keylen; +static int tcp_v6_md5_hash_hdr(char *md5_hash, struct tcp_md5sig_key *key, + struct in6_addr *daddr, struct in6_addr *saddr, + struct tcphdr *th) +{ + struct tcp_md5sig_pool *hp; + struct hash_desc *desc; - sg_mark_end(&sg[block - 1]); + hp = tcp_get_md5sig_pool(); + if (!hp) + goto clear_hash_noput; + desc = &hp->md5_desc; - /* Now store the hash into the packet */ - err = crypto_hash_init(desc); - if (err) { - printk(KERN_WARNING "%s(): hash_init failed\n", __func__); + if (crypto_hash_init(desc)) goto clear_hash; - } - err = crypto_hash_update(desc, sg, nbytes); - if (err) { - printk(KERN_WARNING "%s(): hash_update failed\n", __func__); + if (tcp_v6_md5_hash_pseudoheader(hp, daddr, saddr, th->doff << 2)) goto clear_hash; - } - err = crypto_hash_final(desc, md5_hash); - if (err) { - printk(KERN_WARNING "%s(): hash_final failed\n", __func__); + if (tcp_md5_hash_header(hp, th)) + goto clear_hash; + if (tcp_md5_hash_key(hp, key)) + goto clear_hash; + if (crypto_hash_final(desc, md5_hash)) goto clear_hash; - } - /* Reset header, and free up the crypto */ tcp_put_md5sig_pool(); - th->check = cksum; -out: return 0; + clear_hash: tcp_put_md5sig_pool(); clear_hash_noput: memset(md5_hash, 0, 16); - goto out; + return 1; } -static int tcp_v6_calc_md5_hash(char *md5_hash, struct tcp_md5sig_key *key, - struct sock *sk, - struct dst_entry *dst, - struct request_sock *req, - struct tcphdr *th, int protocol, - unsigned int tcplen) +static int tcp_v6_md5_hash_skb(char *md5_hash, struct tcp_md5sig_key *key, + struct sock *sk, struct request_sock *req, + struct sk_buff *skb) { struct in6_addr *saddr, *daddr; + struct tcp_md5sig_pool *hp; + struct hash_desc *desc; + struct tcphdr *th = tcp_hdr(skb); if (sk) { saddr = &inet6_sk(sk)->saddr; daddr = &inet6_sk(sk)->daddr; - } else { + } else if (req) { saddr = &inet6_rsk(req)->loc_addr; daddr = &inet6_rsk(req)->rmt_addr; + } else { + struct ipv6hdr *ip6h = ipv6_hdr(skb); + saddr = &ip6h->saddr; + daddr = &ip6h->daddr; } - return tcp_v6_do_calc_md5_hash(md5_hash, key, - saddr, daddr, - th, protocol, tcplen); + + hp = tcp_get_md5sig_pool(); + if (!hp) + goto clear_hash_noput; + desc = &hp->md5_desc; + + if (crypto_hash_init(desc)) + goto clear_hash; + + if (tcp_v6_md5_hash_pseudoheader(hp, daddr, saddr, skb->len)) + goto clear_hash; + if (tcp_md5_hash_header(hp, th)) + goto clear_hash; + if (tcp_md5_hash_skb_data(hp, skb, th->doff << 2)) + goto clear_hash; + if (tcp_md5_hash_key(hp, key)) + goto clear_hash; + if (crypto_hash_final(desc, md5_hash)) + goto clear_hash; + + tcp_put_md5sig_pool(); + return 0; + +clear_hash: + tcp_put_md5sig_pool(); +clear_hash_noput: + memset(md5_hash, 0, 16); + return 1; } static int tcp_v6_inbound_md5_hash (struct sock *sk, struct sk_buff *skb) @@ -844,74 +845,31 @@ static int tcp_v6_inbound_md5_hash (struct sock *sk, struct sk_buff *skb) struct tcp_md5sig_key *hash_expected; struct ipv6hdr *ip6h = ipv6_hdr(skb); struct tcphdr *th = tcp_hdr(skb); - int length = (th->doff << 2) - sizeof (*th); int genhash; - u8 *ptr; u8 newhash[16]; hash_expected = tcp_v6_md5_do_lookup(sk, &ip6h->saddr); + hash_location = tcp_parse_md5sig_option(th); - /* If the TCP option is too short, we can short cut */ - if (length < TCPOLEN_MD5SIG) - return hash_expected ? 1 : 0; - - /* parse options */ - ptr = (u8*)(th + 1); - while (length > 0) { - int opcode = *ptr++; - int opsize; - - switch(opcode) { - case TCPOPT_EOL: - goto done_opts; - case TCPOPT_NOP: - length--; - continue; - default: - opsize = *ptr++; - if (opsize < 2 || opsize > length) - goto done_opts; - if (opcode == TCPOPT_MD5SIG) { - hash_location = ptr; - goto done_opts; - } - } - ptr += opsize - 2; - length -= opsize; - } + /* We've parsed the options - do we have a hash? */ + if (!hash_expected && !hash_location) + return 0; -done_opts: - /* do we have a hash as expected? */ - if (!hash_expected) { - if (!hash_location) - return 0; - if (net_ratelimit()) { - printk(KERN_INFO "MD5 Hash NOT expected but found " - "(" NIP6_FMT ", %u)->" - "(" NIP6_FMT ", %u)\n", - NIP6(ip6h->saddr), ntohs(th->source), - NIP6(ip6h->daddr), ntohs(th->dest)); - } + if (hash_expected && !hash_location) { + NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPMD5NOTFOUND); return 1; } - if (!hash_location) { - if (net_ratelimit()) { - printk(KERN_INFO "MD5 Hash expected but NOT found " - "(" NIP6_FMT ", %u)->" - "(" NIP6_FMT ", %u)\n", - NIP6(ip6h->saddr), ntohs(th->source), - NIP6(ip6h->daddr), ntohs(th->dest)); - } + if (!hash_expected && hash_location) { + NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPMD5UNEXPECTED); return 1; } /* check the signature */ - genhash = tcp_v6_do_calc_md5_hash(newhash, - hash_expected, - &ip6h->saddr, &ip6h->daddr, - th, sk->sk_protocol, - skb->len); + genhash = tcp_v6_md5_hash_skb(newhash, + hash_expected, + NULL, NULL, skb); + if (genhash || memcmp(hash_location, newhash, 16) != 0) { if (net_ratelimit()) { printk(KERN_INFO "MD5 Hash %s for " @@ -984,39 +942,24 @@ static int tcp_v6_gso_send_check(struct sk_buff *skb) return 0; } -static void tcp_v6_send_reset(struct sock *sk, struct sk_buff *skb) +static void tcp_v6_send_response(struct sk_buff *skb, u32 seq, u32 ack, u32 win, + u32 ts, struct tcp_md5sig_key *key, int rst) { struct tcphdr *th = tcp_hdr(skb), *t1; struct sk_buff *buff; struct flowi fl; struct net *net = dev_net(skb->dst->dev); struct sock *ctl_sk = net->ipv6.tcp_sk; - unsigned int tot_len = sizeof(*th); -#ifdef CONFIG_TCP_MD5SIG - struct tcp_md5sig_key *key; -#endif - - if (th->rst) - return; - - if (!ipv6_unicast_destination(skb)) - return; + unsigned int tot_len = sizeof(struct tcphdr); + __be32 *topt; + if (ts) + tot_len += TCPOLEN_TSTAMP_ALIGNED; #ifdef CONFIG_TCP_MD5SIG - if (sk) - key = tcp_v6_md5_do_lookup(sk, &ipv6_hdr(skb)->daddr); - else - key = NULL; - if (key) tot_len += TCPOLEN_MD5SIG_ALIGNED; #endif - /* - * We need to grab some memory, and put together an RST, - * and then put it into the queue to be sent. - */ - buff = alloc_skb(MAX_HEADER + sizeof(struct ipv6hdr) + tot_len, GFP_ATOMIC); if (buff == NULL) @@ -1031,38 +974,39 @@ static void tcp_v6_send_reset(struct sock *sk, struct sk_buff *skb) t1->dest = th->source; t1->source = th->dest; t1->doff = tot_len / 4; - t1->rst = 1; + t1->seq = htonl(seq); + t1->ack_seq = htonl(ack); + t1->ack = !rst || !th->ack; + t1->rst = rst; + t1->window = htons(win); - if(th->ack) { - t1->seq = th->ack_seq; - } else { - t1->ack = 1; - t1->ack_seq = htonl(ntohl(th->seq) + th->syn + th->fin - + skb->len - (th->doff<<2)); + topt = (__be32 *)(t1 + 1); + + if (ts) { + *topt++ = htonl((TCPOPT_NOP << 24) | (TCPOPT_NOP << 16) | + (TCPOPT_TIMESTAMP << 8) | TCPOLEN_TIMESTAMP); + *topt++ = htonl(tcp_time_stamp); + *topt++ = htonl(ts); } #ifdef CONFIG_TCP_MD5SIG if (key) { - __be32 *opt = (__be32*)(t1 + 1); - opt[0] = htonl((TCPOPT_NOP << 24) | - (TCPOPT_NOP << 16) | - (TCPOPT_MD5SIG << 8) | - TCPOLEN_MD5SIG); - tcp_v6_do_calc_md5_hash((__u8 *)&opt[1], key, - &ipv6_hdr(skb)->daddr, - &ipv6_hdr(skb)->saddr, - t1, IPPROTO_TCP, tot_len); + *topt++ = htonl((TCPOPT_NOP << 24) | (TCPOPT_NOP << 16) | + (TCPOPT_MD5SIG << 8) | TCPOLEN_MD5SIG); + tcp_v6_md5_hash_hdr((__u8 *)topt, key, + &ipv6_hdr(skb)->saddr, + &ipv6_hdr(skb)->daddr, t1); } #endif - buff->csum = csum_partial((char *)t1, sizeof(*t1), 0); + buff->csum = csum_partial((char *)t1, tot_len, 0); memset(&fl, 0, sizeof(fl)); ipv6_addr_copy(&fl.fl6_dst, &ipv6_hdr(skb)->saddr); ipv6_addr_copy(&fl.fl6_src, &ipv6_hdr(skb)->daddr); t1->check = csum_ipv6_magic(&fl.fl6_src, &fl.fl6_dst, - sizeof(*t1), IPPROTO_TCP, + tot_len, IPPROTO_TCP, buff->csum); fl.proto = IPPROTO_TCP; @@ -1076,11 +1020,11 @@ static void tcp_v6_send_reset(struct sock *sk, struct sk_buff *skb) * namespace */ if (!ip6_dst_lookup(ctl_sk, &buff->dst, &fl)) { - if (xfrm_lookup(&buff->dst, &fl, NULL, 0) >= 0) { ip6_xmit(ctl_sk, buff, &fl, NULL, 0); - TCP_INC_STATS_BH(TCP_MIB_OUTSEGS); - TCP_INC_STATS_BH(TCP_MIB_OUTRSTS); + TCP_INC_STATS_BH(net, TCP_MIB_OUTSEGS); + if (rst) + TCP_INC_STATS_BH(net, TCP_MIB_OUTRSTS); return; } } @@ -1088,104 +1032,36 @@ static void tcp_v6_send_reset(struct sock *sk, struct sk_buff *skb) kfree_skb(buff); } -static void tcp_v6_send_ack(struct tcp_timewait_sock *tw, - struct sk_buff *skb, u32 seq, u32 ack, u32 win, u32 ts) +static void tcp_v6_send_reset(struct sock *sk, struct sk_buff *skb) { - struct tcphdr *th = tcp_hdr(skb), *t1; - struct sk_buff *buff; - struct flowi fl; - struct net *net = dev_net(skb->dev); - struct sock *ctl_sk = net->ipv6.tcp_sk; - unsigned int tot_len = sizeof(struct tcphdr); - __be32 *topt; -#ifdef CONFIG_TCP_MD5SIG - struct tcp_md5sig_key *key; - struct tcp_md5sig_key tw_key; -#endif - -#ifdef CONFIG_TCP_MD5SIG - if (!tw && skb->sk) { - key = tcp_v6_md5_do_lookup(skb->sk, &ipv6_hdr(skb)->daddr); - } else if (tw && tw->tw_md5_keylen) { - tw_key.key = tw->tw_md5_key; - tw_key.keylen = tw->tw_md5_keylen; - key = &tw_key; - } else { - key = NULL; - } -#endif - - if (ts) - tot_len += TCPOLEN_TSTAMP_ALIGNED; -#ifdef CONFIG_TCP_MD5SIG - if (key) - tot_len += TCPOLEN_MD5SIG_ALIGNED; -#endif + struct tcphdr *th = tcp_hdr(skb); + u32 seq = 0, ack_seq = 0; + struct tcp_md5sig_key *key = NULL; - buff = alloc_skb(MAX_HEADER + sizeof(struct ipv6hdr) + tot_len, - GFP_ATOMIC); - if (buff == NULL) + if (th->rst) return; - skb_reserve(buff, MAX_HEADER + sizeof(struct ipv6hdr) + tot_len); - - t1 = (struct tcphdr *) skb_push(buff,tot_len); - - /* Swap the send and the receive. */ - memset(t1, 0, sizeof(*t1)); - t1->dest = th->source; - t1->source = th->dest; - t1->doff = tot_len/4; - t1->seq = htonl(seq); - t1->ack_seq = htonl(ack); - t1->ack = 1; - t1->window = htons(win); - - topt = (__be32 *)(t1 + 1); - - if (ts) { - *topt++ = htonl((TCPOPT_NOP << 24) | (TCPOPT_NOP << 16) | - (TCPOPT_TIMESTAMP << 8) | TCPOLEN_TIMESTAMP); - *topt++ = htonl(tcp_time_stamp); - *topt = htonl(ts); - } + if (!ipv6_unicast_destination(skb)) + return; #ifdef CONFIG_TCP_MD5SIG - if (key) { - *topt++ = htonl((TCPOPT_NOP << 24) | (TCPOPT_NOP << 16) | - (TCPOPT_MD5SIG << 8) | TCPOLEN_MD5SIG); - tcp_v6_do_calc_md5_hash((__u8 *)topt, key, - &ipv6_hdr(skb)->daddr, - &ipv6_hdr(skb)->saddr, - t1, IPPROTO_TCP, tot_len); - } + if (sk) + key = tcp_v6_md5_do_lookup(sk, &ipv6_hdr(skb)->daddr); #endif - buff->csum = csum_partial((char *)t1, tot_len, 0); - - memset(&fl, 0, sizeof(fl)); - ipv6_addr_copy(&fl.fl6_dst, &ipv6_hdr(skb)->saddr); - ipv6_addr_copy(&fl.fl6_src, &ipv6_hdr(skb)->daddr); - - t1->check = csum_ipv6_magic(&fl.fl6_src, &fl.fl6_dst, - tot_len, IPPROTO_TCP, - buff->csum); - - fl.proto = IPPROTO_TCP; - fl.oif = inet6_iif(skb); - fl.fl_ip_dport = t1->dest; - fl.fl_ip_sport = t1->source; - security_skb_classify_flow(skb, &fl); + if (th->ack) + seq = ntohl(th->ack_seq); + else + ack_seq = ntohl(th->seq) + th->syn + th->fin + skb->len - + (th->doff << 2); - if (!ip6_dst_lookup(ctl_sk, &buff->dst, &fl)) { - if (xfrm_lookup(&buff->dst, &fl, NULL, 0) >= 0) { - ip6_xmit(ctl_sk, buff, &fl, NULL, 0); - TCP_INC_STATS_BH(TCP_MIB_OUTSEGS); - return; - } - } + tcp_v6_send_response(skb, seq, ack_seq, 0, 0, key, 1); +} - kfree_skb(buff); +static void tcp_v6_send_ack(struct sk_buff *skb, u32 seq, u32 ack, u32 win, u32 ts, + struct tcp_md5sig_key *key) +{ + tcp_v6_send_response(skb, seq, ack, win, ts, key, 0); } static void tcp_v6_timewait_ack(struct sock *sk, struct sk_buff *skb) @@ -1193,16 +1069,18 @@ static void tcp_v6_timewait_ack(struct sock *sk, struct sk_buff *skb) struct inet_timewait_sock *tw = inet_twsk(sk); struct tcp_timewait_sock *tcptw = tcp_twsk(sk); - tcp_v6_send_ack(tcptw, skb, tcptw->tw_snd_nxt, tcptw->tw_rcv_nxt, + tcp_v6_send_ack(skb, tcptw->tw_snd_nxt, tcptw->tw_rcv_nxt, tcptw->tw_rcv_wnd >> tw->tw_rcv_wscale, - tcptw->tw_ts_recent); + tcptw->tw_ts_recent, tcp_twsk_md5_key(tcptw)); inet_twsk_put(tw); } -static void tcp_v6_reqsk_send_ack(struct sk_buff *skb, struct request_sock *req) +static void tcp_v6_reqsk_send_ack(struct sock *sk, struct sk_buff *skb, + struct request_sock *req) { - tcp_v6_send_ack(NULL, skb, tcp_rsk(req)->snt_isn + 1, tcp_rsk(req)->rcv_isn + 1, req->rcv_wnd, req->ts_recent); + tcp_v6_send_ack(skb, tcp_rsk(req)->snt_isn + 1, tcp_rsk(req)->rcv_isn + 1, req->rcv_wnd, req->ts_recent, + tcp_v6_md5_do_lookup(sk, &ipv6_hdr(skb)->daddr)); } @@ -1345,7 +1223,7 @@ static struct sock * tcp_v6_syn_recv_sock(struct sock *sk, struct sk_buff *skb, struct request_sock *req, struct dst_entry *dst) { - struct inet6_request_sock *treq = inet6_rsk(req); + struct inet6_request_sock *treq; struct ipv6_pinfo *newnp, *np = inet6_sk(sk); struct tcp6_sock *newtcp6sk; struct inet_sock *newinet; @@ -1409,6 +1287,7 @@ static struct sock * tcp_v6_syn_recv_sock(struct sock *sk, struct sk_buff *skb, return newsk; } + treq = inet6_rsk(req); opt = np->opt; if (sk_acceptq_is_full(sk)) @@ -1538,9 +1417,9 @@ static struct sock * tcp_v6_syn_recv_sock(struct sock *sk, struct sk_buff *skb, return newsk; out_overflow: - NET_INC_STATS_BH(LINUX_MIB_LISTENOVERFLOWS); + NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_LISTENOVERFLOWS); out: - NET_INC_STATS_BH(LINUX_MIB_LISTENDROPS); + NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_LISTENDROPS); if (opt && opt != np->opt) sock_kfree_s(sk, opt, opt->tot_len); dst_release(dst); @@ -1669,7 +1548,7 @@ discard: kfree_skb(skb); return 0; csum_err: - TCP_INC_STATS_BH(TCP_MIB_INERRS); + TCP_INC_STATS_BH(sock_net(sk), TCP_MIB_INERRS); goto discard; @@ -1707,6 +1586,7 @@ static int tcp_v6_rcv(struct sk_buff *skb) struct tcphdr *th; struct sock *sk; int ret; + struct net *net = dev_net(skb->dev); if (skb->pkt_type != PACKET_HOST) goto discard_it; @@ -1714,7 +1594,7 @@ static int tcp_v6_rcv(struct sk_buff *skb) /* * Count it even if it's bad. */ - TCP_INC_STATS_BH(TCP_MIB_INSEGS); + TCP_INC_STATS_BH(net, TCP_MIB_INSEGS); if (!pskb_may_pull(skb, sizeof(struct tcphdr))) goto discard_it; @@ -1738,11 +1618,7 @@ static int tcp_v6_rcv(struct sk_buff *skb) TCP_SKB_CB(skb)->flags = ipv6_get_dsfield(ipv6_hdr(skb)); TCP_SKB_CB(skb)->sacked = 0; - sk = __inet6_lookup(dev_net(skb->dev), &tcp_hashinfo, - &ipv6_hdr(skb)->saddr, th->source, - &ipv6_hdr(skb)->daddr, ntohs(th->dest), - inet6_iif(skb)); - + sk = __inet6_lookup_skb(&tcp_hashinfo, skb, th->source, th->dest); if (!sk) goto no_tcp_socket; @@ -1786,7 +1662,7 @@ no_tcp_socket: if (skb->len < (th->doff<<2) || tcp_checksum_complete(skb)) { bad_packet: - TCP_INC_STATS_BH(TCP_MIB_INERRS); + TCP_INC_STATS_BH(net, TCP_MIB_INERRS); } else { tcp_v6_send_reset(NULL, skb); } @@ -1811,7 +1687,7 @@ do_time_wait: } if (skb->len < (th->doff<<2) || tcp_checksum_complete(skb)) { - TCP_INC_STATS_BH(TCP_MIB_INERRS); + TCP_INC_STATS_BH(net, TCP_MIB_INERRS); inet_twsk_put(inet_twsk(sk)); goto discard_it; } @@ -1871,7 +1747,7 @@ static struct inet_connection_sock_af_ops ipv6_specific = { #ifdef CONFIG_TCP_MD5SIG static struct tcp_sock_af_ops tcp_sock_ipv6_specific = { .md5_lookup = tcp_v6_md5_lookup, - .calc_md5_hash = tcp_v6_calc_md5_hash, + .calc_md5_hash = tcp_v6_md5_hash_skb, .md5_add = tcp_v6_md5_add_func, .md5_parse = tcp_v6_parse_md5_keys, }; @@ -1903,7 +1779,7 @@ static struct inet_connection_sock_af_ops ipv6_mapped = { #ifdef CONFIG_TCP_MD5SIG static struct tcp_sock_af_ops tcp_sock_ipv6_mapped_specific = { .md5_lookup = tcp_v4_md5_lookup, - .calc_md5_hash = tcp_v4_calc_md5_hash, + .calc_md5_hash = tcp_v4_md5_hash_skb, .md5_add = tcp_v6_md5_add_func, .md5_parse = tcp_v6_parse_md5_keys, }; @@ -1960,7 +1836,7 @@ static int tcp_v6_init_sock(struct sock *sk) return 0; } -static int tcp_v6_destroy_sock(struct sock *sk) +static void tcp_v6_destroy_sock(struct sock *sk) { #ifdef CONFIG_TCP_MD5SIG /* Clean up the MD5 key list */ @@ -1968,7 +1844,7 @@ static int tcp_v6_destroy_sock(struct sock *sk) tcp_v6_clear_md5_list(sk); #endif tcp_v4_destroy_sock(sk); - return inet6_destroy_sock(sk); + inet6_destroy_sock(sk); } #ifdef CONFIG_PROC_FS @@ -2036,7 +1912,7 @@ static void get_tcp6_sock(struct seq_file *seq, struct sock *sp, int i) seq_printf(seq, "%4d: %08X%08X%08X%08X:%04X %08X%08X%08X%08X:%04X " - "%02X %08X:%08X %02X:%08lX %08X %5d %8d %lu %d %p %u %u %u %u %d\n", + "%02X %08X:%08X %02X:%08lX %08X %5d %8d %lu %d %p %lu %lu %u %u %d\n", i, src->s6_addr32[0], src->s6_addr32[1], src->s6_addr32[2], src->s6_addr32[3], srcp, @@ -2052,8 +1928,8 @@ static void get_tcp6_sock(struct seq_file *seq, struct sock *sp, int i) icsk->icsk_probes_out, sock_i_ino(sp), atomic_read(&sp->sk_refcnt), sp, - icsk->icsk_rto, - icsk->icsk_ack.ato, + jiffies_to_clock_t(icsk->icsk_rto), + jiffies_to_clock_t(icsk->icsk_ack.ato), (icsk->icsk_ack.quick << 1 ) | icsk->icsk_ack.pingpong, tp->snd_cwnd, tp->snd_ssthresh>=0xFFFF?-1:tp->snd_ssthresh ); @@ -2206,6 +2082,7 @@ static int tcpv6_net_init(struct net *net) static void tcpv6_net_exit(struct net *net) { inet_ctl_sock_destroy(net->ipv6.tcp_sk); + inet_twsk_purge(net, &tcp_hashinfo, &tcp_death_row, AF_INET6); } static struct pernet_operations tcpv6_net_ops = { diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c index dd309626ae9a..e51da8c092fa 100644 --- a/net/ipv6/udp.c +++ b/net/ipv6/udp.c @@ -7,8 +7,6 @@ * * Based on linux/ipv4/udp.c * - * $Id: udp.c,v 1.65 2002/02/01 22:01:04 davem Exp $ - * * Fixes: * Hideaki YOSHIFUJI : sin6_scope_id support * YOSHIFUJI Hideaki @USAGI and: Support IPV6_V6ONLY socket option, which @@ -67,7 +65,7 @@ static struct sock *__udp6_lib_lookup(struct net *net, int badness = -1; read_lock(&udp_hash_lock); - sk_for_each(sk, node, &udptable[hnum & (UDP_HTABLE_SIZE - 1)]) { + sk_for_each(sk, node, &udptable[udp_hashfn(net, hnum)]) { struct inet_sock *inet = inet_sk(sk); if (net_eq(sock_net(sk), net) && sk->sk_hash == hnum && @@ -109,6 +107,21 @@ static struct sock *__udp6_lib_lookup(struct net *net, return result; } +static struct sock *__udp6_lib_lookup_skb(struct sk_buff *skb, + __be16 sport, __be16 dport, + struct hlist_head udptable[]) +{ + struct sock *sk; + struct ipv6hdr *iph = ipv6_hdr(skb); + + if (unlikely(sk = skb_steal_sock(skb))) + return sk; + else + return __udp6_lib_lookup(dev_net(skb->dst->dev), &iph->saddr, sport, + &iph->daddr, dport, inet6_iif(skb), + udptable); +} + /* * This should be easy, if there is something there we * return it, otherwise we block. @@ -168,7 +181,8 @@ try_again: goto out_free; if (!peeked) - UDP6_INC_STATS_USER(UDP_MIB_INDATAGRAMS, is_udplite); + UDP6_INC_STATS_USER(sock_net(sk), + UDP_MIB_INDATAGRAMS, is_udplite); sock_recv_timestamp(msg, sk, skb); @@ -215,7 +229,7 @@ out: csum_copy_err: lock_sock(sk); if (!skb_kill_datagram(sk, skb, flags)) - UDP6_INC_STATS_USER(UDP_MIB_INERRORS, is_udplite); + UDP6_INC_STATS_USER(sock_net(sk), UDP_MIB_INERRORS, is_udplite); release_sock(sk); if (flags & MSG_DONTWAIT) @@ -299,14 +313,17 @@ int udpv6_queue_rcv_skb(struct sock * sk, struct sk_buff *skb) if ((rc = sock_queue_rcv_skb(sk,skb)) < 0) { /* Note that an ENOMEM error is charged twice */ - if (rc == -ENOMEM) - UDP6_INC_STATS_BH(UDP_MIB_RCVBUFERRORS, is_udplite); + if (rc == -ENOMEM) { + UDP6_INC_STATS_BH(sock_net(sk), + UDP_MIB_RCVBUFERRORS, is_udplite); + atomic_inc(&sk->sk_drops); + } goto drop; } return 0; drop: - UDP6_INC_STATS_BH(UDP_MIB_INERRORS, is_udplite); + UDP6_INC_STATS_BH(sock_net(sk), UDP_MIB_INERRORS, is_udplite); kfree_skb(skb); return -1; } @@ -355,15 +372,16 @@ static struct sock *udp_v6_mcast_next(struct sock *sk, * Note: called only from the BH handler context, * so we don't need to lock the hashes. */ -static int __udp6_lib_mcast_deliver(struct sk_buff *skb, struct in6_addr *saddr, - struct in6_addr *daddr, struct hlist_head udptable[]) +static int __udp6_lib_mcast_deliver(struct net *net, struct sk_buff *skb, + struct in6_addr *saddr, struct in6_addr *daddr, + struct hlist_head udptable[]) { struct sock *sk, *sk2; const struct udphdr *uh = udp_hdr(skb); int dif; read_lock(&udp_hash_lock); - sk = sk_head(&udptable[ntohs(uh->dest) & (UDP_HTABLE_SIZE - 1)]); + sk = sk_head(&udptable[udp_hashfn(net, ntohs(uh->dest))]); dif = inet6_iif(skb); sk = udp_v6_mcast_next(sk, uh->dest, daddr, uh->source, saddr, dif); if (!sk) { @@ -376,7 +394,7 @@ static int __udp6_lib_mcast_deliver(struct sk_buff *skb, struct in6_addr *saddr, uh->source, saddr, dif))) { struct sk_buff *buff = skb_clone(skb, GFP_ATOMIC); if (buff) { - bh_lock_sock_nested(sk2); + bh_lock_sock(sk2); if (!sock_owned_by_user(sk2)) udpv6_queue_rcv_skb(sk2, buff); else @@ -384,7 +402,7 @@ static int __udp6_lib_mcast_deliver(struct sk_buff *skb, struct in6_addr *saddr, bh_unlock_sock(sk2); } } - bh_lock_sock_nested(sk); + bh_lock_sock(sk); if (!sock_owned_by_user(sk)) udpv6_queue_rcv_skb(sk, skb); else @@ -437,6 +455,7 @@ int __udp6_lib_rcv(struct sk_buff *skb, struct hlist_head udptable[], struct net_device *dev = skb->dev; struct in6_addr *saddr, *daddr; u32 ulen = 0; + struct net *net = dev_net(skb->dev); if (!pskb_may_pull(skb, sizeof(struct udphdr))) goto short_packet; @@ -475,7 +494,8 @@ int __udp6_lib_rcv(struct sk_buff *skb, struct hlist_head udptable[], * Multicast receive code */ if (ipv6_addr_is_multicast(daddr)) - return __udp6_lib_mcast_deliver(skb, saddr, daddr, udptable); + return __udp6_lib_mcast_deliver(net, skb, + saddr, daddr, udptable); /* Unicast */ @@ -483,8 +503,7 @@ int __udp6_lib_rcv(struct sk_buff *skb, struct hlist_head udptable[], * check socket cache ... must talk to Alan about his plans * for sock caches... i'll skip this for now. */ - sk = __udp6_lib_lookup(dev_net(skb->dev), saddr, uh->source, - daddr, uh->dest, inet6_iif(skb), udptable); + sk = __udp6_lib_lookup_skb(skb, uh->source, uh->dest, udptable); if (sk == NULL) { if (!xfrm6_policy_check(NULL, XFRM_POLICY_IN, skb)) @@ -492,7 +511,8 @@ int __udp6_lib_rcv(struct sk_buff *skb, struct hlist_head udptable[], if (udp_lib_checksum_complete(skb)) goto discard; - UDP6_INC_STATS_BH(UDP_MIB_NOPORTS, proto == IPPROTO_UDPLITE); + UDP6_INC_STATS_BH(net, UDP_MIB_NOPORTS, + proto == IPPROTO_UDPLITE); icmpv6_send(skb, ICMPV6_DEST_UNREACH, ICMPV6_PORT_UNREACH, 0, dev); @@ -502,7 +522,7 @@ int __udp6_lib_rcv(struct sk_buff *skb, struct hlist_head udptable[], /* deliver */ - bh_lock_sock_nested(sk); + bh_lock_sock(sk); if (!sock_owned_by_user(sk)) udpv6_queue_rcv_skb(sk, skb); else @@ -517,7 +537,7 @@ short_packet: ulen, skb->len); discard: - UDP6_INC_STATS_BH(UDP_MIB_INERRORS, proto == IPPROTO_UDPLITE); + UDP6_INC_STATS_BH(net, UDP_MIB_INERRORS, proto == IPPROTO_UDPLITE); kfree_skb(skb); return 0; } @@ -587,7 +607,8 @@ out: up->len = 0; up->pending = 0; if (!err) - UDP6_INC_STATS_USER(UDP_MIB_OUTDATAGRAMS, is_udplite); + UDP6_INC_STATS_USER(sock_net(sk), + UDP_MIB_OUTDATAGRAMS, is_udplite); return err; } @@ -869,7 +890,8 @@ out: * seems like overkill. */ if (err == -ENOBUFS || test_bit(SOCK_NOSPACE, &sk->sk_socket->flags)) { - UDP6_INC_STATS_USER(UDP_MIB_SNDBUFERRORS, is_udplite); + UDP6_INC_STATS_USER(sock_net(sk), + UDP_MIB_SNDBUFERRORS, is_udplite); } return err; @@ -881,15 +903,13 @@ do_confirm: goto out; } -int udpv6_destroy_sock(struct sock *sk) +void udpv6_destroy_sock(struct sock *sk) { lock_sock(sk); udp_v6_flush_pending_frames(sk); release_sock(sk); inet6_destroy_sock(sk); - - return 0; } /* @@ -955,7 +975,7 @@ static void udp6_sock_seq_show(struct seq_file *seq, struct sock *sp, int bucket srcp = ntohs(inet->sport); seq_printf(seq, "%4d: %08X%08X%08X%08X:%04X %08X%08X%08X%08X:%04X " - "%02X %08X:%08X %02X:%08lX %08X %5d %8d %lu %d %p\n", + "%02X %08X:%08X %02X:%08lX %08X %5d %8d %lu %d %p %d\n", bucket, src->s6_addr32[0], src->s6_addr32[1], src->s6_addr32[2], src->s6_addr32[3], srcp, @@ -967,7 +987,8 @@ static void udp6_sock_seq_show(struct seq_file *seq, struct sock *sp, int bucket 0, 0L, 0, sock_i_uid(sp), 0, sock_i_ino(sp), - atomic_read(&sp->sk_refcnt), sp); + atomic_read(&sp->sk_refcnt), sp, + atomic_read(&sp->sk_drops)); } int udp6_seq_show(struct seq_file *seq, void *v) @@ -978,7 +999,7 @@ int udp6_seq_show(struct seq_file *seq, void *v) "local_address " "remote_address " "st tx_queue rx_queue tr tm->when retrnsmt" - " uid timeout inode\n"); + " uid timeout inode ref pointer drops\n"); else udp6_sock_seq_show(seq, v, ((struct udp_iter_state *)seq->private)->bucket); return 0; diff --git a/net/ipv6/udp_impl.h b/net/ipv6/udp_impl.h index 321b81a4d418..92dd7da766d8 100644 --- a/net/ipv6/udp_impl.h +++ b/net/ipv6/udp_impl.h @@ -29,7 +29,7 @@ extern int udpv6_recvmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, size_t len, int noblock, int flags, int *addr_len); extern int udpv6_queue_rcv_skb(struct sock * sk, struct sk_buff *skb); -extern int udpv6_destroy_sock(struct sock *sk); +extern void udpv6_destroy_sock(struct sock *sk); #ifdef CONFIG_PROC_FS extern int udp6_seq_show(struct seq_file *seq, void *v); diff --git a/net/ipv6/udplite.c b/net/ipv6/udplite.c index 491efd00a866..3cd1a1ac3d6c 100644 --- a/net/ipv6/udplite.c +++ b/net/ipv6/udplite.c @@ -2,8 +2,6 @@ * UDPLITEv6 An implementation of the UDP-Lite protocol over IPv6. * See also net/ipv4/udplite.c * - * Version: $Id: udplite.c,v 1.9 2006/10/19 08:28:10 gerrit Exp $ - * * Authors: Gerrit Renker <gerrit@erg.abdn.ac.uk> * * Changes: @@ -15,8 +13,6 @@ */ #include "udp_impl.h" -DEFINE_SNMP_STAT(struct udp_mib, udplite_stats_in6) __read_mostly; - static int udplitev6_rcv(struct sk_buff *skb) { return __udp6_lib_rcv(skb, udplite_hash, IPPROTO_UDPLITE); diff --git a/net/ipv6/xfrm6_mode_beet.c b/net/ipv6/xfrm6_mode_beet.c index d6ce400f585f..bbd48b101bae 100644 --- a/net/ipv6/xfrm6_mode_beet.c +++ b/net/ipv6/xfrm6_mode_beet.c @@ -40,16 +40,39 @@ static void xfrm6_beet_make_header(struct sk_buff *skb) static int xfrm6_beet_output(struct xfrm_state *x, struct sk_buff *skb) { struct ipv6hdr *top_iph; - - skb_set_network_header(skb, -x->props.header_len); + struct ip_beet_phdr *ph; + struct iphdr *iphv4; + int optlen, hdr_len; + + iphv4 = ip_hdr(skb); + hdr_len = 0; + optlen = XFRM_MODE_SKB_CB(skb)->optlen; + if (unlikely(optlen)) + hdr_len += IPV4_BEET_PHMAXLEN - (optlen & 4); + + skb_set_network_header(skb, -x->props.header_len - hdr_len); + if (x->sel.family != AF_INET6) + skb->network_header += IPV4_BEET_PHMAXLEN; skb->mac_header = skb->network_header + offsetof(struct ipv6hdr, nexthdr); skb->transport_header = skb->network_header + sizeof(*top_iph); - __skb_pull(skb, XFRM_MODE_SKB_CB(skb)->ihl); + ph = (struct ip_beet_phdr *)__skb_pull(skb, XFRM_MODE_SKB_CB(skb)->ihl-hdr_len); xfrm6_beet_make_header(skb); top_iph = ipv6_hdr(skb); + if (unlikely(optlen)) { + + BUG_ON(optlen < 0); + + ph->padlen = 4 - (optlen & 4); + ph->hdrlen = optlen / 8; + ph->nexthdr = top_iph->nexthdr; + if (ph->padlen) + memset(ph + 1, IPOPT_NOP, ph->padlen); + + top_iph->nexthdr = IPPROTO_BEETPH; + } ipv6_addr_copy(&top_iph->saddr, (struct in6_addr *)&x->props.saddr); ipv6_addr_copy(&top_iph->daddr, (struct in6_addr *)&x->id.daddr); diff --git a/net/ipv6/xfrm6_policy.c b/net/ipv6/xfrm6_policy.c index 8f1e0543b3c4..08e4cbbe3f04 100644 --- a/net/ipv6/xfrm6_policy.c +++ b/net/ipv6/xfrm6_policy.c @@ -52,12 +52,14 @@ static struct dst_entry *xfrm6_dst_lookup(int tos, xfrm_address_t *saddr, static int xfrm6_get_saddr(xfrm_address_t *saddr, xfrm_address_t *daddr) { struct dst_entry *dst; + struct net_device *dev; dst = xfrm6_dst_lookup(0, NULL, daddr); if (IS_ERR(dst)) return -EHOSTUNREACH; - ipv6_dev_get_saddr(ip6_dst_idev(dst)->dev, + dev = ip6_dst_idev(dst)->dev; + ipv6_dev_get_saddr(dev_net(dev), dev, (struct in6_addr *)&daddr->a6, 0, (struct in6_addr *)&saddr->a6); dst_release(dst); diff --git a/net/ipx/af_ipx.c b/net/ipx/af_ipx.c index 81ae8735f5e3..b6e70f92e7fb 100644 --- a/net/ipx/af_ipx.c +++ b/net/ipx/af_ipx.c @@ -335,7 +335,7 @@ static int ipxitf_device_event(struct notifier_block *notifier, struct net_device *dev = ptr; struct ipx_interface *i, *tmp; - if (dev_net(dev) != &init_net) + if (!net_eq(dev_net(dev), &init_net)) return NOTIFY_DONE; if (event != NETDEV_DOWN && event != NETDEV_UP) @@ -1636,7 +1636,7 @@ static int ipx_rcv(struct sk_buff *skb, struct net_device *dev, struct packet_ty u16 ipx_pktsize; int rc = 0; - if (dev_net(dev) != &init_net) + if (!net_eq(dev_net(dev), &init_net)) goto drop; /* Not ours */ diff --git a/net/irda/ircomm/ircomm_tty.c b/net/irda/ircomm/ircomm_tty.c index 76c3057d0179..e4e2caeb9d82 100644 --- a/net/irda/ircomm/ircomm_tty.c +++ b/net/irda/ircomm/ircomm_tty.c @@ -650,12 +650,7 @@ static void ircomm_tty_do_softint(struct work_struct *work) } /* Check if user (still) wants to be waken up */ - if ((tty->flags & (1 << TTY_DO_WRITE_WAKEUP)) && - tty->ldisc.write_wakeup) - { - (tty->ldisc.write_wakeup)(tty); - } - wake_up_interruptible(&tty->write_wait); + tty_wakeup(tty); } /* @@ -1141,6 +1136,7 @@ static int ircomm_tty_data_indication(void *instance, void *sap, struct sk_buff *skb) { struct ircomm_tty_cb *self = (struct ircomm_tty_cb *) instance; + struct tty_ldisc *ld; IRDA_DEBUG(2, "%s()\n", __func__ ); @@ -1173,7 +1169,11 @@ static int ircomm_tty_data_indication(void *instance, void *sap, * involve the flip buffers, since we are not running in an interrupt * handler */ - self->tty->ldisc.receive_buf(self->tty, skb->data, NULL, skb->len); + + ld = tty_ldisc_ref(self->tty); + if (ld) + ld->ops->receive_buf(self->tty, skb->data, NULL, skb->len); + tty_ldisc_deref(ld); /* No need to kfree_skb - see ircomm_ttp_data_indication() */ diff --git a/net/irda/irlap_frame.c b/net/irda/irlap_frame.c index 90894534f3cc..f17b65af9c9b 100644 --- a/net/irda/irlap_frame.c +++ b/net/irda/irlap_frame.c @@ -1326,7 +1326,7 @@ int irlap_driver_rcv(struct sk_buff *skb, struct net_device *dev, int command; __u8 control; - if (dev_net(dev) != &init_net) + if (!net_eq(dev_net(dev), &init_net)) goto out; /* FIXME: should we get our own field? */ diff --git a/net/irda/irnet/irnet.h b/net/irda/irnet/irnet.h index b001c361ad30..bccf4d0059f0 100644 --- a/net/irda/irnet/irnet.h +++ b/net/irda/irnet/irnet.h @@ -241,6 +241,7 @@ #include <linux/module.h> #include <linux/kernel.h> +#include <linux/smp_lock.h> #include <linux/skbuff.h> #include <linux/tty.h> #include <linux/proc_fs.h> diff --git a/net/irda/irnet/irnet_ppp.c b/net/irda/irnet/irnet_ppp.c index e0eab5927c4f..6d8ae03c14f5 100644 --- a/net/irda/irnet/irnet_ppp.c +++ b/net/irda/irnet/irnet_ppp.c @@ -479,6 +479,7 @@ dev_irnet_open(struct inode * inode, ap = kzalloc(sizeof(*ap), GFP_KERNEL); DABORT(ap == NULL, -ENOMEM, FS_ERROR, "Can't allocate struct irnet...\n"); + lock_kernel(); /* initialize the irnet structure */ ap->file = file; @@ -500,6 +501,7 @@ dev_irnet_open(struct inode * inode, { DERROR(FS_ERROR, "Can't setup IrDA link...\n"); kfree(ap); + unlock_kernel(); return err; } @@ -510,6 +512,7 @@ dev_irnet_open(struct inode * inode, file->private_data = ap; DEXIT(FS_TRACE, " - ap=0x%p\n", ap); + unlock_kernel(); return 0; } @@ -628,8 +631,8 @@ dev_irnet_poll(struct file * file, * This is the way pppd configure us and control us while the PPP * instance is active. */ -static int -dev_irnet_ioctl(struct inode * inode, +static long +dev_irnet_ioctl( struct file * file, unsigned int cmd, unsigned long arg) @@ -660,6 +663,7 @@ dev_irnet_ioctl(struct inode * inode, { DEBUG(FS_INFO, "Entering PPP discipline.\n"); /* PPP channel setup (ap->chan in configued in dev_irnet_open())*/ + lock_kernel(); err = ppp_register_channel(&ap->chan); if(err == 0) { @@ -672,12 +676,14 @@ dev_irnet_ioctl(struct inode * inode, } else DERROR(FS_ERROR, "Can't setup PPP channel...\n"); + unlock_kernel(); } else { /* In theory, should be N_TTY */ DEBUG(FS_INFO, "Exiting PPP discipline.\n"); /* Disconnect from the generic PPP layer */ + lock_kernel(); if(ap->ppp_open) { ap->ppp_open = 0; @@ -686,24 +692,20 @@ dev_irnet_ioctl(struct inode * inode, else DERROR(FS_ERROR, "Channel not registered !\n"); err = 0; + unlock_kernel(); } break; /* Query PPP channel and unit number */ case PPPIOCGCHAN: - if(!ap->ppp_open) - break; - if(put_user(ppp_channel_index(&ap->chan), (int __user *)argp)) - break; - DEBUG(FS_INFO, "Query channel.\n"); - err = 0; + if(ap->ppp_open && !put_user(ppp_channel_index(&ap->chan), + (int __user *)argp)) + err = 0; break; case PPPIOCGUNIT: - if(!ap->ppp_open) - break; - if(put_user(ppp_unit_number(&ap->chan), (int __user *)argp)) - break; - DEBUG(FS_INFO, "Query unit number.\n"); + lock_kernel(); + if(ap->ppp_open && !put_user(ppp_unit_number(&ap->chan), + (int __user *)argp)) err = 0; break; @@ -723,34 +725,39 @@ dev_irnet_ioctl(struct inode * inode, DEBUG(FS_INFO, "Standard PPP ioctl.\n"); if(!capable(CAP_NET_ADMIN)) err = -EPERM; - else + else { + lock_kernel(); err = ppp_irnet_ioctl(&ap->chan, cmd, arg); + unlock_kernel(); + } break; /* TTY IOCTLs : Pretend that we are a tty, to keep pppd happy */ /* Get termios */ case TCGETS: DEBUG(FS_INFO, "Get termios.\n"); + lock_kernel(); #ifndef TCGETS2 - if(kernel_termios_to_user_termios((struct termios __user *)argp, &ap->termios)) - break; + if(!kernel_termios_to_user_termios((struct termios __user *)argp, &ap->termios)) + err = 0; #else if(kernel_termios_to_user_termios_1((struct termios __user *)argp, &ap->termios)) - break; + err = 0; #endif - err = 0; + unlock_kernel(); break; /* Set termios */ case TCSETSF: DEBUG(FS_INFO, "Set termios.\n"); + lock_kernel(); #ifndef TCGETS2 - if(user_termios_to_kernel_termios(&ap->termios, (struct termios __user *)argp)) - break; + if(!user_termios_to_kernel_termios(&ap->termios, (struct termios __user *)argp)) + err = 0; #else - if(user_termios_to_kernel_termios_1(&ap->termios, (struct termios __user *)argp)) - break; + if(!user_termios_to_kernel_termios_1(&ap->termios, (struct termios __user *)argp)) + err = 0; #endif - err = 0; + unlock_kernel(); break; /* Set DTR/RTS */ @@ -773,7 +780,9 @@ dev_irnet_ioctl(struct inode * inode, * We should also worry that we don't accept junk here and that * we get rid of our own buffers */ #ifdef FLUSH_TO_PPP + lock_kernel(); ppp_output_wakeup(&ap->chan); + unlock_kernel(); #endif /* FLUSH_TO_PPP */ err = 0; break; @@ -788,7 +797,7 @@ dev_irnet_ioctl(struct inode * inode, default: DERROR(FS_ERROR, "Unsupported ioctl (0x%X)\n", cmd); - err = -ENOIOCTLCMD; + err = -ENOTTY; } DEXIT(FS_TRACE, " - err = 0x%X\n", err); diff --git a/net/irda/irnet/irnet_ppp.h b/net/irda/irnet/irnet_ppp.h index d2beb7df8f7f..d9f8bd4ebd05 100644 --- a/net/irda/irnet/irnet_ppp.h +++ b/net/irda/irnet/irnet_ppp.h @@ -76,9 +76,8 @@ static ssize_t static unsigned int dev_irnet_poll(struct file *, poll_table *); -static int - dev_irnet_ioctl(struct inode *, - struct file *, +static long + dev_irnet_ioctl(struct file *, unsigned int, unsigned long); /* ------------------------ PPP INTERFACE ------------------------ */ @@ -102,7 +101,7 @@ static struct file_operations irnet_device_fops = .read = dev_irnet_read, .write = dev_irnet_write, .poll = dev_irnet_poll, - .ioctl = dev_irnet_ioctl, + .unlocked_ioctl = dev_irnet_ioctl, .open = dev_irnet_open, .release = dev_irnet_close /* Also : llseek, readdir, mmap, flush, fsync, fasync, lock, readv, writev */ diff --git a/net/irda/irnetlink.c b/net/irda/irnetlink.c index 9e1fb82e3220..2f05ec1037ab 100644 --- a/net/irda/irnetlink.c +++ b/net/irda/irnetlink.c @@ -101,8 +101,8 @@ static int irda_nl_get_mode(struct sk_buff *skb, struct genl_info *info) hdr = genlmsg_put(msg, info->snd_pid, info->snd_seq, &irda_nl_family, 0, IRDA_NL_CMD_GET_MODE); - if (IS_ERR(hdr)) { - ret = PTR_ERR(hdr); + if (hdr == NULL) { + ret = -EMSGSIZE; goto err_out; } diff --git a/net/iucv/af_iucv.c b/net/iucv/af_iucv.c index 7b0038f45b16..29f7baa25110 100644 --- a/net/iucv/af_iucv.c +++ b/net/iucv/af_iucv.c @@ -644,6 +644,7 @@ static int iucv_sock_sendmsg(struct kiocb *iocb, struct socket *sock, } txmsg.class = 0; + memcpy(&txmsg.class, skb->data, skb->len >= 4 ? 4 : skb->len); txmsg.tag = iucv->send_tag++; memcpy(skb->cb, &txmsg.tag, 4); skb_queue_tail(&iucv->send_skb_q, skb); @@ -1135,8 +1136,7 @@ static void iucv_callback_txdone(struct iucv_path *path, if (this) kfree_skb(this); } - if (!this) - printk(KERN_ERR "AF_IUCV msg tag %u not found\n", msg->tag); + BUG_ON(!this); if (sk->sk_state == IUCV_CLOSING) { if (skb_queue_empty(&iucv_sk(sk)->send_skb_q)) { @@ -1196,7 +1196,7 @@ static int __init afiucv_init(void) } cpcmd("QUERY USERID", iucv_userid, sizeof(iucv_userid), &err); if (unlikely(err)) { - printk(KERN_ERR "AF_IUCV needs the VM userid\n"); + WARN_ON(err); err = -EPROTONOSUPPORT; goto out; } @@ -1210,7 +1210,6 @@ static int __init afiucv_init(void) err = sock_register(&iucv_sock_family_ops); if (err) goto out_proto; - printk(KERN_INFO "AF_IUCV lowlevel driver initialized\n"); return 0; out_proto: @@ -1226,8 +1225,6 @@ static void __exit afiucv_exit(void) sock_unregister(PF_IUCV); proto_unregister(&iucv_proto); iucv_unregister(&af_iucv_handler, 0); - - printk(KERN_INFO "AF_IUCV lowlevel driver unloaded\n"); } module_init(afiucv_init); diff --git a/net/iucv/iucv.c b/net/iucv/iucv.c index 918970762131..d7b54b5bfa69 100644 --- a/net/iucv/iucv.c +++ b/net/iucv/iucv.c @@ -474,14 +474,14 @@ static void iucv_setmask_mp(void) { int cpu; - preempt_disable(); + get_online_cpus(); for_each_online_cpu(cpu) /* Enable all cpus with a declared buffer. */ if (cpu_isset(cpu, iucv_buffer_cpumask) && !cpu_isset(cpu, iucv_irq_cpumask)) smp_call_function_single(cpu, iucv_allow_cpu, - NULL, 0, 1); - preempt_enable(); + NULL, 1); + put_online_cpus(); } /** @@ -497,8 +497,8 @@ static void iucv_setmask_up(void) /* Disable all cpu but the first in cpu_irq_cpumask. */ cpumask = iucv_irq_cpumask; cpu_clear(first_cpu(iucv_irq_cpumask), cpumask); - for_each_cpu_mask(cpu, cpumask) - smp_call_function_single(cpu, iucv_block_cpu, NULL, 0, 1); + for_each_cpu_mask_nr(cpu, cpumask) + smp_call_function_single(cpu, iucv_block_cpu, NULL, 1); } /** @@ -521,16 +521,17 @@ static int iucv_enable(void) goto out; /* Declare per cpu buffers. */ rc = -EIO; - preempt_disable(); + get_online_cpus(); for_each_online_cpu(cpu) - smp_call_function_single(cpu, iucv_declare_cpu, NULL, 0, 1); - preempt_enable(); + smp_call_function_single(cpu, iucv_declare_cpu, NULL, 1); if (cpus_empty(iucv_buffer_cpumask)) /* No cpu could declare an iucv buffer. */ goto out_path; + put_online_cpus(); return 0; out_path: + put_online_cpus(); kfree(iucv_path_table); out: return rc; @@ -545,7 +546,9 @@ out: */ static void iucv_disable(void) { - on_each_cpu(iucv_retrieve_cpu, NULL, 0, 1); + get_online_cpus(); + on_each_cpu(iucv_retrieve_cpu, NULL, 1); + put_online_cpus(); kfree(iucv_path_table); } @@ -564,8 +567,11 @@ static int __cpuinit iucv_cpu_notify(struct notifier_block *self, return NOTIFY_BAD; iucv_param[cpu] = kmalloc_node(sizeof(union iucv_param), GFP_KERNEL|GFP_DMA, cpu_to_node(cpu)); - if (!iucv_param[cpu]) + if (!iucv_param[cpu]) { + kfree(iucv_irq_data[cpu]); + iucv_irq_data[cpu] = NULL; return NOTIFY_BAD; + } break; case CPU_UP_CANCELED: case CPU_UP_CANCELED_FROZEN: @@ -580,7 +586,7 @@ static int __cpuinit iucv_cpu_notify(struct notifier_block *self, case CPU_ONLINE_FROZEN: case CPU_DOWN_FAILED: case CPU_DOWN_FAILED_FROZEN: - smp_call_function_single(cpu, iucv_declare_cpu, NULL, 0, 1); + smp_call_function_single(cpu, iucv_declare_cpu, NULL, 1); break; case CPU_DOWN_PREPARE: case CPU_DOWN_PREPARE_FROZEN: @@ -589,16 +595,16 @@ static int __cpuinit iucv_cpu_notify(struct notifier_block *self, if (cpus_empty(cpumask)) /* Can't offline last IUCV enabled cpu. */ return NOTIFY_BAD; - smp_call_function_single(cpu, iucv_retrieve_cpu, NULL, 0, 1); + smp_call_function_single(cpu, iucv_retrieve_cpu, NULL, 1); if (cpus_empty(iucv_irq_cpumask)) smp_call_function_single(first_cpu(iucv_buffer_cpumask), - iucv_allow_cpu, NULL, 0, 1); + iucv_allow_cpu, NULL, 1); break; } return NOTIFY_OK; } -static struct notifier_block __cpuinitdata iucv_cpu_notifier = { +static struct notifier_block __refdata iucv_cpu_notifier = { .notifier_call = iucv_cpu_notify, }; @@ -652,7 +658,7 @@ static void iucv_cleanup_queue(void) * pending interrupts force them to the work queue by calling * an empty function on all cpus. */ - smp_call_function(__iucv_cleanup_queue, NULL, 0, 1); + smp_call_function(__iucv_cleanup_queue, NULL, 1); spin_lock_irq(&iucv_queue_lock); list_for_each_entry_safe(p, n, &iucv_task_queue, list) { /* Remove stale work items from the task queue. */ @@ -1559,16 +1565,11 @@ static void iucv_external_interrupt(u16 code) p = iucv_irq_data[smp_processor_id()]; if (p->ippathid >= iucv_max_pathid) { - printk(KERN_WARNING "iucv_do_int: Got interrupt with " - "pathid %d > max_connections (%ld)\n", - p->ippathid, iucv_max_pathid - 1); + WARN_ON(p->ippathid >= iucv_max_pathid); iucv_sever_pathid(p->ippathid, iucv_error_no_listener); return; } - if (p->iptype < 0x01 || p->iptype > 0x09) { - printk(KERN_ERR "iucv_do_int: unknown iucv interrupt\n"); - return; - } + BUG_ON(p->iptype < 0x01 || p->iptype > 0x09); work = kmalloc(sizeof(struct iucv_irq_list), GFP_ATOMIC); if (!work) { printk(KERN_WARNING "iucv_external_interrupt: out of memory\n"); diff --git a/net/key/af_key.c b/net/key/af_key.c index 7470e367272b..e55e0441e4d9 100644 --- a/net/key/af_key.c +++ b/net/key/af_key.c @@ -58,6 +58,7 @@ struct pfkey_sock { struct xfrm_policy_walk policy; struct xfrm_state_walk state; } u; + struct sk_buff *skb; } dump; }; @@ -73,22 +74,22 @@ static int pfkey_can_dump(struct sock *sk) return 0; } -static int pfkey_do_dump(struct pfkey_sock *pfk) +static void pfkey_terminate_dump(struct pfkey_sock *pfk) { - int rc; - - rc = pfk->dump.dump(pfk); - if (rc == -ENOBUFS) - return 0; - - pfk->dump.done(pfk); - pfk->dump.dump = NULL; - pfk->dump.done = NULL; - return rc; + if (pfk->dump.dump) { + if (pfk->dump.skb) { + kfree_skb(pfk->dump.skb); + pfk->dump.skb = NULL; + } + pfk->dump.done(pfk); + pfk->dump.dump = NULL; + pfk->dump.done = NULL; + } } static void pfkey_sock_destruct(struct sock *sk) { + pfkey_terminate_dump(pfkey_sk(sk)); skb_queue_purge(&sk->sk_receive_queue); if (!sock_flag(sk, SOCK_DEAD)) { @@ -96,8 +97,8 @@ static void pfkey_sock_destruct(struct sock *sk) return; } - BUG_TRAP(!atomic_read(&sk->sk_rmem_alloc)); - BUG_TRAP(!atomic_read(&sk->sk_wmem_alloc)); + WARN_ON(atomic_read(&sk->sk_rmem_alloc)); + WARN_ON(atomic_read(&sk->sk_wmem_alloc)); atomic_dec(&pfkey_socks_nr); } @@ -310,6 +311,31 @@ static int pfkey_broadcast(struct sk_buff *skb, gfp_t allocation, return err; } +static int pfkey_do_dump(struct pfkey_sock *pfk) +{ + struct sadb_msg *hdr; + int rc; + + rc = pfk->dump.dump(pfk); + if (rc == -ENOBUFS) + return 0; + + if (pfk->dump.skb) { + if (!pfkey_can_dump(&pfk->sk)) + return 0; + + hdr = (struct sadb_msg *) pfk->dump.skb->data; + hdr->sadb_msg_seq = 0; + hdr->sadb_msg_errno = rc; + pfkey_broadcast(pfk->dump.skb, GFP_ATOMIC, BROADCAST_ONE, + &pfk->sk); + pfk->dump.skb = NULL; + } + + pfkey_terminate_dump(pfk); + return rc; +} + static inline void pfkey_hdr_dup(struct sadb_msg *new, struct sadb_msg *orig) { *new = *orig; @@ -372,6 +398,7 @@ static u8 sadb_ext_min_len[] = { [SADB_X_EXT_NAT_T_DPORT] = (u8) sizeof(struct sadb_x_nat_t_port), [SADB_X_EXT_NAT_T_OA] = (u8) sizeof(struct sadb_address), [SADB_X_EXT_SEC_CTX] = (u8) sizeof(struct sadb_x_sec_ctx), + [SADB_X_EXT_KMADDRESS] = (u8) sizeof(struct sadb_x_kmaddress), }; /* Verify sadb_address_{len,prefixlen} against sa_family. */ @@ -579,25 +606,43 @@ static uint8_t pfkey_proto_from_xfrm(uint8_t proto) return (proto ? proto : IPSEC_PROTO_ANY); } -static int pfkey_sadb_addr2xfrm_addr(struct sadb_address *addr, - xfrm_address_t *xaddr) +static inline int pfkey_sockaddr_len(sa_family_t family) +{ + switch (family) { + case AF_INET: + return sizeof(struct sockaddr_in); +#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) + case AF_INET6: + return sizeof(struct sockaddr_in6); +#endif + } + return 0; +} + +static +int pfkey_sockaddr_extract(const struct sockaddr *sa, xfrm_address_t *xaddr) { - switch (((struct sockaddr*)(addr + 1))->sa_family) { + switch (sa->sa_family) { case AF_INET: xaddr->a4 = - ((struct sockaddr_in *)(addr + 1))->sin_addr.s_addr; + ((struct sockaddr_in *)sa)->sin_addr.s_addr; return AF_INET; #if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) case AF_INET6: memcpy(xaddr->a6, - &((struct sockaddr_in6 *)(addr + 1))->sin6_addr, + &((struct sockaddr_in6 *)sa)->sin6_addr, sizeof(struct in6_addr)); return AF_INET6; #endif - default: - return 0; } - /* NOTREACHED */ + return 0; +} + +static +int pfkey_sadb_addr2xfrm_addr(struct sadb_address *addr, xfrm_address_t *xaddr) +{ + return pfkey_sockaddr_extract((struct sockaddr *)(addr + 1), + xaddr); } static struct xfrm_state *pfkey_xfrm_state_lookup(struct sadb_msg *hdr, void **ext_hdrs) @@ -642,20 +687,11 @@ static struct xfrm_state *pfkey_xfrm_state_lookup(struct sadb_msg *hdr, void ** } #define PFKEY_ALIGN8(a) (1 + (((a) - 1) | (8 - 1))) + static int pfkey_sockaddr_size(sa_family_t family) { - switch (family) { - case AF_INET: - return PFKEY_ALIGN8(sizeof(struct sockaddr_in)); -#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) - case AF_INET6: - return PFKEY_ALIGN8(sizeof(struct sockaddr_in6)); -#endif - default: - return 0; - } - /* NOTREACHED */ + return PFKEY_ALIGN8(pfkey_sockaddr_len(family)); } static inline int pfkey_mode_from_xfrm(int mode) @@ -687,6 +723,36 @@ static inline int pfkey_mode_to_xfrm(int mode) } } +static unsigned int pfkey_sockaddr_fill(xfrm_address_t *xaddr, __be16 port, + struct sockaddr *sa, + unsigned short family) +{ + switch (family) { + case AF_INET: + { + struct sockaddr_in *sin = (struct sockaddr_in *)sa; + sin->sin_family = AF_INET; + sin->sin_port = port; + sin->sin_addr.s_addr = xaddr->a4; + memset(sin->sin_zero, 0, sizeof(sin->sin_zero)); + return 32; + } +#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) + case AF_INET6: + { + struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *)sa; + sin6->sin6_family = AF_INET6; + sin6->sin6_port = port; + sin6->sin6_flowinfo = 0; + ipv6_addr_copy(&sin6->sin6_addr, (struct in6_addr *)xaddr->a6); + sin6->sin6_scope_id = 0; + return 128; + } +#endif + } + return 0; +} + static struct sk_buff *__pfkey_xfrm_state2msg(struct xfrm_state *x, int add_keys, int hsc) { @@ -697,13 +763,9 @@ static struct sk_buff *__pfkey_xfrm_state2msg(struct xfrm_state *x, struct sadb_address *addr; struct sadb_key *key; struct sadb_x_sa2 *sa2; - struct sockaddr_in *sin; struct sadb_x_sec_ctx *sec_ctx; struct xfrm_sec_ctx *xfrm_ctx; int ctx_size = 0; -#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) - struct sockaddr_in6 *sin6; -#endif int size; int auth_key_size = 0; int encrypt_key_size = 0; @@ -732,14 +794,7 @@ static struct sk_buff *__pfkey_xfrm_state2msg(struct xfrm_state *x, } /* identity & sensitivity */ - - if ((x->props.family == AF_INET && - x->sel.saddr.a4 != x->props.saddr.a4) -#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) - || (x->props.family == AF_INET6 && - memcmp (x->sel.saddr.a6, x->props.saddr.a6, sizeof (struct in6_addr))) -#endif - ) + if (xfrm_addr_cmp(&x->sel.saddr, &x->props.saddr, x->props.family)) size += sizeof(struct sadb_address) + sockaddr_size; if (add_keys) { @@ -861,29 +916,12 @@ static struct sk_buff *__pfkey_xfrm_state2msg(struct xfrm_state *x, protocol's number." - RFC2367 */ addr->sadb_address_proto = 0; addr->sadb_address_reserved = 0; - if (x->props.family == AF_INET) { - addr->sadb_address_prefixlen = 32; - - sin = (struct sockaddr_in *) (addr + 1); - sin->sin_family = AF_INET; - sin->sin_addr.s_addr = x->props.saddr.a4; - sin->sin_port = 0; - memset(sin->sin_zero, 0, sizeof(sin->sin_zero)); - } -#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) - else if (x->props.family == AF_INET6) { - addr->sadb_address_prefixlen = 128; - sin6 = (struct sockaddr_in6 *) (addr + 1); - sin6->sin6_family = AF_INET6; - sin6->sin6_port = 0; - sin6->sin6_flowinfo = 0; - memcpy(&sin6->sin6_addr, x->props.saddr.a6, - sizeof(struct in6_addr)); - sin6->sin6_scope_id = 0; - } -#endif - else + addr->sadb_address_prefixlen = + pfkey_sockaddr_fill(&x->props.saddr, 0, + (struct sockaddr *) (addr + 1), + x->props.family); + if (!addr->sadb_address_prefixlen) BUG(); /* dst address */ @@ -894,70 +932,32 @@ static struct sk_buff *__pfkey_xfrm_state2msg(struct xfrm_state *x, sizeof(uint64_t); addr->sadb_address_exttype = SADB_EXT_ADDRESS_DST; addr->sadb_address_proto = 0; - addr->sadb_address_prefixlen = 32; /* XXX */ addr->sadb_address_reserved = 0; - if (x->props.family == AF_INET) { - sin = (struct sockaddr_in *) (addr + 1); - sin->sin_family = AF_INET; - sin->sin_addr.s_addr = x->id.daddr.a4; - sin->sin_port = 0; - memset(sin->sin_zero, 0, sizeof(sin->sin_zero)); - if (x->sel.saddr.a4 != x->props.saddr.a4) { - addr = (struct sadb_address*) skb_put(skb, - sizeof(struct sadb_address)+sockaddr_size); - addr->sadb_address_len = - (sizeof(struct sadb_address)+sockaddr_size)/ - sizeof(uint64_t); - addr->sadb_address_exttype = SADB_EXT_ADDRESS_PROXY; - addr->sadb_address_proto = - pfkey_proto_from_xfrm(x->sel.proto); - addr->sadb_address_prefixlen = x->sel.prefixlen_s; - addr->sadb_address_reserved = 0; - - sin = (struct sockaddr_in *) (addr + 1); - sin->sin_family = AF_INET; - sin->sin_addr.s_addr = x->sel.saddr.a4; - sin->sin_port = x->sel.sport; - memset(sin->sin_zero, 0, sizeof(sin->sin_zero)); - } - } -#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) - else if (x->props.family == AF_INET6) { - addr->sadb_address_prefixlen = 128; + addr->sadb_address_prefixlen = + pfkey_sockaddr_fill(&x->id.daddr, 0, + (struct sockaddr *) (addr + 1), + x->props.family); + if (!addr->sadb_address_prefixlen) + BUG(); - sin6 = (struct sockaddr_in6 *) (addr + 1); - sin6->sin6_family = AF_INET6; - sin6->sin6_port = 0; - sin6->sin6_flowinfo = 0; - memcpy(&sin6->sin6_addr, x->id.daddr.a6, sizeof(struct in6_addr)); - sin6->sin6_scope_id = 0; + if (xfrm_addr_cmp(&x->sel.saddr, &x->props.saddr, + x->props.family)) { + addr = (struct sadb_address*) skb_put(skb, + sizeof(struct sadb_address)+sockaddr_size); + addr->sadb_address_len = + (sizeof(struct sadb_address)+sockaddr_size)/ + sizeof(uint64_t); + addr->sadb_address_exttype = SADB_EXT_ADDRESS_PROXY; + addr->sadb_address_proto = + pfkey_proto_from_xfrm(x->sel.proto); + addr->sadb_address_prefixlen = x->sel.prefixlen_s; + addr->sadb_address_reserved = 0; - if (memcmp (x->sel.saddr.a6, x->props.saddr.a6, - sizeof(struct in6_addr))) { - addr = (struct sadb_address *) skb_put(skb, - sizeof(struct sadb_address)+sockaddr_size); - addr->sadb_address_len = - (sizeof(struct sadb_address)+sockaddr_size)/ - sizeof(uint64_t); - addr->sadb_address_exttype = SADB_EXT_ADDRESS_PROXY; - addr->sadb_address_proto = - pfkey_proto_from_xfrm(x->sel.proto); - addr->sadb_address_prefixlen = x->sel.prefixlen_s; - addr->sadb_address_reserved = 0; - - sin6 = (struct sockaddr_in6 *) (addr + 1); - sin6->sin6_family = AF_INET6; - sin6->sin6_port = x->sel.sport; - sin6->sin6_flowinfo = 0; - memcpy(&sin6->sin6_addr, x->sel.saddr.a6, - sizeof(struct in6_addr)); - sin6->sin6_scope_id = 0; - } + pfkey_sockaddr_fill(&x->sel.saddr, x->sel.sport, + (struct sockaddr *) (addr + 1), + x->props.family); } -#endif - else - BUG(); /* auth key */ if (add_keys && auth_key_size) { @@ -1763,9 +1763,14 @@ static int dump_sa(struct xfrm_state *x, int count, void *ptr) out_hdr->sadb_msg_satype = pfkey_proto2satype(x->id.proto); out_hdr->sadb_msg_errno = 0; out_hdr->sadb_msg_reserved = 0; - out_hdr->sadb_msg_seq = count; + out_hdr->sadb_msg_seq = count + 1; out_hdr->sadb_msg_pid = pfk->dump.msg_pid; - pfkey_broadcast(out_skb, GFP_ATOMIC, BROADCAST_ONE, &pfk->sk); + + if (pfk->dump.skb) + pfkey_broadcast(pfk->dump.skb, GFP_ATOMIC, BROADCAST_ONE, + &pfk->sk); + pfk->dump.skb = out_skb; + return 0; } @@ -1853,10 +1858,6 @@ static int parse_ipsecrequest(struct xfrm_policy *xp, struct sadb_x_ipsecrequest *rq) { struct xfrm_tmpl *t = xp->xfrm_vec + xp->xfrm_nr; - struct sockaddr_in *sin; -#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) - struct sockaddr_in6 *sin6; -#endif int mode; if (xp->xfrm_nr >= XFRM_MAX_DEPTH) @@ -1881,31 +1882,19 @@ parse_ipsecrequest(struct xfrm_policy *xp, struct sadb_x_ipsecrequest *rq) /* addresses present only in tunnel mode */ if (t->mode == XFRM_MODE_TUNNEL) { - struct sockaddr *sa; - sa = (struct sockaddr *)(rq+1); - switch(sa->sa_family) { - case AF_INET: - sin = (struct sockaddr_in*)sa; - t->saddr.a4 = sin->sin_addr.s_addr; - sin++; - if (sin->sin_family != AF_INET) - return -EINVAL; - t->id.daddr.a4 = sin->sin_addr.s_addr; - break; -#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) - case AF_INET6: - sin6 = (struct sockaddr_in6*)sa; - memcpy(t->saddr.a6, &sin6->sin6_addr, sizeof(struct in6_addr)); - sin6++; - if (sin6->sin6_family != AF_INET6) - return -EINVAL; - memcpy(t->id.daddr.a6, &sin6->sin6_addr, sizeof(struct in6_addr)); - break; -#endif - default: + u8 *sa = (u8 *) (rq + 1); + int family, socklen; + + family = pfkey_sockaddr_extract((struct sockaddr *)sa, + &t->saddr); + if (!family) return -EINVAL; - } - t->encap_family = sa->sa_family; + + socklen = pfkey_sockaddr_len(family); + if (pfkey_sockaddr_extract((struct sockaddr *)(sa + socklen), + &t->id.daddr) != family) + return -EINVAL; + t->encap_family = family; } else t->encap_family = xp->family; @@ -1952,9 +1941,7 @@ static int pfkey_xfrm_policy2msg_size(struct xfrm_policy *xp) for (i=0; i<xp->xfrm_nr; i++) { t = xp->xfrm_vec + i; - socklen += (t->encap_family == AF_INET ? - sizeof(struct sockaddr_in) : - sizeof(struct sockaddr_in6)); + socklen += pfkey_sockaddr_len(t->encap_family); } return sizeof(struct sadb_msg) + @@ -1987,18 +1974,12 @@ static int pfkey_xfrm_policy2msg(struct sk_buff *skb, struct xfrm_policy *xp, in struct sadb_address *addr; struct sadb_lifetime *lifetime; struct sadb_x_policy *pol; - struct sockaddr_in *sin; struct sadb_x_sec_ctx *sec_ctx; struct xfrm_sec_ctx *xfrm_ctx; -#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) - struct sockaddr_in6 *sin6; -#endif int i; int size; int sockaddr_size = pfkey_sockaddr_size(xp->family); - int socklen = (xp->family == AF_INET ? - sizeof(struct sockaddr_in) : - sizeof(struct sockaddr_in6)); + int socklen = pfkey_sockaddr_len(xp->family); size = pfkey_xfrm_policy2msg_size(xp); @@ -2016,26 +1997,10 @@ static int pfkey_xfrm_policy2msg(struct sk_buff *skb, struct xfrm_policy *xp, in addr->sadb_address_proto = pfkey_proto_from_xfrm(xp->selector.proto); addr->sadb_address_prefixlen = xp->selector.prefixlen_s; addr->sadb_address_reserved = 0; - /* src address */ - if (xp->family == AF_INET) { - sin = (struct sockaddr_in *) (addr + 1); - sin->sin_family = AF_INET; - sin->sin_addr.s_addr = xp->selector.saddr.a4; - sin->sin_port = xp->selector.sport; - memset(sin->sin_zero, 0, sizeof(sin->sin_zero)); - } -#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) - else if (xp->family == AF_INET6) { - sin6 = (struct sockaddr_in6 *) (addr + 1); - sin6->sin6_family = AF_INET6; - sin6->sin6_port = xp->selector.sport; - sin6->sin6_flowinfo = 0; - memcpy(&sin6->sin6_addr, xp->selector.saddr.a6, - sizeof(struct in6_addr)); - sin6->sin6_scope_id = 0; - } -#endif - else + if (!pfkey_sockaddr_fill(&xp->selector.saddr, + xp->selector.sport, + (struct sockaddr *) (addr + 1), + xp->family)) BUG(); /* dst address */ @@ -2048,26 +2013,10 @@ static int pfkey_xfrm_policy2msg(struct sk_buff *skb, struct xfrm_policy *xp, in addr->sadb_address_proto = pfkey_proto_from_xfrm(xp->selector.proto); addr->sadb_address_prefixlen = xp->selector.prefixlen_d; addr->sadb_address_reserved = 0; - if (xp->family == AF_INET) { - sin = (struct sockaddr_in *) (addr + 1); - sin->sin_family = AF_INET; - sin->sin_addr.s_addr = xp->selector.daddr.a4; - sin->sin_port = xp->selector.dport; - memset(sin->sin_zero, 0, sizeof(sin->sin_zero)); - } -#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) - else if (xp->family == AF_INET6) { - sin6 = (struct sockaddr_in6 *) (addr + 1); - sin6->sin6_family = AF_INET6; - sin6->sin6_port = xp->selector.dport; - sin6->sin6_flowinfo = 0; - memcpy(&sin6->sin6_addr, xp->selector.daddr.a6, - sizeof(struct in6_addr)); - sin6->sin6_scope_id = 0; - } -#endif - else - BUG(); + + pfkey_sockaddr_fill(&xp->selector.daddr, xp->selector.dport, + (struct sockaddr *) (addr + 1), + xp->family); /* hard time */ lifetime = (struct sadb_lifetime *) skb_put(skb, @@ -2121,12 +2070,13 @@ static int pfkey_xfrm_policy2msg(struct sk_buff *skb, struct xfrm_policy *xp, in int mode; req_size = sizeof(struct sadb_x_ipsecrequest); - if (t->mode == XFRM_MODE_TUNNEL) - req_size += ((t->encap_family == AF_INET ? - sizeof(struct sockaddr_in) : - sizeof(struct sockaddr_in6)) * 2); - else + if (t->mode == XFRM_MODE_TUNNEL) { + socklen = pfkey_sockaddr_len(t->encap_family); + req_size += socklen * 2; + } else { size -= 2*socklen; + socklen = 0; + } rq = (void*)skb_put(skb, req_size); pol->sadb_x_policy_len += req_size/8; memset(rq, 0, sizeof(*rq)); @@ -2141,42 +2091,15 @@ static int pfkey_xfrm_policy2msg(struct sk_buff *skb, struct xfrm_policy *xp, in if (t->optional) rq->sadb_x_ipsecrequest_level = IPSEC_LEVEL_USE; rq->sadb_x_ipsecrequest_reqid = t->reqid; + if (t->mode == XFRM_MODE_TUNNEL) { - switch (t->encap_family) { - case AF_INET: - sin = (void*)(rq+1); - sin->sin_family = AF_INET; - sin->sin_addr.s_addr = t->saddr.a4; - sin->sin_port = 0; - memset(sin->sin_zero, 0, sizeof(sin->sin_zero)); - sin++; - sin->sin_family = AF_INET; - sin->sin_addr.s_addr = t->id.daddr.a4; - sin->sin_port = 0; - memset(sin->sin_zero, 0, sizeof(sin->sin_zero)); - break; -#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) - case AF_INET6: - sin6 = (void*)(rq+1); - sin6->sin6_family = AF_INET6; - sin6->sin6_port = 0; - sin6->sin6_flowinfo = 0; - memcpy(&sin6->sin6_addr, t->saddr.a6, - sizeof(struct in6_addr)); - sin6->sin6_scope_id = 0; - - sin6++; - sin6->sin6_family = AF_INET6; - sin6->sin6_port = 0; - sin6->sin6_flowinfo = 0; - memcpy(&sin6->sin6_addr, t->id.daddr.a6, - sizeof(struct in6_addr)); - sin6->sin6_scope_id = 0; - break; -#endif - default: - break; - } + u8 *sa = (void *)(rq + 1); + pfkey_sockaddr_fill(&t->saddr, 0, + (struct sockaddr *)sa, + t->encap_family); + pfkey_sockaddr_fill(&t->id.daddr, 0, + (struct sockaddr *) (sa + socklen), + t->encap_family); } } @@ -2346,7 +2269,7 @@ static int pfkey_spdadd(struct sock *sk, struct sk_buff *skb, struct sadb_msg *h return 0; out: - xp->dead = 1; + xp->walk.dead = 1; xfrm_policy_destroy(xp); return err; } @@ -2418,6 +2341,7 @@ static int pfkey_spddelete(struct sock *sk, struct sk_buff *skb, struct sadb_msg c.seq = hdr->sadb_msg_seq; c.pid = hdr->sadb_msg_pid; + c.data.byid = 0; c.event = XFRM_MSG_DELPOLICY; km_policy_notify(xp, pol->sadb_x_policy_dir-1, &c); @@ -2459,61 +2383,28 @@ out: #ifdef CONFIG_NET_KEY_MIGRATE static int pfkey_sockaddr_pair_size(sa_family_t family) { - switch (family) { - case AF_INET: - return PFKEY_ALIGN8(sizeof(struct sockaddr_in) * 2); -#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) - case AF_INET6: - return PFKEY_ALIGN8(sizeof(struct sockaddr_in6) * 2); -#endif - default: - return 0; - } - /* NOTREACHED */ + return PFKEY_ALIGN8(pfkey_sockaddr_len(family) * 2); } -static int parse_sockaddr_pair(struct sadb_x_ipsecrequest *rq, +static int parse_sockaddr_pair(struct sockaddr *sa, int ext_len, xfrm_address_t *saddr, xfrm_address_t *daddr, u16 *family) { - struct sockaddr *sa = (struct sockaddr *)(rq + 1); - if (rq->sadb_x_ipsecrequest_len < - pfkey_sockaddr_pair_size(sa->sa_family)) + int af, socklen; + + if (ext_len < pfkey_sockaddr_pair_size(sa->sa_family)) return -EINVAL; - switch (sa->sa_family) { - case AF_INET: - { - struct sockaddr_in *sin; - sin = (struct sockaddr_in *)sa; - if ((sin+1)->sin_family != AF_INET) - return -EINVAL; - memcpy(&saddr->a4, &sin->sin_addr, sizeof(saddr->a4)); - sin++; - memcpy(&daddr->a4, &sin->sin_addr, sizeof(daddr->a4)); - *family = AF_INET; - break; - } -#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) - case AF_INET6: - { - struct sockaddr_in6 *sin6; - sin6 = (struct sockaddr_in6 *)sa; - if ((sin6+1)->sin6_family != AF_INET6) - return -EINVAL; - memcpy(&saddr->a6, &sin6->sin6_addr, - sizeof(saddr->a6)); - sin6++; - memcpy(&daddr->a6, &sin6->sin6_addr, - sizeof(daddr->a6)); - *family = AF_INET6; - break; - } -#endif - default: + af = pfkey_sockaddr_extract(sa, saddr); + if (!af) + return -EINVAL; + + socklen = pfkey_sockaddr_len(af); + if (pfkey_sockaddr_extract((struct sockaddr *) (((u8 *)sa) + socklen), + daddr) != af) return -EINVAL; - } + *family = af; return 0; } @@ -2529,7 +2420,9 @@ static int ipsecrequests_to_migrate(struct sadb_x_ipsecrequest *rq1, int len, return -EINVAL; /* old endoints */ - err = parse_sockaddr_pair(rq1, &m->old_saddr, &m->old_daddr, + err = parse_sockaddr_pair((struct sockaddr *)(rq1 + 1), + rq1->sadb_x_ipsecrequest_len, + &m->old_saddr, &m->old_daddr, &m->old_family); if (err) return err; @@ -2542,7 +2435,9 @@ static int ipsecrequests_to_migrate(struct sadb_x_ipsecrequest *rq1, int len, return -EINVAL; /* new endpoints */ - err = parse_sockaddr_pair(rq2, &m->new_saddr, &m->new_daddr, + err = parse_sockaddr_pair((struct sockaddr *)(rq2 + 1), + rq2->sadb_x_ipsecrequest_len, + &m->new_saddr, &m->new_daddr, &m->new_family); if (err) return err; @@ -2568,29 +2463,40 @@ static int pfkey_migrate(struct sock *sk, struct sk_buff *skb, int i, len, ret, err = -EINVAL; u8 dir; struct sadb_address *sa; + struct sadb_x_kmaddress *kma; struct sadb_x_policy *pol; struct sadb_x_ipsecrequest *rq; struct xfrm_selector sel; struct xfrm_migrate m[XFRM_MAX_DEPTH]; + struct xfrm_kmaddress k; if (!present_and_same_family(ext_hdrs[SADB_EXT_ADDRESS_SRC - 1], - ext_hdrs[SADB_EXT_ADDRESS_DST - 1]) || + ext_hdrs[SADB_EXT_ADDRESS_DST - 1]) || !ext_hdrs[SADB_X_EXT_POLICY - 1]) { err = -EINVAL; goto out; } + kma = ext_hdrs[SADB_X_EXT_KMADDRESS - 1]; pol = ext_hdrs[SADB_X_EXT_POLICY - 1]; - if (!pol) { - err = -EINVAL; - goto out; - } if (pol->sadb_x_policy_dir >= IPSEC_DIR_MAX) { err = -EINVAL; goto out; } + if (kma) { + /* convert sadb_x_kmaddress to xfrm_kmaddress */ + k.reserved = kma->sadb_x_kmaddress_reserved; + ret = parse_sockaddr_pair((struct sockaddr *)(kma + 1), + 8*(kma->sadb_x_kmaddress_len) - sizeof(*kma), + &k.local, &k.remote, &k.family); + if (ret < 0) { + err = ret; + goto out; + } + } + dir = pol->sadb_x_policy_dir - 1; memset(&sel, 0, sizeof(sel)); @@ -2635,7 +2541,8 @@ static int pfkey_migrate(struct sock *sk, struct sk_buff *skb, goto out; } - return xfrm_migrate(&sel, dir, XFRM_POLICY_TYPE_MAIN, m, i); + return xfrm_migrate(&sel, dir, XFRM_POLICY_TYPE_MAIN, m, i, + kma ? &k : NULL); out: return err; @@ -2714,9 +2621,14 @@ static int dump_sp(struct xfrm_policy *xp, int dir, int count, void *ptr) out_hdr->sadb_msg_type = SADB_X_SPDDUMP; out_hdr->sadb_msg_satype = SADB_SATYPE_UNSPEC; out_hdr->sadb_msg_errno = 0; - out_hdr->sadb_msg_seq = count; + out_hdr->sadb_msg_seq = count + 1; out_hdr->sadb_msg_pid = pfk->dump.msg_pid; - pfkey_broadcast(out_skb, GFP_ATOMIC, BROADCAST_ONE, &pfk->sk); + + if (pfk->dump.skb) + pfkey_broadcast(pfk->dump.skb, GFP_ATOMIC, BROADCAST_ONE, + &pfk->sk); + pfk->dump.skb = out_skb; + return 0; } @@ -3094,10 +3006,6 @@ static int pfkey_send_acquire(struct xfrm_state *x, struct xfrm_tmpl *t, struct struct sadb_msg *hdr; struct sadb_address *addr; struct sadb_x_policy *pol; - struct sockaddr_in *sin; -#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) - struct sockaddr_in6 *sin6; -#endif int sockaddr_size; int size; struct sadb_x_sec_ctx *sec_ctx; @@ -3146,29 +3054,11 @@ static int pfkey_send_acquire(struct xfrm_state *x, struct xfrm_tmpl *t, struct addr->sadb_address_exttype = SADB_EXT_ADDRESS_SRC; addr->sadb_address_proto = 0; addr->sadb_address_reserved = 0; - if (x->props.family == AF_INET) { - addr->sadb_address_prefixlen = 32; - - sin = (struct sockaddr_in *) (addr + 1); - sin->sin_family = AF_INET; - sin->sin_addr.s_addr = x->props.saddr.a4; - sin->sin_port = 0; - memset(sin->sin_zero, 0, sizeof(sin->sin_zero)); - } -#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) - else if (x->props.family == AF_INET6) { - addr->sadb_address_prefixlen = 128; - - sin6 = (struct sockaddr_in6 *) (addr + 1); - sin6->sin6_family = AF_INET6; - sin6->sin6_port = 0; - sin6->sin6_flowinfo = 0; - memcpy(&sin6->sin6_addr, - x->props.saddr.a6, sizeof(struct in6_addr)); - sin6->sin6_scope_id = 0; - } -#endif - else + addr->sadb_address_prefixlen = + pfkey_sockaddr_fill(&x->props.saddr, 0, + (struct sockaddr *) (addr + 1), + x->props.family); + if (!addr->sadb_address_prefixlen) BUG(); /* dst address */ @@ -3180,29 +3070,11 @@ static int pfkey_send_acquire(struct xfrm_state *x, struct xfrm_tmpl *t, struct addr->sadb_address_exttype = SADB_EXT_ADDRESS_DST; addr->sadb_address_proto = 0; addr->sadb_address_reserved = 0; - if (x->props.family == AF_INET) { - addr->sadb_address_prefixlen = 32; - - sin = (struct sockaddr_in *) (addr + 1); - sin->sin_family = AF_INET; - sin->sin_addr.s_addr = x->id.daddr.a4; - sin->sin_port = 0; - memset(sin->sin_zero, 0, sizeof(sin->sin_zero)); - } -#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) - else if (x->props.family == AF_INET6) { - addr->sadb_address_prefixlen = 128; - - sin6 = (struct sockaddr_in6 *) (addr + 1); - sin6->sin6_family = AF_INET6; - sin6->sin6_port = 0; - sin6->sin6_flowinfo = 0; - memcpy(&sin6->sin6_addr, - x->id.daddr.a6, sizeof(struct in6_addr)); - sin6->sin6_scope_id = 0; - } -#endif - else + addr->sadb_address_prefixlen = + pfkey_sockaddr_fill(&x->id.daddr, 0, + (struct sockaddr *) (addr + 1), + x->props.family); + if (!addr->sadb_address_prefixlen) BUG(); pol = (struct sadb_x_policy *) skb_put(skb, sizeof(struct sadb_x_policy)); @@ -3328,10 +3200,6 @@ static int pfkey_send_new_mapping(struct xfrm_state *x, xfrm_address_t *ipaddr, struct sadb_sa *sa; struct sadb_address *addr; struct sadb_x_nat_t_port *n_port; - struct sockaddr_in *sin; -#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) - struct sockaddr_in6 *sin6; -#endif int sockaddr_size; int size; __u8 satype = (x->id.proto == IPPROTO_ESP ? SADB_SATYPE_ESP : 0); @@ -3395,29 +3263,11 @@ static int pfkey_send_new_mapping(struct xfrm_state *x, xfrm_address_t *ipaddr, addr->sadb_address_exttype = SADB_EXT_ADDRESS_SRC; addr->sadb_address_proto = 0; addr->sadb_address_reserved = 0; - if (x->props.family == AF_INET) { - addr->sadb_address_prefixlen = 32; - - sin = (struct sockaddr_in *) (addr + 1); - sin->sin_family = AF_INET; - sin->sin_addr.s_addr = x->props.saddr.a4; - sin->sin_port = 0; - memset(sin->sin_zero, 0, sizeof(sin->sin_zero)); - } -#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) - else if (x->props.family == AF_INET6) { - addr->sadb_address_prefixlen = 128; - - sin6 = (struct sockaddr_in6 *) (addr + 1); - sin6->sin6_family = AF_INET6; - sin6->sin6_port = 0; - sin6->sin6_flowinfo = 0; - memcpy(&sin6->sin6_addr, - x->props.saddr.a6, sizeof(struct in6_addr)); - sin6->sin6_scope_id = 0; - } -#endif - else + addr->sadb_address_prefixlen = + pfkey_sockaddr_fill(&x->props.saddr, 0, + (struct sockaddr *) (addr + 1), + x->props.family); + if (!addr->sadb_address_prefixlen) BUG(); /* NAT_T_SPORT (old port) */ @@ -3436,28 +3286,11 @@ static int pfkey_send_new_mapping(struct xfrm_state *x, xfrm_address_t *ipaddr, addr->sadb_address_exttype = SADB_EXT_ADDRESS_DST; addr->sadb_address_proto = 0; addr->sadb_address_reserved = 0; - if (x->props.family == AF_INET) { - addr->sadb_address_prefixlen = 32; - - sin = (struct sockaddr_in *) (addr + 1); - sin->sin_family = AF_INET; - sin->sin_addr.s_addr = ipaddr->a4; - sin->sin_port = 0; - memset(sin->sin_zero, 0, sizeof(sin->sin_zero)); - } -#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) - else if (x->props.family == AF_INET6) { - addr->sadb_address_prefixlen = 128; - - sin6 = (struct sockaddr_in6 *) (addr + 1); - sin6->sin6_family = AF_INET6; - sin6->sin6_port = 0; - sin6->sin6_flowinfo = 0; - memcpy(&sin6->sin6_addr, &ipaddr->a6, sizeof(struct in6_addr)); - sin6->sin6_scope_id = 0; - } -#endif - else + addr->sadb_address_prefixlen = + pfkey_sockaddr_fill(ipaddr, 0, + (struct sockaddr *) (addr + 1), + x->props.family); + if (!addr->sadb_address_prefixlen) BUG(); /* NAT_T_DPORT (new port) */ @@ -3475,10 +3308,6 @@ static int set_sadb_address(struct sk_buff *skb, int sasize, int type, struct xfrm_selector *sel) { struct sadb_address *addr; - struct sockaddr_in *sin; -#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) - struct sockaddr_in6 *sin6; -#endif addr = (struct sadb_address *)skb_put(skb, sizeof(struct sadb_address) + sasize); addr->sadb_address_len = (sizeof(struct sadb_address) + sasize)/8; addr->sadb_address_exttype = type; @@ -3487,50 +3316,16 @@ static int set_sadb_address(struct sk_buff *skb, int sasize, int type, switch (type) { case SADB_EXT_ADDRESS_SRC: - if (sel->family == AF_INET) { - addr->sadb_address_prefixlen = sel->prefixlen_s; - sin = (struct sockaddr_in *)(addr + 1); - sin->sin_family = AF_INET; - memcpy(&sin->sin_addr.s_addr, &sel->saddr, - sizeof(sin->sin_addr.s_addr)); - sin->sin_port = 0; - memset(sin->sin_zero, 0, sizeof(sin->sin_zero)); - } -#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) - else if (sel->family == AF_INET6) { - addr->sadb_address_prefixlen = sel->prefixlen_s; - sin6 = (struct sockaddr_in6 *)(addr + 1); - sin6->sin6_family = AF_INET6; - sin6->sin6_port = 0; - sin6->sin6_flowinfo = 0; - sin6->sin6_scope_id = 0; - memcpy(&sin6->sin6_addr.s6_addr, &sel->saddr, - sizeof(sin6->sin6_addr.s6_addr)); - } -#endif + addr->sadb_address_prefixlen = sel->prefixlen_s; + pfkey_sockaddr_fill(&sel->saddr, 0, + (struct sockaddr *)(addr + 1), + sel->family); break; case SADB_EXT_ADDRESS_DST: - if (sel->family == AF_INET) { - addr->sadb_address_prefixlen = sel->prefixlen_d; - sin = (struct sockaddr_in *)(addr + 1); - sin->sin_family = AF_INET; - memcpy(&sin->sin_addr.s_addr, &sel->daddr, - sizeof(sin->sin_addr.s_addr)); - sin->sin_port = 0; - memset(sin->sin_zero, 0, sizeof(sin->sin_zero)); - } -#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) - else if (sel->family == AF_INET6) { - addr->sadb_address_prefixlen = sel->prefixlen_d; - sin6 = (struct sockaddr_in6 *)(addr + 1); - sin6->sin6_family = AF_INET6; - sin6->sin6_port = 0; - sin6->sin6_flowinfo = 0; - sin6->sin6_scope_id = 0; - memcpy(&sin6->sin6_addr.s6_addr, &sel->daddr, - sizeof(sin6->sin6_addr.s6_addr)); - } -#endif + addr->sadb_address_prefixlen = sel->prefixlen_d; + pfkey_sockaddr_fill(&sel->daddr, 0, + (struct sockaddr *)(addr + 1), + sel->family); break; default: return -EINVAL; @@ -3539,16 +3334,40 @@ static int set_sadb_address(struct sk_buff *skb, int sasize, int type, return 0; } + +static int set_sadb_kmaddress(struct sk_buff *skb, struct xfrm_kmaddress *k) +{ + struct sadb_x_kmaddress *kma; + u8 *sa; + int family = k->family; + int socklen = pfkey_sockaddr_len(family); + int size_req; + + size_req = (sizeof(struct sadb_x_kmaddress) + + pfkey_sockaddr_pair_size(family)); + + kma = (struct sadb_x_kmaddress *)skb_put(skb, size_req); + memset(kma, 0, size_req); + kma->sadb_x_kmaddress_len = size_req / 8; + kma->sadb_x_kmaddress_exttype = SADB_X_EXT_KMADDRESS; + kma->sadb_x_kmaddress_reserved = k->reserved; + + sa = (u8 *)(kma + 1); + if (!pfkey_sockaddr_fill(&k->local, 0, (struct sockaddr *)sa, family) || + !pfkey_sockaddr_fill(&k->remote, 0, (struct sockaddr *)(sa+socklen), family)) + return -EINVAL; + + return 0; +} + static int set_ipsecrequest(struct sk_buff *skb, uint8_t proto, uint8_t mode, int level, uint32_t reqid, uint8_t family, xfrm_address_t *src, xfrm_address_t *dst) { struct sadb_x_ipsecrequest *rq; - struct sockaddr_in *sin; -#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) - struct sockaddr_in6 *sin6; -#endif + u8 *sa; + int socklen = pfkey_sockaddr_len(family); int size_req; size_req = sizeof(struct sadb_x_ipsecrequest) + @@ -3562,38 +3381,10 @@ static int set_ipsecrequest(struct sk_buff *skb, rq->sadb_x_ipsecrequest_level = level; rq->sadb_x_ipsecrequest_reqid = reqid; - switch (family) { - case AF_INET: - sin = (struct sockaddr_in *)(rq + 1); - sin->sin_family = AF_INET; - memcpy(&sin->sin_addr.s_addr, src, - sizeof(sin->sin_addr.s_addr)); - sin++; - sin->sin_family = AF_INET; - memcpy(&sin->sin_addr.s_addr, dst, - sizeof(sin->sin_addr.s_addr)); - break; -#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) - case AF_INET6: - sin6 = (struct sockaddr_in6 *)(rq + 1); - sin6->sin6_family = AF_INET6; - sin6->sin6_port = 0; - sin6->sin6_flowinfo = 0; - sin6->sin6_scope_id = 0; - memcpy(&sin6->sin6_addr.s6_addr, src, - sizeof(sin6->sin6_addr.s6_addr)); - sin6++; - sin6->sin6_family = AF_INET6; - sin6->sin6_port = 0; - sin6->sin6_flowinfo = 0; - sin6->sin6_scope_id = 0; - memcpy(&sin6->sin6_addr.s6_addr, dst, - sizeof(sin6->sin6_addr.s6_addr)); - break; -#endif - default: + sa = (u8 *) (rq + 1); + if (!pfkey_sockaddr_fill(src, 0, (struct sockaddr *)sa, family) || + !pfkey_sockaddr_fill(dst, 0, (struct sockaddr *)(sa + socklen), family)) return -EINVAL; - } return 0; } @@ -3601,7 +3392,8 @@ static int set_ipsecrequest(struct sk_buff *skb, #ifdef CONFIG_NET_KEY_MIGRATE static int pfkey_send_migrate(struct xfrm_selector *sel, u8 dir, u8 type, - struct xfrm_migrate *m, int num_bundles) + struct xfrm_migrate *m, int num_bundles, + struct xfrm_kmaddress *k) { int i; int sasize_sel; @@ -3618,6 +3410,12 @@ static int pfkey_send_migrate(struct xfrm_selector *sel, u8 dir, u8 type, if (num_bundles <= 0 || num_bundles > XFRM_MAX_DEPTH) return -EINVAL; + if (k != NULL) { + /* addresses for KM */ + size += PFKEY_ALIGN8(sizeof(struct sadb_x_kmaddress) + + pfkey_sockaddr_pair_size(k->family)); + } + /* selector */ sasize_sel = pfkey_sockaddr_size(sel->family); if (!sasize_sel) @@ -3654,6 +3452,10 @@ static int pfkey_send_migrate(struct xfrm_selector *sel, u8 dir, u8 type, hdr->sadb_msg_seq = 0; hdr->sadb_msg_pid = 0; + /* Addresses to be used by KM for negotiation, if ext is available */ + if (k != NULL && (set_sadb_kmaddress(skb, k) < 0)) + return -EINVAL; + /* selector src */ set_sadb_address(skb, sasize_sel, SADB_EXT_ADDRESS_SRC, sel); @@ -3699,7 +3501,8 @@ err: } #else static int pfkey_send_migrate(struct xfrm_selector *sel, u8 dir, u8 type, - struct xfrm_migrate *m, int num_bundles) + struct xfrm_migrate *m, int num_bundles, + struct xfrm_kmaddress *k) { return -ENOPROTOOPT; } diff --git a/net/llc/af_llc.c b/net/llc/af_llc.c index 97101dcde4c0..5bcc452a247f 100644 --- a/net/llc/af_llc.c +++ b/net/llc/af_llc.c @@ -128,10 +128,8 @@ static int llc_ui_send_data(struct sock* sk, struct sk_buff *skb, int noblock) static void llc_ui_sk_init(struct socket *sock, struct sock *sk) { + sock_graft(sk, sock); sk->sk_type = sock->type; - sk->sk_sleep = &sock->wait; - sk->sk_socket = sock; - sock->sk = sk; sock->ops = &llc_ui_ops; } diff --git a/net/llc/llc_input.c b/net/llc/llc_input.c index 1c45f172991e..57ad974e4d94 100644 --- a/net/llc/llc_input.c +++ b/net/llc/llc_input.c @@ -150,7 +150,7 @@ int llc_rcv(struct sk_buff *skb, struct net_device *dev, int (*rcv)(struct sk_buff *, struct net_device *, struct packet_type *, struct net_device *); - if (dev_net(dev) != &init_net) + if (!net_eq(dev_net(dev), &init_net)) goto drop; /* diff --git a/net/mac80211/Kconfig b/net/mac80211/Kconfig index a24b459dd45a..7f710a27e91c 100644 --- a/net/mac80211/Kconfig +++ b/net/mac80211/Kconfig @@ -7,7 +7,6 @@ config MAC80211 select CRC32 select WIRELESS_EXT select CFG80211 - select NET_SCH_FIFO ---help--- This option enables the hardware independent IEEE 802.11 networking stack. @@ -15,6 +14,19 @@ config MAC80211 menu "Rate control algorithm selection" depends on MAC80211 != n +config MAC80211_RC_PID + bool "PID controller based rate control algorithm" if EMBEDDED + default y + ---help--- + This option enables a TX rate control algorithm for + mac80211 that uses a PID controller to select the TX + rate. + +config MAC80211_RC_MINSTREL + bool "Minstrel" + ---help--- + This option enables the 'minstrel' TX rate control algorithm + choice prompt "Default rate control algorithm" default MAC80211_RC_DEFAULT_PID @@ -26,40 +38,27 @@ choice config MAC80211_RC_DEFAULT_PID bool "PID controller based rate control algorithm" - select MAC80211_RC_PID + depends on MAC80211_RC_PID ---help--- Select the PID controller based rate control as the default rate control algorithm. You should choose this unless you know what you are doing. -config MAC80211_RC_DEFAULT_NONE - bool "No default algorithm" - depends on EMBEDDED - help - Selecting this option will select no default algorithm - and allow you to not build any. Do not choose this - option unless you know your driver comes with another - suitable algorithm. -endchoice +config MAC80211_RC_DEFAULT_MINSTREL + bool "Minstrel" + depends on MAC80211_RC_MINSTREL + ---help--- + Select Minstrel as the default rate control algorithm. + -comment "Selecting 'y' for an algorithm will" -comment "build the algorithm into mac80211." +endchoice config MAC80211_RC_DEFAULT string default "pid" if MAC80211_RC_DEFAULT_PID + default "minstrel" if MAC80211_RC_DEFAULT_MINSTREL default "" -config MAC80211_RC_PID - tristate "PID controller based rate control algorithm" - ---help--- - This option enables a TX rate control algorithm for - mac80211 that uses a PID controller to select the TX - rate. - - Say Y or M unless you're sure you want to use a - different rate control algorithm. - endmenu config MAC80211_MESH @@ -89,10 +88,16 @@ config MAC80211_DEBUGFS Say N unless you know you need this. +menuconfig MAC80211_DEBUG_MENU + bool "Select mac80211 debugging features" + depends on MAC80211 + ---help--- + This option collects various mac80211 debug settings. + config MAC80211_DEBUG_PACKET_ALIGNMENT bool "Enable packet alignment debugging" - depends on MAC80211 - help + depends on MAC80211_DEBUG_MENU + ---help--- This option is recommended for driver authors and strongly discouraged for everybody else, it will trigger a warning when a driver hands mac80211 a buffer that is aligned in @@ -101,67 +106,106 @@ config MAC80211_DEBUG_PACKET_ALIGNMENT Say N unless you're writing a mac80211 based driver. -config MAC80211_DEBUG - bool "Enable debugging output" - depends on MAC80211 +config MAC80211_NOINLINE + bool "Do not inline TX/RX handlers" + depends on MAC80211_DEBUG_MENU + ---help--- + This option affects code generation in mac80211, when + selected some functions are marked "noinline" to allow + easier debugging of problems in the transmit and receive + paths. + + This option increases code size a bit and inserts a lot + of function calls in the code, but is otherwise safe to + enable. + + If unsure, say N unless you expect to be finding problems + in mac80211. + +config MAC80211_VERBOSE_DEBUG + bool "Verbose debugging output" + depends on MAC80211_DEBUG_MENU ---help--- - This option will enable debug tracing output for the - ieee80211 network stack. + Selecting this option causes mac80211 to print out + many debugging messages. It should not be selected + on production systems as some of the messages are + remotely triggerable. - If you are not trying to debug or develop the ieee80211 - subsystem, you most likely want to say N here. + Do not select this option. config MAC80211_HT_DEBUG - bool "Enable HT debugging output" - depends on MAC80211_DEBUG + bool "Verbose HT debugging" + depends on MAC80211_DEBUG_MENU ---help--- This option enables 802.11n High Throughput features debug tracing output. - If you are not trying to debug of develop the ieee80211 - subsystem, you most likely want to say N here. + It should not be selected on production systems as some + of the messages are remotely triggerable. -config MAC80211_VERBOSE_DEBUG - bool "Verbose debugging output" - depends on MAC80211_DEBUG + Do not select this option. -config MAC80211_LOWTX_FRAME_DUMP - bool "Debug frame dumping" - depends on MAC80211_DEBUG +config MAC80211_TKIP_DEBUG + bool "Verbose TKIP debugging" + depends on MAC80211_DEBUG_MENU ---help--- - Selecting this option will cause the stack to - print a message for each frame that is handed - to the lowlevel driver for transmission. This - message includes all MAC addresses and the - frame control field. + Selecting this option causes mac80211 to print out + very verbose TKIP debugging messages. It should not + be selected on production systems as those messages + are remotely triggerable. - If unsure, say N and insert the debugging code - you require into the driver you are debugging. - -config TKIP_DEBUG - bool "TKIP debugging" - depends on MAC80211_DEBUG - -config MAC80211_DEBUG_COUNTERS - bool "Extra statistics for TX/RX debugging" - depends on MAC80211_DEBUG + Do not select this option. config MAC80211_IBSS_DEBUG - bool "Support for IBSS testing" - depends on MAC80211_DEBUG + bool "Verbose IBSS debugging" + depends on MAC80211_DEBUG_MENU ---help--- - Say Y here if you intend to debug the IBSS code. + Selecting this option causes mac80211 to print out + very verbose IBSS debugging messages. It should not + be selected on production systems as those messages + are remotely triggerable. + + Do not select this option. config MAC80211_VERBOSE_PS_DEBUG bool "Verbose powersave mode debugging" - depends on MAC80211_DEBUG + depends on MAC80211_DEBUG_MENU ---help--- - Say Y here to print out verbose powersave - mode debug messages. + Selecting this option causes mac80211 to print out very + verbose power save mode debugging messages (when mac80211 + is an AP and has power saving stations.) + It should not be selected on production systems as those + messages are remotely triggerable. + + Do not select this option. config MAC80211_VERBOSE_MPL_DEBUG bool "Verbose mesh peer link debugging" - depends on MAC80211_DEBUG && MAC80211_MESH + depends on MAC80211_DEBUG_MENU + depends on MAC80211_MESH + ---help--- + Selecting this option causes mac80211 to print out very + verbose mesh peer link debugging messages (when mac80211 + is taking part in a mesh network). + It should not be selected on production systems as those + messages are remotely triggerable. + + Do not select this option. + +config MAC80211_DEBUG_COUNTERS + bool "Extra statistics for TX/RX debugging" + depends on MAC80211_DEBUG_MENU + depends on MAC80211_DEBUGFS + ---help--- + Selecting this option causes mac80211 to keep additional + and very verbose statistics about TX and RX handler use + and show them in debugfs. + + If unsure, say N. + +config MAC80211_VERBOSE_SPECT_MGMT_DEBUG + bool "Verbose Spectrum Management (IEEE 802.11h)debugging" + depends on MAC80211_DEBUG_MENU ---help--- - Say Y here to print out verbose mesh peer link + Say Y here to print out verbose Spectrum Management (IEEE 802.11h) debug messages. diff --git a/net/mac80211/Makefile b/net/mac80211/Makefile index 4e5847fd316c..31cfd1f89a72 100644 --- a/net/mac80211/Makefile +++ b/net/mac80211/Makefile @@ -1,13 +1,5 @@ obj-$(CONFIG_MAC80211) += mac80211.o -# objects for PID algorithm -rc80211_pid-y := rc80211_pid_algo.o -rc80211_pid-$(CONFIG_MAC80211_DEBUGFS) += rc80211_pid_debugfs.o - -# build helper for PID algorithm -rc-pid-y := $(rc80211_pid-y) -rc-pid-m := rc80211_pid.o - # mac80211 objects mac80211-y := \ main.o \ @@ -15,6 +7,8 @@ mac80211-y := \ sta_info.o \ wep.o \ wpa.o \ + scan.o \ + ht.o \ mlme.o \ iface.o \ rate.o \ @@ -23,13 +17,14 @@ mac80211-y := \ aes_ccm.o \ cfg.o \ rx.o \ + spectmgmt.o \ tx.o \ key.o \ util.o \ + wme.o \ event.o mac80211-$(CONFIG_MAC80211_LEDS) += led.o -mac80211-$(CONFIG_NET_SCHED) += wme.o mac80211-$(CONFIG_MAC80211_DEBUGFS) += \ debugfs.o \ debugfs_sta.o \ @@ -42,10 +37,12 @@ mac80211-$(CONFIG_MAC80211_MESH) += \ mesh_plink.o \ mesh_hwmp.o +# objects for PID algorithm +rc80211_pid-y := rc80211_pid_algo.o +rc80211_pid-$(CONFIG_MAC80211_DEBUGFS) += rc80211_pid_debugfs.o -# Build rate control algorithm(s) -CFLAGS_rc80211_pid_algo.o += -DRC80211_PID_COMPILE -mac80211-$(CONFIG_MAC80211_RC_PID) += $(rc-pid-$(CONFIG_MAC80211_RC_PID)) +rc80211_minstrel-y := rc80211_minstrel.o +rc80211_minstrel-$(CONFIG_MAC80211_DEBUGFS) += rc80211_minstrel_debugfs.o -# Modular rate algorithms are assigned to mac80211-m - make separate modules -obj-m += $(mac80211-m) +mac80211-$(CONFIG_MAC80211_RC_PID) += $(rc80211_pid-y) +mac80211-$(CONFIG_MAC80211_RC_MINSTREL) += $(rc80211_minstrel-y) diff --git a/net/mac80211/aes_ccm.c b/net/mac80211/aes_ccm.c index 59f1691f62c8..a87cb3ba2df6 100644 --- a/net/mac80211/aes_ccm.c +++ b/net/mac80211/aes_ccm.c @@ -16,31 +16,28 @@ #include "key.h" #include "aes_ccm.h" - -static void ieee80211_aes_encrypt(struct crypto_cipher *tfm, - const u8 pt[16], u8 ct[16]) -{ - crypto_cipher_encrypt_one(tfm, ct, pt); -} - - -static inline void aes_ccm_prepare(struct crypto_cipher *tfm, u8 *b_0, u8 *aad, - u8 *b, u8 *s_0, u8 *a) +static void aes_ccm_prepare(struct crypto_cipher *tfm, u8 *scratch, u8 *a) { int i; + u8 *b_0, *aad, *b, *s_0; - ieee80211_aes_encrypt(tfm, b_0, b); + b_0 = scratch + 3 * AES_BLOCK_LEN; + aad = scratch + 4 * AES_BLOCK_LEN; + b = scratch; + s_0 = scratch + AES_BLOCK_LEN; + + crypto_cipher_encrypt_one(tfm, b, b_0); /* Extra Authenticate-only data (always two AES blocks) */ for (i = 0; i < AES_BLOCK_LEN; i++) aad[i] ^= b[i]; - ieee80211_aes_encrypt(tfm, aad, b); + crypto_cipher_encrypt_one(tfm, b, aad); aad += AES_BLOCK_LEN; for (i = 0; i < AES_BLOCK_LEN; i++) aad[i] ^= b[i]; - ieee80211_aes_encrypt(tfm, aad, a); + crypto_cipher_encrypt_one(tfm, a, aad); /* Mask out bits from auth-only-b_0 */ b_0[0] &= 0x07; @@ -48,24 +45,26 @@ static inline void aes_ccm_prepare(struct crypto_cipher *tfm, u8 *b_0, u8 *aad, /* S_0 is used to encrypt T (= MIC) */ b_0[14] = 0; b_0[15] = 0; - ieee80211_aes_encrypt(tfm, b_0, s_0); + crypto_cipher_encrypt_one(tfm, s_0, b_0); } void ieee80211_aes_ccm_encrypt(struct crypto_cipher *tfm, u8 *scratch, - u8 *b_0, u8 *aad, u8 *data, size_t data_len, + u8 *data, size_t data_len, u8 *cdata, u8 *mic) { int i, j, last_len, num_blocks; - u8 *pos, *cpos, *b, *s_0, *e; + u8 *pos, *cpos, *b, *s_0, *e, *b_0, *aad; b = scratch; s_0 = scratch + AES_BLOCK_LEN; e = scratch + 2 * AES_BLOCK_LEN; + b_0 = scratch + 3 * AES_BLOCK_LEN; + aad = scratch + 4 * AES_BLOCK_LEN; num_blocks = DIV_ROUND_UP(data_len, AES_BLOCK_LEN); last_len = data_len % AES_BLOCK_LEN; - aes_ccm_prepare(tfm, b_0, aad, b, s_0, b); + aes_ccm_prepare(tfm, scratch, b); /* Process payload blocks */ pos = data; @@ -77,11 +76,11 @@ void ieee80211_aes_ccm_encrypt(struct crypto_cipher *tfm, u8 *scratch, /* Authentication followed by encryption */ for (i = 0; i < blen; i++) b[i] ^= pos[i]; - ieee80211_aes_encrypt(tfm, b, b); + crypto_cipher_encrypt_one(tfm, b, b); b_0[14] = (j >> 8) & 0xff; b_0[15] = j & 0xff; - ieee80211_aes_encrypt(tfm, b_0, e); + crypto_cipher_encrypt_one(tfm, e, b_0); for (i = 0; i < blen; i++) *cpos++ = *pos++ ^ e[i]; } @@ -92,19 +91,20 @@ void ieee80211_aes_ccm_encrypt(struct crypto_cipher *tfm, u8 *scratch, int ieee80211_aes_ccm_decrypt(struct crypto_cipher *tfm, u8 *scratch, - u8 *b_0, u8 *aad, u8 *cdata, size_t data_len, - u8 *mic, u8 *data) + u8 *cdata, size_t data_len, u8 *mic, u8 *data) { int i, j, last_len, num_blocks; - u8 *pos, *cpos, *b, *s_0, *a; + u8 *pos, *cpos, *b, *s_0, *a, *b_0, *aad; b = scratch; s_0 = scratch + AES_BLOCK_LEN; a = scratch + 2 * AES_BLOCK_LEN; + b_0 = scratch + 3 * AES_BLOCK_LEN; + aad = scratch + 4 * AES_BLOCK_LEN; num_blocks = DIV_ROUND_UP(data_len, AES_BLOCK_LEN); last_len = data_len % AES_BLOCK_LEN; - aes_ccm_prepare(tfm, b_0, aad, b, s_0, a); + aes_ccm_prepare(tfm, scratch, a); /* Process payload blocks */ cpos = cdata; @@ -116,13 +116,12 @@ int ieee80211_aes_ccm_decrypt(struct crypto_cipher *tfm, u8 *scratch, /* Decryption followed by authentication */ b_0[14] = (j >> 8) & 0xff; b_0[15] = j & 0xff; - ieee80211_aes_encrypt(tfm, b_0, b); + crypto_cipher_encrypt_one(tfm, b, b_0); for (i = 0; i < blen; i++) { *pos = *cpos++ ^ b[i]; a[i] ^= *pos++; } - - ieee80211_aes_encrypt(tfm, a, a); + crypto_cipher_encrypt_one(tfm, a, a); } for (i = 0; i < CCMP_MIC_LEN; i++) { @@ -134,7 +133,7 @@ int ieee80211_aes_ccm_decrypt(struct crypto_cipher *tfm, u8 *scratch, } -struct crypto_cipher * ieee80211_aes_key_setup_encrypt(const u8 key[]) +struct crypto_cipher *ieee80211_aes_key_setup_encrypt(const u8 key[]) { struct crypto_cipher *tfm; diff --git a/net/mac80211/aes_ccm.h b/net/mac80211/aes_ccm.h index 885f19030b29..6e7820ef3448 100644 --- a/net/mac80211/aes_ccm.h +++ b/net/mac80211/aes_ccm.h @@ -14,12 +14,12 @@ #define AES_BLOCK_LEN 16 -struct crypto_cipher * ieee80211_aes_key_setup_encrypt(const u8 key[]); +struct crypto_cipher *ieee80211_aes_key_setup_encrypt(const u8 key[]); void ieee80211_aes_ccm_encrypt(struct crypto_cipher *tfm, u8 *scratch, - u8 *b_0, u8 *aad, u8 *data, size_t data_len, + u8 *data, size_t data_len, u8 *cdata, u8 *mic); int ieee80211_aes_ccm_decrypt(struct crypto_cipher *tfm, u8 *scratch, - u8 *b_0, u8 *aad, u8 *cdata, size_t data_len, + u8 *cdata, size_t data_len, u8 *mic, u8 *data); void ieee80211_aes_key_free(struct crypto_cipher *tfm); diff --git a/net/mac80211/cfg.c b/net/mac80211/cfg.c index a9fce4afdf21..855126a3039d 100644 --- a/net/mac80211/cfg.c +++ b/net/mac80211/cfg.c @@ -17,26 +17,26 @@ #include "rate.h" #include "mesh.h" -static enum ieee80211_if_types -nl80211_type_to_mac80211_type(enum nl80211_iftype type) +struct ieee80211_hw *wiphy_to_hw(struct wiphy *wiphy) +{ + struct ieee80211_local *local = wiphy_priv(wiphy); + return &local->hw; +} +EXPORT_SYMBOL(wiphy_to_hw); + +static bool nl80211_type_check(enum nl80211_iftype type) { switch (type) { - case NL80211_IFTYPE_UNSPECIFIED: - return IEEE80211_IF_TYPE_STA; case NL80211_IFTYPE_ADHOC: - return IEEE80211_IF_TYPE_IBSS; case NL80211_IFTYPE_STATION: - return IEEE80211_IF_TYPE_STA; case NL80211_IFTYPE_MONITOR: - return IEEE80211_IF_TYPE_MNTR; #ifdef CONFIG_MAC80211_MESH case NL80211_IFTYPE_MESH_POINT: - return IEEE80211_IF_TYPE_MESH_POINT; #endif case NL80211_IFTYPE_WDS: - return IEEE80211_IF_TYPE_WDS; + return true; default: - return IEEE80211_IF_TYPE_INVALID; + return false; } } @@ -45,20 +45,15 @@ static int ieee80211_add_iface(struct wiphy *wiphy, char *name, struct vif_params *params) { struct ieee80211_local *local = wiphy_priv(wiphy); - enum ieee80211_if_types itype; struct net_device *dev; struct ieee80211_sub_if_data *sdata; int err; - if (unlikely(local->reg_state != IEEE80211_DEV_REGISTERED)) - return -ENODEV; - - itype = nl80211_type_to_mac80211_type(type); - if (itype == IEEE80211_IF_TYPE_INVALID) + if (!nl80211_type_check(type)) return -EINVAL; - err = ieee80211_if_add(local->mdev, name, &dev, itype, params); - if (err || itype != IEEE80211_IF_TYPE_MNTR || !flags) + err = ieee80211_if_add(local, name, &dev, type, params); + if (err || type != NL80211_IFTYPE_MONITOR || !flags) return err; sdata = IEEE80211_DEV_TO_SUB_IF(dev); @@ -68,61 +63,52 @@ static int ieee80211_add_iface(struct wiphy *wiphy, char *name, static int ieee80211_del_iface(struct wiphy *wiphy, int ifindex) { - struct ieee80211_local *local = wiphy_priv(wiphy); struct net_device *dev; - char *name; - - if (unlikely(local->reg_state != IEEE80211_DEV_REGISTERED)) - return -ENODEV; + struct ieee80211_sub_if_data *sdata; /* we're under RTNL */ dev = __dev_get_by_index(&init_net, ifindex); if (!dev) - return 0; + return -ENODEV; + + sdata = IEEE80211_DEV_TO_SUB_IF(dev); - name = dev->name; + ieee80211_if_remove(sdata); - return ieee80211_if_remove(local->mdev, name, -1); + return 0; } static int ieee80211_change_iface(struct wiphy *wiphy, int ifindex, enum nl80211_iftype type, u32 *flags, struct vif_params *params) { - struct ieee80211_local *local = wiphy_priv(wiphy); struct net_device *dev; - enum ieee80211_if_types itype; struct ieee80211_sub_if_data *sdata; - - if (unlikely(local->reg_state != IEEE80211_DEV_REGISTERED)) - return -ENODEV; + int ret; /* we're under RTNL */ dev = __dev_get_by_index(&init_net, ifindex); if (!dev) return -ENODEV; - if (netif_running(dev)) - return -EBUSY; - - itype = nl80211_type_to_mac80211_type(type); - if (itype == IEEE80211_IF_TYPE_INVALID) + if (!nl80211_type_check(type)) return -EINVAL; sdata = IEEE80211_DEV_TO_SUB_IF(dev); - if (sdata->vif.type == IEEE80211_IF_TYPE_VLAN) - return -EOPNOTSUPP; + ret = ieee80211_if_change_type(sdata, type); + if (ret) + return ret; - ieee80211_if_reinit(dev); - ieee80211_if_set_type(dev, itype); + if (netif_running(sdata->dev)) + return -EBUSY; if (ieee80211_vif_is_mesh(&sdata->vif) && params->mesh_id_len) - ieee80211_if_sta_set_mesh_id(&sdata->u.sta, - params->mesh_id_len, - params->mesh_id); + ieee80211_sdata_set_mesh_id(sdata, + params->mesh_id_len, + params->mesh_id); - if (sdata->vif.type != IEEE80211_IF_TYPE_MNTR || !flags) + if (sdata->vif.type != NL80211_IFTYPE_MONITOR || !flags) return 0; sdata->u.mntr_flags = *flags; @@ -227,7 +213,7 @@ static int ieee80211_get_key(struct wiphy *wiphy, struct net_device *dev, void (*callback)(void *cookie, struct key_params *params)) { - struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev); + struct ieee80211_sub_if_data *sdata; struct sta_info *sta = NULL; u8 seq[6] = {0}; struct key_params params; @@ -236,6 +222,8 @@ static int ieee80211_get_key(struct wiphy *wiphy, struct net_device *dev, u16 iv16; int err = -ENOENT; + sdata = IEEE80211_DEV_TO_SUB_IF(dev); + rcu_read_lock(); if (mac_addr) { @@ -256,8 +244,8 @@ static int ieee80211_get_key(struct wiphy *wiphy, struct net_device *dev, case ALG_TKIP: params.cipher = WLAN_CIPHER_SUITE_TKIP; - iv32 = key->u.tkip.iv32; - iv16 = key->u.tkip.iv16; + iv32 = key->u.tkip.tx.iv32; + iv16 = key->u.tkip.tx.iv16; if (key->flags & KEY_FLAG_UPLOADED_TO_HARDWARE && sdata->local->ops->get_tkip_seq) @@ -359,7 +347,7 @@ static int ieee80211_dump_station(struct wiphy *wiphy, struct net_device *dev, sta = sta_info_get_by_idx(local, idx, dev); if (sta) { ret = 0; - memcpy(mac, sta->addr, ETH_ALEN); + memcpy(mac, sta->sta.addr, ETH_ALEN); sta_set_sinfo(sta, sinfo); } @@ -485,16 +473,18 @@ static int ieee80211_config_beacon(struct ieee80211_sub_if_data *sdata, kfree(old); - return ieee80211_if_config_beacon(sdata->dev); + return ieee80211_if_config(sdata, IEEE80211_IFCC_BEACON); } static int ieee80211_add_beacon(struct wiphy *wiphy, struct net_device *dev, struct beacon_parameters *params) { - struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev); + struct ieee80211_sub_if_data *sdata; struct beacon_data *old; - if (sdata->vif.type != IEEE80211_IF_TYPE_AP) + sdata = IEEE80211_DEV_TO_SUB_IF(dev); + + if (sdata->vif.type != NL80211_IFTYPE_AP) return -EINVAL; old = sdata->u.ap.beacon; @@ -508,10 +498,12 @@ static int ieee80211_add_beacon(struct wiphy *wiphy, struct net_device *dev, static int ieee80211_set_beacon(struct wiphy *wiphy, struct net_device *dev, struct beacon_parameters *params) { - struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev); + struct ieee80211_sub_if_data *sdata; struct beacon_data *old; - if (sdata->vif.type != IEEE80211_IF_TYPE_AP) + sdata = IEEE80211_DEV_TO_SUB_IF(dev); + + if (sdata->vif.type != NL80211_IFTYPE_AP) return -EINVAL; old = sdata->u.ap.beacon; @@ -524,10 +516,12 @@ static int ieee80211_set_beacon(struct wiphy *wiphy, struct net_device *dev, static int ieee80211_del_beacon(struct wiphy *wiphy, struct net_device *dev) { - struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev); + struct ieee80211_sub_if_data *sdata; struct beacon_data *old; - if (sdata->vif.type != IEEE80211_IF_TYPE_AP) + sdata = IEEE80211_DEV_TO_SUB_IF(dev); + + if (sdata->vif.type != NL80211_IFTYPE_AP) return -EINVAL; old = sdata->u.ap.beacon; @@ -539,7 +533,7 @@ static int ieee80211_del_beacon(struct wiphy *wiphy, struct net_device *dev) synchronize_rcu(); kfree(old); - return ieee80211_if_config_beacon(dev); + return ieee80211_if_config(sdata, IEEE80211_IFCC_BEACON); } /* Layer 2 Update frame (802.2 Type 1 LLC XID Update response) */ @@ -570,7 +564,7 @@ static void ieee80211_send_layer2_update(struct sta_info *sta) * Update response frame; IEEE Std 802.2-1998, 5.4.1.2.1 */ memset(msg->da, 0xff, ETH_ALEN); - memcpy(msg->sa, sta->addr, ETH_ALEN); + memcpy(msg->sa, sta->sta.addr, ETH_ALEN); msg->len = htons(6); msg->dsap = 0; msg->ssap = 0x01; /* NULL LSAP, CR Bit: Response */ @@ -602,6 +596,7 @@ static void sta_apply_parameters(struct ieee80211_local *local, */ if (params->station_flags & STATION_FLAG_CHANGED) { + spin_lock_bh(&sta->lock); sta->flags &= ~WLAN_STA_AUTHORIZED; if (params->station_flags & STATION_FLAG_AUTHORIZED) sta->flags |= WLAN_STA_AUTHORIZED; @@ -613,6 +608,7 @@ static void sta_apply_parameters(struct ieee80211_local *local, sta->flags &= ~WLAN_STA_WME; if (params->station_flags & STATION_FLAG_WME) sta->flags |= WLAN_STA_WME; + spin_unlock_bh(&sta->lock); } /* @@ -623,9 +619,9 @@ static void sta_apply_parameters(struct ieee80211_local *local, */ if (params->aid) { - sta->aid = params->aid; - if (sta->aid > IEEE80211_MAX_AID) - sta->aid = 0; /* XXX: should this be an error? */ + sta->sta.aid = params->aid; + if (sta->sta.aid > IEEE80211_MAX_AID) + sta->sta.aid = 0; /* XXX: should this be an error? */ } if (params->listen_interval >= 0) @@ -642,7 +638,12 @@ static void sta_apply_parameters(struct ieee80211_local *local, rates |= BIT(j); } } - sta->supp_rates[local->oper_channel->band] = rates; + sta->sta.supp_rates[local->oper_channel->band] = rates; + } + + if (params->ht_capa) { + ieee80211_ht_cap_ie_to_ht_info(params->ht_capa, + &sta->sta.ht_info); } if (ieee80211_vif_is_mesh(&sdata->vif) && params->plink_action) { @@ -660,7 +661,7 @@ static void sta_apply_parameters(struct ieee80211_local *local, static int ieee80211_add_station(struct wiphy *wiphy, struct net_device *dev, u8 *mac, struct station_parameters *params) { - struct ieee80211_local *local = wdev_priv(dev->ieee80211_ptr); + struct ieee80211_local *local = wiphy_priv(wiphy); struct sta_info *sta; struct ieee80211_sub_if_data *sdata; int err; @@ -672,8 +673,8 @@ static int ieee80211_add_station(struct wiphy *wiphy, struct net_device *dev, if (params->vlan) { sdata = IEEE80211_DEV_TO_SUB_IF(params->vlan); - if (sdata->vif.type != IEEE80211_IF_TYPE_VLAN && - sdata->vif.type != IEEE80211_IF_TYPE_AP) + if (sdata->vif.type != NL80211_IFTYPE_AP_VLAN && + sdata->vif.type != NL80211_IFTYPE_AP) return -EINVAL; } else sdata = IEEE80211_DEV_TO_SUB_IF(dev); @@ -692,7 +693,7 @@ static int ieee80211_add_station(struct wiphy *wiphy, struct net_device *dev, sta_apply_parameters(local, sta, params); - rate_control_rate_init(sta, local); + rate_control_rate_init(sta); rcu_read_lock(); @@ -703,8 +704,8 @@ static int ieee80211_add_station(struct wiphy *wiphy, struct net_device *dev, return err; } - if (sdata->vif.type == IEEE80211_IF_TYPE_VLAN || - sdata->vif.type == IEEE80211_IF_TYPE_AP) + if (sdata->vif.type == NL80211_IFTYPE_AP_VLAN || + sdata->vif.type == NL80211_IFTYPE_AP) ieee80211_send_layer2_update(sta); rcu_read_unlock(); @@ -715,10 +716,12 @@ static int ieee80211_add_station(struct wiphy *wiphy, struct net_device *dev, static int ieee80211_del_station(struct wiphy *wiphy, struct net_device *dev, u8 *mac) { - struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev); - struct ieee80211_local *local = sdata->local; + struct ieee80211_local *local = wiphy_priv(wiphy); + struct ieee80211_sub_if_data *sdata; struct sta_info *sta; + sdata = IEEE80211_DEV_TO_SUB_IF(dev); + if (mac) { rcu_read_lock(); @@ -744,7 +747,7 @@ static int ieee80211_change_station(struct wiphy *wiphy, u8 *mac, struct station_parameters *params) { - struct ieee80211_local *local = wdev_priv(dev->ieee80211_ptr); + struct ieee80211_local *local = wiphy_priv(wiphy); struct sta_info *sta; struct ieee80211_sub_if_data *vlansdata; @@ -760,13 +763,13 @@ static int ieee80211_change_station(struct wiphy *wiphy, if (params->vlan && params->vlan != sta->sdata->dev) { vlansdata = IEEE80211_DEV_TO_SUB_IF(params->vlan); - if (vlansdata->vif.type != IEEE80211_IF_TYPE_VLAN && - vlansdata->vif.type != IEEE80211_IF_TYPE_AP) { + if (vlansdata->vif.type != NL80211_IFTYPE_AP_VLAN && + vlansdata->vif.type != NL80211_IFTYPE_AP) { rcu_read_unlock(); return -EINVAL; } - sta->sdata = IEEE80211_DEV_TO_SUB_IF(params->vlan); + sta->sdata = vlansdata; ieee80211_send_layer2_update(sta); } @@ -781,8 +784,8 @@ static int ieee80211_change_station(struct wiphy *wiphy, static int ieee80211_add_mpath(struct wiphy *wiphy, struct net_device *dev, u8 *dst, u8 *next_hop) { - struct ieee80211_local *local = wdev_priv(dev->ieee80211_ptr); - struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev); + struct ieee80211_local *local = wiphy_priv(wiphy); + struct ieee80211_sub_if_data *sdata; struct mesh_path *mpath; struct sta_info *sta; int err; @@ -790,7 +793,9 @@ static int ieee80211_add_mpath(struct wiphy *wiphy, struct net_device *dev, if (!netif_running(dev)) return -ENETDOWN; - if (sdata->vif.type != IEEE80211_IF_TYPE_MESH_POINT) + sdata = IEEE80211_DEV_TO_SUB_IF(dev); + + if (sdata->vif.type != NL80211_IFTYPE_MESH_POINT) return -ENOTSUPP; rcu_read_lock(); @@ -800,13 +805,13 @@ static int ieee80211_add_mpath(struct wiphy *wiphy, struct net_device *dev, return -ENOENT; } - err = mesh_path_add(dst, dev); + err = mesh_path_add(dst, sdata); if (err) { rcu_read_unlock(); return err; } - mpath = mesh_path_lookup(dst, dev); + mpath = mesh_path_lookup(dst, sdata); if (!mpath) { rcu_read_unlock(); return -ENXIO; @@ -820,10 +825,12 @@ static int ieee80211_add_mpath(struct wiphy *wiphy, struct net_device *dev, static int ieee80211_del_mpath(struct wiphy *wiphy, struct net_device *dev, u8 *dst) { + struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev); + if (dst) - return mesh_path_del(dst, dev); + return mesh_path_del(dst, sdata); - mesh_path_flush(dev); + mesh_path_flush(sdata); return 0; } @@ -831,15 +838,17 @@ static int ieee80211_change_mpath(struct wiphy *wiphy, struct net_device *dev, u8 *dst, u8 *next_hop) { - struct ieee80211_local *local = wdev_priv(dev->ieee80211_ptr); - struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev); + struct ieee80211_local *local = wiphy_priv(wiphy); + struct ieee80211_sub_if_data *sdata; struct mesh_path *mpath; struct sta_info *sta; if (!netif_running(dev)) return -ENETDOWN; - if (sdata->vif.type != IEEE80211_IF_TYPE_MESH_POINT) + sdata = IEEE80211_DEV_TO_SUB_IF(dev); + + if (sdata->vif.type != NL80211_IFTYPE_MESH_POINT) return -ENOTSUPP; rcu_read_lock(); @@ -850,7 +859,7 @@ static int ieee80211_change_mpath(struct wiphy *wiphy, return -ENOENT; } - mpath = mesh_path_lookup(dst, dev); + mpath = mesh_path_lookup(dst, sdata); if (!mpath) { rcu_read_unlock(); return -ENOENT; @@ -866,7 +875,7 @@ static void mpath_set_pinfo(struct mesh_path *mpath, u8 *next_hop, struct mpath_info *pinfo) { if (mpath->next_hop) - memcpy(next_hop, mpath->next_hop->addr, ETH_ALEN); + memcpy(next_hop, mpath->next_hop->sta.addr, ETH_ALEN); else memset(next_hop, 0, ETH_ALEN); @@ -905,14 +914,16 @@ static int ieee80211_get_mpath(struct wiphy *wiphy, struct net_device *dev, u8 *dst, u8 *next_hop, struct mpath_info *pinfo) { - struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev); + struct ieee80211_sub_if_data *sdata; struct mesh_path *mpath; - if (sdata->vif.type != IEEE80211_IF_TYPE_MESH_POINT) + sdata = IEEE80211_DEV_TO_SUB_IF(dev); + + if (sdata->vif.type != NL80211_IFTYPE_MESH_POINT) return -ENOTSUPP; rcu_read_lock(); - mpath = mesh_path_lookup(dst, dev); + mpath = mesh_path_lookup(dst, sdata); if (!mpath) { rcu_read_unlock(); return -ENOENT; @@ -927,14 +938,16 @@ static int ieee80211_dump_mpath(struct wiphy *wiphy, struct net_device *dev, int idx, u8 *dst, u8 *next_hop, struct mpath_info *pinfo) { - struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev); + struct ieee80211_sub_if_data *sdata; struct mesh_path *mpath; - if (sdata->vif.type != IEEE80211_IF_TYPE_MESH_POINT) + sdata = IEEE80211_DEV_TO_SUB_IF(dev); + + if (sdata->vif.type != NL80211_IFTYPE_MESH_POINT) return -ENOTSUPP; rcu_read_lock(); - mpath = mesh_path_lookup_by_idx(idx, dev); + mpath = mesh_path_lookup_by_idx(idx, sdata); if (!mpath) { rcu_read_unlock(); return -ENOENT; @@ -946,6 +959,38 @@ static int ieee80211_dump_mpath(struct wiphy *wiphy, struct net_device *dev, } #endif +static int ieee80211_change_bss(struct wiphy *wiphy, + struct net_device *dev, + struct bss_parameters *params) +{ + struct ieee80211_sub_if_data *sdata; + u32 changed = 0; + + sdata = IEEE80211_DEV_TO_SUB_IF(dev); + + if (sdata->vif.type != NL80211_IFTYPE_AP) + return -EINVAL; + + if (params->use_cts_prot >= 0) { + sdata->bss_conf.use_cts_prot = params->use_cts_prot; + changed |= BSS_CHANGED_ERP_CTS_PROT; + } + if (params->use_short_preamble >= 0) { + sdata->bss_conf.use_short_preamble = + params->use_short_preamble; + changed |= BSS_CHANGED_ERP_PREAMBLE; + } + if (params->use_short_slot_time >= 0) { + sdata->bss_conf.use_short_slot = + params->use_short_slot_time; + changed |= BSS_CHANGED_ERP_SLOT; + } + + ieee80211_bss_info_change_notify(sdata, changed); + + return 0; +} + struct cfg80211_ops mac80211_config_ops = { .add_virtual_intf = ieee80211_add_iface, .del_virtual_intf = ieee80211_del_iface, @@ -969,4 +1014,5 @@ struct cfg80211_ops mac80211_config_ops = { .get_mpath = ieee80211_get_mpath, .dump_mpath = ieee80211_dump_mpath, #endif + .change_bss = ieee80211_change_bss, }; diff --git a/net/mac80211/debugfs.c b/net/mac80211/debugfs.c index 1cccbfd781f6..24ce54463310 100644 --- a/net/mac80211/debugfs.c +++ b/net/mac80211/debugfs.c @@ -51,8 +51,6 @@ DEBUGFS_READONLY_FILE(antenna_sel_tx, 20, "%d", local->hw.conf.antenna_sel_tx); DEBUGFS_READONLY_FILE(antenna_sel_rx, 20, "%d", local->hw.conf.antenna_sel_rx); -DEBUGFS_READONLY_FILE(bridge_packets, 20, "%d", - local->bridge_packets); DEBUGFS_READONLY_FILE(rts_threshold, 20, "%d", local->rts_threshold); DEBUGFS_READONLY_FILE(fragmentation_threshold, 20, "%d", @@ -70,16 +68,6 @@ DEBUGFS_READONLY_FILE(rate_ctrl_alg, 100, "%s", /* statistics stuff */ -static inline int rtnl_lock_local(struct ieee80211_local *local) -{ - rtnl_lock(); - if (unlikely(local->reg_state != IEEE80211_DEV_REGISTERED)) { - rtnl_unlock(); - return -ENODEV; - } - return 0; -} - #define DEBUGFS_STATS_FILE(name, buflen, fmt, value...) \ DEBUGFS_READONLY_FILE(stats_ ##name, buflen, fmt, ##value) @@ -96,10 +84,7 @@ static ssize_t format_devstat_counter(struct ieee80211_local *local, if (!local->ops->get_stats) return -EOPNOTSUPP; - res = rtnl_lock_local(local); - if (res) - return res; - + rtnl_lock(); res = local->ops->get_stats(local_to_hw(local), &stats); rtnl_unlock(); if (!res) @@ -197,45 +182,6 @@ DEBUGFS_STATS_FILE(rx_handlers_fragments, 20, "%u", DEBUGFS_STATS_FILE(tx_status_drop, 20, "%u", local->tx_status_drop); -static ssize_t stats_wme_rx_queue_read(struct file *file, - char __user *userbuf, - size_t count, loff_t *ppos) -{ - struct ieee80211_local *local = file->private_data; - char buf[NUM_RX_DATA_QUEUES*15], *p = buf; - int i; - - for (i = 0; i < NUM_RX_DATA_QUEUES; i++) - p += scnprintf(p, sizeof(buf)+buf-p, - "%u\n", local->wme_rx_queue[i]); - - return simple_read_from_buffer(userbuf, count, ppos, buf, p-buf); -} - -static const struct file_operations stats_wme_rx_queue_ops = { - .read = stats_wme_rx_queue_read, - .open = mac80211_open_file_generic, -}; - -static ssize_t stats_wme_tx_queue_read(struct file *file, - char __user *userbuf, - size_t count, loff_t *ppos) -{ - struct ieee80211_local *local = file->private_data; - char buf[NUM_TX_DATA_QUEUES*15], *p = buf; - int i; - - for (i = 0; i < NUM_TX_DATA_QUEUES; i++) - p += scnprintf(p, sizeof(buf)+buf-p, - "%u\n", local->wme_tx_queue[i]); - - return simple_read_from_buffer(userbuf, count, ppos, buf, p-buf); -} - -static const struct file_operations stats_wme_tx_queue_ops = { - .read = stats_wme_tx_queue_read, - .open = mac80211_open_file_generic, -}; #endif DEBUGFS_DEVSTATS_FILE(dot11ACKFailureCount); @@ -258,7 +204,6 @@ void debugfs_hw_add(struct ieee80211_local *local) DEBUGFS_ADD(frequency); DEBUGFS_ADD(antenna_sel_tx); DEBUGFS_ADD(antenna_sel_rx); - DEBUGFS_ADD(bridge_packets); DEBUGFS_ADD(rts_threshold); DEBUGFS_ADD(fragmentation_threshold); DEBUGFS_ADD(short_retry_limit); @@ -303,8 +248,6 @@ void debugfs_hw_add(struct ieee80211_local *local) DEBUGFS_STATS_ADD(rx_expand_skb_head2); DEBUGFS_STATS_ADD(rx_handlers_fragments); DEBUGFS_STATS_ADD(tx_status_drop); - DEBUGFS_STATS_ADD(wme_tx_queue); - DEBUGFS_STATS_ADD(wme_rx_queue); #endif DEBUGFS_STATS_ADD(dot11ACKFailureCount); DEBUGFS_STATS_ADD(dot11RTSFailureCount); @@ -317,7 +260,6 @@ void debugfs_hw_del(struct ieee80211_local *local) DEBUGFS_DEL(frequency); DEBUGFS_DEL(antenna_sel_tx); DEBUGFS_DEL(antenna_sel_rx); - DEBUGFS_DEL(bridge_packets); DEBUGFS_DEL(rts_threshold); DEBUGFS_DEL(fragmentation_threshold); DEBUGFS_DEL(short_retry_limit); @@ -356,8 +298,6 @@ void debugfs_hw_del(struct ieee80211_local *local) DEBUGFS_STATS_DEL(rx_expand_skb_head2); DEBUGFS_STATS_DEL(rx_handlers_fragments); DEBUGFS_STATS_DEL(tx_status_drop); - DEBUGFS_STATS_DEL(wme_tx_queue); - DEBUGFS_STATS_DEL(wme_rx_queue); #endif DEBUGFS_STATS_DEL(dot11ACKFailureCount); DEBUGFS_STATS_DEL(dot11RTSFailureCount); diff --git a/net/mac80211/debugfs_key.c b/net/mac80211/debugfs_key.c index 19efc3a6a932..a3294d109322 100644 --- a/net/mac80211/debugfs_key.c +++ b/net/mac80211/debugfs_key.c @@ -97,8 +97,8 @@ static ssize_t key_tx_spec_read(struct file *file, char __user *userbuf, break; case ALG_TKIP: len = scnprintf(buf, sizeof(buf), "%08x %04x\n", - key->u.tkip.iv32, - key->u.tkip.iv16); + key->u.tkip.tx.iv32, + key->u.tkip.tx.iv16); break; case ALG_CCMP: tpn = key->u.ccmp.tx_pn; @@ -128,8 +128,8 @@ static ssize_t key_rx_spec_read(struct file *file, char __user *userbuf, for (i = 0; i < NUM_RX_DATA_QUEUES; i++) p += scnprintf(p, sizeof(buf)+buf-p, "%08x %04x\n", - key->u.tkip.iv32_rx[i], - key->u.tkip.iv16_rx[i]); + key->u.tkip.rx[i].iv32, + key->u.tkip.rx[i].iv16); len = p - buf; break; case ALG_CCMP: @@ -206,7 +206,8 @@ void ieee80211_debugfs_key_add(struct ieee80211_key *key) rcu_read_lock(); sta = rcu_dereference(key->sta); if (sta) - sprintf(buf, "../../stations/%s", print_mac(mac, sta->addr)); + sprintf(buf, "../../stations/%s", + print_mac(mac, sta->sta.addr)); rcu_read_unlock(); /* using sta as a boolean is fine outside RCU lock */ @@ -265,7 +266,7 @@ void ieee80211_debugfs_key_add_default(struct ieee80211_sub_if_data *sdata) key = sdata->default_key; if (key) { sprintf(buf, "../keys/%d", key->debugfs.cnt); - sdata->debugfs.default_key = + sdata->common_debugfs.default_key = debugfs_create_symlink("default_key", sdata->debugfsdir, buf); } else @@ -277,8 +278,8 @@ void ieee80211_debugfs_key_remove_default(struct ieee80211_sub_if_data *sdata) if (!sdata) return; - debugfs_remove(sdata->debugfs.default_key); - sdata->debugfs.default_key = NULL; + debugfs_remove(sdata->common_debugfs.default_key); + sdata->common_debugfs.default_key = NULL; } void ieee80211_debugfs_key_sta_del(struct ieee80211_key *key, diff --git a/net/mac80211/debugfs_netdev.c b/net/mac80211/debugfs_netdev.c index e3326d046944..2a4515623776 100644 --- a/net/mac80211/debugfs_netdev.c +++ b/net/mac80211/debugfs_netdev.c @@ -155,8 +155,9 @@ static const struct file_operations name##_ops = { \ __IEEE80211_IF_WFILE(name) /* common attributes */ -IEEE80211_IF_FILE(channel_use, channel_use, DEC); IEEE80211_IF_FILE(drop_unencrypted, drop_unencrypted, DEC); +IEEE80211_IF_FILE(force_unicast_rateidx, force_unicast_rateidx, DEC); +IEEE80211_IF_FILE(max_ratectrl_rateidx, max_ratectrl_rateidx, DEC); /* STA/IBSS attributes */ IEEE80211_IF_FILE(state, u.sta.state, DEC); @@ -172,7 +173,6 @@ IEEE80211_IF_FILE(assoc_tries, u.sta.assoc_tries, DEC); IEEE80211_IF_FILE(auth_algs, u.sta.auth_algs, HEX); IEEE80211_IF_FILE(auth_alg, u.sta.auth_alg, DEC); IEEE80211_IF_FILE(auth_transaction, u.sta.auth_transaction, DEC); -IEEE80211_IF_FILE(num_beacons_sta, u.sta.num_beacons, DEC); static ssize_t ieee80211_if_fmt_flags( const struct ieee80211_sub_if_data *sdata, char *buf, int buflen) @@ -191,9 +191,6 @@ __IEEE80211_IF_FILE(flags); /* AP attributes */ IEEE80211_IF_FILE(num_sta_ps, u.ap.num_sta_ps, ATOMIC); IEEE80211_IF_FILE(dtim_count, u.ap.dtim_count, DEC); -IEEE80211_IF_FILE(num_beacons, u.ap.num_beacons, DEC); -IEEE80211_IF_FILE(force_unicast_rateidx, u.ap.force_unicast_rateidx, DEC); -IEEE80211_IF_FILE(max_ratectrl_rateidx, u.ap.max_ratectrl_rateidx, DEC); static ssize_t ieee80211_if_fmt_num_buffered_multicast( const struct ieee80211_sub_if_data *sdata, char *buf, int buflen) @@ -208,37 +205,37 @@ IEEE80211_IF_FILE(peer, u.wds.remote_addr, MAC); #ifdef CONFIG_MAC80211_MESH /* Mesh stats attributes */ -IEEE80211_IF_FILE(fwded_frames, u.sta.mshstats.fwded_frames, DEC); -IEEE80211_IF_FILE(dropped_frames_ttl, u.sta.mshstats.dropped_frames_ttl, DEC); +IEEE80211_IF_FILE(fwded_frames, u.mesh.mshstats.fwded_frames, DEC); +IEEE80211_IF_FILE(dropped_frames_ttl, u.mesh.mshstats.dropped_frames_ttl, DEC); IEEE80211_IF_FILE(dropped_frames_no_route, - u.sta.mshstats.dropped_frames_no_route, DEC); -IEEE80211_IF_FILE(estab_plinks, u.sta.mshstats.estab_plinks, ATOMIC); + u.mesh.mshstats.dropped_frames_no_route, DEC); +IEEE80211_IF_FILE(estab_plinks, u.mesh.mshstats.estab_plinks, ATOMIC); /* Mesh parameters */ IEEE80211_IF_WFILE(dot11MeshMaxRetries, - u.sta.mshcfg.dot11MeshMaxRetries, DEC, u8); + u.mesh.mshcfg.dot11MeshMaxRetries, DEC, u8); IEEE80211_IF_WFILE(dot11MeshRetryTimeout, - u.sta.mshcfg.dot11MeshRetryTimeout, DEC, u16); + u.mesh.mshcfg.dot11MeshRetryTimeout, DEC, u16); IEEE80211_IF_WFILE(dot11MeshConfirmTimeout, - u.sta.mshcfg.dot11MeshConfirmTimeout, DEC, u16); + u.mesh.mshcfg.dot11MeshConfirmTimeout, DEC, u16); IEEE80211_IF_WFILE(dot11MeshHoldingTimeout, - u.sta.mshcfg.dot11MeshHoldingTimeout, DEC, u16); -IEEE80211_IF_WFILE(dot11MeshTTL, u.sta.mshcfg.dot11MeshTTL, DEC, u8); -IEEE80211_IF_WFILE(auto_open_plinks, u.sta.mshcfg.auto_open_plinks, DEC, u8); + u.mesh.mshcfg.dot11MeshHoldingTimeout, DEC, u16); +IEEE80211_IF_WFILE(dot11MeshTTL, u.mesh.mshcfg.dot11MeshTTL, DEC, u8); +IEEE80211_IF_WFILE(auto_open_plinks, u.mesh.mshcfg.auto_open_plinks, DEC, u8); IEEE80211_IF_WFILE(dot11MeshMaxPeerLinks, - u.sta.mshcfg.dot11MeshMaxPeerLinks, DEC, u16); + u.mesh.mshcfg.dot11MeshMaxPeerLinks, DEC, u16); IEEE80211_IF_WFILE(dot11MeshHWMPactivePathTimeout, - u.sta.mshcfg.dot11MeshHWMPactivePathTimeout, DEC, u32); + u.mesh.mshcfg.dot11MeshHWMPactivePathTimeout, DEC, u32); IEEE80211_IF_WFILE(dot11MeshHWMPpreqMinInterval, - u.sta.mshcfg.dot11MeshHWMPpreqMinInterval, DEC, u16); + u.mesh.mshcfg.dot11MeshHWMPpreqMinInterval, DEC, u16); IEEE80211_IF_WFILE(dot11MeshHWMPnetDiameterTraversalTime, - u.sta.mshcfg.dot11MeshHWMPnetDiameterTraversalTime, DEC, u16); + u.mesh.mshcfg.dot11MeshHWMPnetDiameterTraversalTime, DEC, u16); IEEE80211_IF_WFILE(dot11MeshHWMPmaxPREQretries, - u.sta.mshcfg.dot11MeshHWMPmaxPREQretries, DEC, u8); + u.mesh.mshcfg.dot11MeshHWMPmaxPREQretries, DEC, u8); IEEE80211_IF_WFILE(path_refresh_time, - u.sta.mshcfg.path_refresh_time, DEC, u32); + u.mesh.mshcfg.path_refresh_time, DEC, u32); IEEE80211_IF_WFILE(min_discovery_timeout, - u.sta.mshcfg.min_discovery_timeout, DEC, u16); + u.mesh.mshcfg.min_discovery_timeout, DEC, u16); #endif @@ -248,8 +245,10 @@ IEEE80211_IF_WFILE(min_discovery_timeout, static void add_sta_files(struct ieee80211_sub_if_data *sdata) { - DEBUGFS_ADD(channel_use, sta); DEBUGFS_ADD(drop_unencrypted, sta); + DEBUGFS_ADD(force_unicast_rateidx, sta); + DEBUGFS_ADD(max_ratectrl_rateidx, sta); + DEBUGFS_ADD(state, sta); DEBUGFS_ADD(bssid, sta); DEBUGFS_ADD(prev_bssid, sta); @@ -264,32 +263,33 @@ static void add_sta_files(struct ieee80211_sub_if_data *sdata) DEBUGFS_ADD(auth_alg, sta); DEBUGFS_ADD(auth_transaction, sta); DEBUGFS_ADD(flags, sta); - DEBUGFS_ADD(num_beacons_sta, sta); } static void add_ap_files(struct ieee80211_sub_if_data *sdata) { - DEBUGFS_ADD(channel_use, ap); DEBUGFS_ADD(drop_unencrypted, ap); - DEBUGFS_ADD(num_sta_ps, ap); - DEBUGFS_ADD(dtim_count, ap); - DEBUGFS_ADD(num_beacons, ap); DEBUGFS_ADD(force_unicast_rateidx, ap); DEBUGFS_ADD(max_ratectrl_rateidx, ap); + + DEBUGFS_ADD(num_sta_ps, ap); + DEBUGFS_ADD(dtim_count, ap); DEBUGFS_ADD(num_buffered_multicast, ap); } static void add_wds_files(struct ieee80211_sub_if_data *sdata) { - DEBUGFS_ADD(channel_use, wds); DEBUGFS_ADD(drop_unencrypted, wds); + DEBUGFS_ADD(force_unicast_rateidx, wds); + DEBUGFS_ADD(max_ratectrl_rateidx, wds); + DEBUGFS_ADD(peer, wds); } static void add_vlan_files(struct ieee80211_sub_if_data *sdata) { - DEBUGFS_ADD(channel_use, vlan); DEBUGFS_ADD(drop_unencrypted, vlan); + DEBUGFS_ADD(force_unicast_rateidx, vlan); + DEBUGFS_ADD(max_ratectrl_rateidx, vlan); } static void add_monitor_files(struct ieee80211_sub_if_data *sdata) @@ -341,26 +341,26 @@ static void add_files(struct ieee80211_sub_if_data *sdata) return; switch (sdata->vif.type) { - case IEEE80211_IF_TYPE_MESH_POINT: + case NL80211_IFTYPE_MESH_POINT: #ifdef CONFIG_MAC80211_MESH add_mesh_stats(sdata); add_mesh_config(sdata); #endif - /* fall through */ - case IEEE80211_IF_TYPE_STA: - case IEEE80211_IF_TYPE_IBSS: + break; + case NL80211_IFTYPE_STATION: + case NL80211_IFTYPE_ADHOC: add_sta_files(sdata); break; - case IEEE80211_IF_TYPE_AP: + case NL80211_IFTYPE_AP: add_ap_files(sdata); break; - case IEEE80211_IF_TYPE_WDS: + case NL80211_IFTYPE_WDS: add_wds_files(sdata); break; - case IEEE80211_IF_TYPE_MNTR: + case NL80211_IFTYPE_MONITOR: add_monitor_files(sdata); break; - case IEEE80211_IF_TYPE_VLAN: + case NL80211_IFTYPE_AP_VLAN: add_vlan_files(sdata); break; default: @@ -376,8 +376,10 @@ static void add_files(struct ieee80211_sub_if_data *sdata) static void del_sta_files(struct ieee80211_sub_if_data *sdata) { - DEBUGFS_DEL(channel_use, sta); DEBUGFS_DEL(drop_unencrypted, sta); + DEBUGFS_DEL(force_unicast_rateidx, sta); + DEBUGFS_DEL(max_ratectrl_rateidx, sta); + DEBUGFS_DEL(state, sta); DEBUGFS_DEL(bssid, sta); DEBUGFS_DEL(prev_bssid, sta); @@ -392,32 +394,33 @@ static void del_sta_files(struct ieee80211_sub_if_data *sdata) DEBUGFS_DEL(auth_alg, sta); DEBUGFS_DEL(auth_transaction, sta); DEBUGFS_DEL(flags, sta); - DEBUGFS_DEL(num_beacons_sta, sta); } static void del_ap_files(struct ieee80211_sub_if_data *sdata) { - DEBUGFS_DEL(channel_use, ap); DEBUGFS_DEL(drop_unencrypted, ap); - DEBUGFS_DEL(num_sta_ps, ap); - DEBUGFS_DEL(dtim_count, ap); - DEBUGFS_DEL(num_beacons, ap); DEBUGFS_DEL(force_unicast_rateidx, ap); DEBUGFS_DEL(max_ratectrl_rateidx, ap); + + DEBUGFS_DEL(num_sta_ps, ap); + DEBUGFS_DEL(dtim_count, ap); DEBUGFS_DEL(num_buffered_multicast, ap); } static void del_wds_files(struct ieee80211_sub_if_data *sdata) { - DEBUGFS_DEL(channel_use, wds); DEBUGFS_DEL(drop_unencrypted, wds); + DEBUGFS_DEL(force_unicast_rateidx, wds); + DEBUGFS_DEL(max_ratectrl_rateidx, wds); + DEBUGFS_DEL(peer, wds); } static void del_vlan_files(struct ieee80211_sub_if_data *sdata) { - DEBUGFS_DEL(channel_use, vlan); DEBUGFS_DEL(drop_unencrypted, vlan); + DEBUGFS_DEL(force_unicast_rateidx, vlan); + DEBUGFS_DEL(max_ratectrl_rateidx, vlan); } static void del_monitor_files(struct ieee80211_sub_if_data *sdata) @@ -467,32 +470,32 @@ static void del_mesh_config(struct ieee80211_sub_if_data *sdata) } #endif -static void del_files(struct ieee80211_sub_if_data *sdata, int type) +static void del_files(struct ieee80211_sub_if_data *sdata) { if (!sdata->debugfsdir) return; - switch (type) { - case IEEE80211_IF_TYPE_MESH_POINT: + switch (sdata->vif.type) { + case NL80211_IFTYPE_MESH_POINT: #ifdef CONFIG_MAC80211_MESH del_mesh_stats(sdata); del_mesh_config(sdata); #endif - /* fall through */ - case IEEE80211_IF_TYPE_STA: - case IEEE80211_IF_TYPE_IBSS: + break; + case NL80211_IFTYPE_STATION: + case NL80211_IFTYPE_ADHOC: del_sta_files(sdata); break; - case IEEE80211_IF_TYPE_AP: + case NL80211_IFTYPE_AP: del_ap_files(sdata); break; - case IEEE80211_IF_TYPE_WDS: + case NL80211_IFTYPE_WDS: del_wds_files(sdata); break; - case IEEE80211_IF_TYPE_MNTR: + case NL80211_IFTYPE_MONITOR: del_monitor_files(sdata); break; - case IEEE80211_IF_TYPE_VLAN: + case NL80211_IFTYPE_AP_VLAN: del_vlan_files(sdata); break; default: @@ -512,29 +515,23 @@ void ieee80211_debugfs_add_netdev(struct ieee80211_sub_if_data *sdata) sprintf(buf, "netdev:%s", sdata->dev->name); sdata->debugfsdir = debugfs_create_dir(buf, sdata->local->hw.wiphy->debugfsdir); + add_files(sdata); } void ieee80211_debugfs_remove_netdev(struct ieee80211_sub_if_data *sdata) { - del_files(sdata, sdata->vif.type); + del_files(sdata); debugfs_remove(sdata->debugfsdir); sdata->debugfsdir = NULL; } -void ieee80211_debugfs_change_if_type(struct ieee80211_sub_if_data *sdata, - int oldtype) -{ - del_files(sdata, oldtype); - add_files(sdata); -} - -static int netdev_notify(struct notifier_block * nb, +static int netdev_notify(struct notifier_block *nb, unsigned long state, void *ndev) { struct net_device *dev = ndev; struct dentry *dir; - struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev); + struct ieee80211_sub_if_data *sdata; char buf[10+IFNAMSIZ]; if (state != NETDEV_CHANGENAME) @@ -546,6 +543,8 @@ static int netdev_notify(struct notifier_block * nb, if (dev->ieee80211_ptr->wiphy->privid != mac80211_wiphy_privid) return 0; + sdata = IEEE80211_DEV_TO_SUB_IF(dev); + sprintf(buf, "netdev:%s", dev->name); dir = sdata->debugfsdir; if (!debugfs_rename(dir->d_parent, dir, dir->d_parent, buf)) diff --git a/net/mac80211/debugfs_netdev.h b/net/mac80211/debugfs_netdev.h index a690071fde8a..7af731f0b731 100644 --- a/net/mac80211/debugfs_netdev.h +++ b/net/mac80211/debugfs_netdev.h @@ -6,8 +6,6 @@ #ifdef CONFIG_MAC80211_DEBUGFS void ieee80211_debugfs_add_netdev(struct ieee80211_sub_if_data *sdata); void ieee80211_debugfs_remove_netdev(struct ieee80211_sub_if_data *sdata); -void ieee80211_debugfs_change_if_type(struct ieee80211_sub_if_data *sdata, - int oldtype); void ieee80211_debugfs_netdev_init(void); void ieee80211_debugfs_netdev_exit(void); #else @@ -17,9 +15,6 @@ static inline void ieee80211_debugfs_add_netdev( static inline void ieee80211_debugfs_remove_netdev( struct ieee80211_sub_if_data *sdata) {} -static inline void ieee80211_debugfs_change_if_type( - struct ieee80211_sub_if_data *sdata, int oldtype) -{} static inline void ieee80211_debugfs_netdev_init(void) {} diff --git a/net/mac80211/debugfs_sta.c b/net/mac80211/debugfs_sta.c index 6d47a1d31b37..b9902e425f09 100644 --- a/net/mac80211/debugfs_sta.c +++ b/net/mac80211/debugfs_sta.c @@ -50,7 +50,7 @@ static const struct file_operations sta_ ##name## _ops = { \ STA_READ_##format(name, field) \ STA_OPS(name) -STA_FILE(aid, aid, D); +STA_FILE(aid, sta.aid, D); STA_FILE(dev, sdata->dev->name, S); STA_FILE(rx_packets, rx_packets, LU); STA_FILE(tx_packets, tx_packets, LU); @@ -63,10 +63,9 @@ STA_FILE(tx_fragments, tx_fragments, LU); STA_FILE(tx_filtered, tx_filtered_count, LU); STA_FILE(tx_retry_failed, tx_retry_failed, LU); STA_FILE(tx_retry_count, tx_retry_count, LU); -STA_FILE(last_rssi, last_rssi, D); STA_FILE(last_signal, last_signal, D); +STA_FILE(last_qual, last_qual, D); STA_FILE(last_noise, last_noise, D); -STA_FILE(channel_use, channel_use, D); STA_FILE(wep_weak_iv_count, wep_weak_iv_count, LU); static ssize_t sta_flags_read(struct file *file, char __user *userbuf, @@ -74,14 +73,15 @@ static ssize_t sta_flags_read(struct file *file, char __user *userbuf, { char buf[100]; struct sta_info *sta = file->private_data; + u32 staflags = get_sta_flags(sta); int res = scnprintf(buf, sizeof(buf), "%s%s%s%s%s%s%s", - sta->flags & WLAN_STA_AUTH ? "AUTH\n" : "", - sta->flags & WLAN_STA_ASSOC ? "ASSOC\n" : "", - sta->flags & WLAN_STA_PS ? "PS\n" : "", - sta->flags & WLAN_STA_AUTHORIZED ? "AUTHORIZED\n" : "", - sta->flags & WLAN_STA_SHORT_PREAMBLE ? "SHORT PREAMBLE\n" : "", - sta->flags & WLAN_STA_WME ? "WME\n" : "", - sta->flags & WLAN_STA_WDS ? "WDS\n" : ""); + staflags & WLAN_STA_AUTH ? "AUTH\n" : "", + staflags & WLAN_STA_ASSOC ? "ASSOC\n" : "", + staflags & WLAN_STA_PS ? "PS\n" : "", + staflags & WLAN_STA_AUTHORIZED ? "AUTHORIZED\n" : "", + staflags & WLAN_STA_SHORT_PREAMBLE ? "SHORT PREAMBLE\n" : "", + staflags & WLAN_STA_WME ? "WME\n" : "", + staflags & WLAN_STA_WDS ? "WDS\n" : ""); return simple_read_from_buffer(userbuf, count, ppos, buf, res); } STA_OPS(flags); @@ -123,36 +123,6 @@ static ssize_t sta_last_seq_ctrl_read(struct file *file, char __user *userbuf, } STA_OPS(last_seq_ctrl); -#ifdef CONFIG_MAC80211_DEBUG_COUNTERS -static ssize_t sta_wme_rx_queue_read(struct file *file, char __user *userbuf, - size_t count, loff_t *ppos) -{ - char buf[15*NUM_RX_DATA_QUEUES], *p = buf; - int i; - struct sta_info *sta = file->private_data; - for (i = 0; i < NUM_RX_DATA_QUEUES; i++) - p += scnprintf(p, sizeof(buf)+buf-p, "%u ", - sta->wme_rx_queue[i]); - p += scnprintf(p, sizeof(buf)+buf-p, "\n"); - return simple_read_from_buffer(userbuf, count, ppos, buf, p - buf); -} -STA_OPS(wme_rx_queue); - -static ssize_t sta_wme_tx_queue_read(struct file *file, char __user *userbuf, - size_t count, loff_t *ppos) -{ - char buf[15*NUM_TX_DATA_QUEUES], *p = buf; - int i; - struct sta_info *sta = file->private_data; - for (i = 0; i < NUM_TX_DATA_QUEUES; i++) - p += scnprintf(p, sizeof(buf)+buf-p, "%u ", - sta->wme_tx_queue[i]); - p += scnprintf(p, sizeof(buf)+buf-p, "\n"); - return simple_read_from_buffer(userbuf, count, ppos, buf, p - buf); -} -STA_OPS(wme_tx_queue); -#endif - static ssize_t sta_agg_status_read(struct file *file, char __user *userbuf, size_t count, loff_t *ppos) { @@ -203,10 +173,9 @@ static ssize_t sta_agg_status_write(struct file *file, const char __user *user_buf, size_t count, loff_t *ppos) { struct sta_info *sta = file->private_data; - struct net_device *dev = sta->sdata->dev; - struct ieee80211_local *local = wdev_priv(dev->ieee80211_ptr); + struct ieee80211_local *local = sta->sdata->local; struct ieee80211_hw *hw = &local->hw; - u8 *da = sta->addr; + u8 *da = sta->sta.addr; static int tid_static_tx[16] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}; static int tid_static_rx[16] = {1, 1, 1, 1, 1, 1, 1, 1, @@ -231,7 +200,7 @@ static ssize_t sta_agg_status_write(struct file *file, tid_num = tid_num - 100; if (tid_static_rx[tid_num] == 1) { strcpy(state, "off "); - ieee80211_sta_stop_rx_ba_session(dev, da, tid_num, 0, + ieee80211_sta_stop_rx_ba_session(sta->sdata, da, tid_num, 0, WLAN_REASON_QSTA_REQUIRE_SETUP); sta->ampdu_mlme.tid_state_rx[tid_num] |= HT_AGG_STATE_DEBUGFS_CTL; @@ -283,7 +252,7 @@ void ieee80211_sta_debugfs_add(struct sta_info *sta) if (!stations_dir) return; - mac = print_mac(mbuf, sta->addr); + mac = print_mac(mbuf, sta->sta.addr); sta->debugfs.dir = debugfs_create_dir(mac, stations_dir); if (!sta->debugfs.dir) @@ -293,10 +262,6 @@ void ieee80211_sta_debugfs_add(struct sta_info *sta) DEBUGFS_ADD(num_ps_buf_frames); DEBUGFS_ADD(inactive_ms); DEBUGFS_ADD(last_seq_ctrl); -#ifdef CONFIG_MAC80211_DEBUG_COUNTERS - DEBUGFS_ADD(wme_rx_queue); - DEBUGFS_ADD(wme_tx_queue); -#endif DEBUGFS_ADD(agg_status); } @@ -306,10 +271,6 @@ void ieee80211_sta_debugfs_remove(struct sta_info *sta) DEBUGFS_DEL(num_ps_buf_frames); DEBUGFS_DEL(inactive_ms); DEBUGFS_DEL(last_seq_ctrl); -#ifdef CONFIG_MAC80211_DEBUG_COUNTERS - DEBUGFS_DEL(wme_rx_queue); - DEBUGFS_DEL(wme_tx_queue); -#endif DEBUGFS_DEL(agg_status); debugfs_remove(sta->debugfs.dir); diff --git a/net/mac80211/event.c b/net/mac80211/event.c index 2280f40b4560..8de60de70bc9 100644 --- a/net/mac80211/event.c +++ b/net/mac80211/event.c @@ -8,7 +8,6 @@ * mac80211 - events */ -#include <linux/netdevice.h> #include <net/iw_handler.h> #include "ieee80211_i.h" @@ -17,7 +16,7 @@ * (in the variable hdr) must be long enough to extract the TKIP * fields like TSC */ -void mac80211_ev_michael_mic_failure(struct net_device *dev, int keyidx, +void mac80211_ev_michael_mic_failure(struct ieee80211_sub_if_data *sdata, int keyidx, struct ieee80211_hdr *hdr) { union iwreq_data wrqu; @@ -32,7 +31,7 @@ void mac80211_ev_michael_mic_failure(struct net_device *dev, int keyidx, print_mac(mac, hdr->addr2)); memset(&wrqu, 0, sizeof(wrqu)); wrqu.data.length = strlen(buf); - wireless_send_event(dev, IWEVCUSTOM, &wrqu, buf); + wireless_send_event(sdata->dev, IWEVCUSTOM, &wrqu, buf); kfree(buf); } diff --git a/net/mac80211/ht.c b/net/mac80211/ht.c new file mode 100644 index 000000000000..dc7d9a3d70d5 --- /dev/null +++ b/net/mac80211/ht.c @@ -0,0 +1,992 @@ +/* + * HT handling + * + * Copyright 2003, Jouni Malinen <jkmaline@cc.hut.fi> + * Copyright 2002-2005, Instant802 Networks, Inc. + * Copyright 2005-2006, Devicescape Software, Inc. + * Copyright 2006-2007 Jiri Benc <jbenc@suse.cz> + * Copyright 2007, Michael Wu <flamingice@sourmilk.net> + * Copyright 2007-2008, Intel Corporation + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +#include <linux/ieee80211.h> +#include <net/wireless.h> +#include <net/mac80211.h> +#include "ieee80211_i.h" +#include "sta_info.h" +#include "wme.h" + +int ieee80211_ht_cap_ie_to_ht_info(struct ieee80211_ht_cap *ht_cap_ie, + struct ieee80211_ht_info *ht_info) +{ + + if (ht_info == NULL) + return -EINVAL; + + memset(ht_info, 0, sizeof(*ht_info)); + + if (ht_cap_ie) { + u8 ampdu_info = ht_cap_ie->ampdu_params_info; + + ht_info->ht_supported = 1; + ht_info->cap = le16_to_cpu(ht_cap_ie->cap_info); + ht_info->ampdu_factor = + ampdu_info & IEEE80211_HT_CAP_AMPDU_FACTOR; + ht_info->ampdu_density = + (ampdu_info & IEEE80211_HT_CAP_AMPDU_DENSITY) >> 2; + memcpy(ht_info->supp_mcs_set, ht_cap_ie->supp_mcs_set, 16); + } else + ht_info->ht_supported = 0; + + return 0; +} + +int ieee80211_ht_addt_info_ie_to_ht_bss_info( + struct ieee80211_ht_addt_info *ht_add_info_ie, + struct ieee80211_ht_bss_info *bss_info) +{ + if (bss_info == NULL) + return -EINVAL; + + memset(bss_info, 0, sizeof(*bss_info)); + + if (ht_add_info_ie) { + u16 op_mode; + op_mode = le16_to_cpu(ht_add_info_ie->operation_mode); + + bss_info->primary_channel = ht_add_info_ie->control_chan; + bss_info->bss_cap = ht_add_info_ie->ht_param; + bss_info->bss_op_mode = (u8)(op_mode & 0xff); + } + + return 0; +} + +static void ieee80211_send_addba_request(struct ieee80211_sub_if_data *sdata, + const u8 *da, u16 tid, + u8 dialog_token, u16 start_seq_num, + u16 agg_size, u16 timeout) +{ + struct ieee80211_local *local = sdata->local; + struct ieee80211_if_sta *ifsta = &sdata->u.sta; + struct sk_buff *skb; + struct ieee80211_mgmt *mgmt; + u16 capab; + + skb = dev_alloc_skb(sizeof(*mgmt) + local->hw.extra_tx_headroom); + + if (!skb) { + printk(KERN_ERR "%s: failed to allocate buffer " + "for addba request frame\n", sdata->dev->name); + return; + } + skb_reserve(skb, local->hw.extra_tx_headroom); + mgmt = (struct ieee80211_mgmt *) skb_put(skb, 24); + memset(mgmt, 0, 24); + memcpy(mgmt->da, da, ETH_ALEN); + memcpy(mgmt->sa, sdata->dev->dev_addr, ETH_ALEN); + if (sdata->vif.type == NL80211_IFTYPE_AP) + memcpy(mgmt->bssid, sdata->dev->dev_addr, ETH_ALEN); + else + memcpy(mgmt->bssid, ifsta->bssid, ETH_ALEN); + + mgmt->frame_control = cpu_to_le16(IEEE80211_FTYPE_MGMT | + IEEE80211_STYPE_ACTION); + + skb_put(skb, 1 + sizeof(mgmt->u.action.u.addba_req)); + + mgmt->u.action.category = WLAN_CATEGORY_BACK; + mgmt->u.action.u.addba_req.action_code = WLAN_ACTION_ADDBA_REQ; + + mgmt->u.action.u.addba_req.dialog_token = dialog_token; + capab = (u16)(1 << 1); /* bit 1 aggregation policy */ + capab |= (u16)(tid << 2); /* bit 5:2 TID number */ + capab |= (u16)(agg_size << 6); /* bit 15:6 max size of aggergation */ + + mgmt->u.action.u.addba_req.capab = cpu_to_le16(capab); + + mgmt->u.action.u.addba_req.timeout = cpu_to_le16(timeout); + mgmt->u.action.u.addba_req.start_seq_num = + cpu_to_le16(start_seq_num << 4); + + ieee80211_tx_skb(sdata, skb, 0); +} + +static void ieee80211_send_addba_resp(struct ieee80211_sub_if_data *sdata, u8 *da, u16 tid, + u8 dialog_token, u16 status, u16 policy, + u16 buf_size, u16 timeout) +{ + struct ieee80211_if_sta *ifsta = &sdata->u.sta; + struct ieee80211_local *local = sdata->local; + struct sk_buff *skb; + struct ieee80211_mgmt *mgmt; + u16 capab; + + skb = dev_alloc_skb(sizeof(*mgmt) + local->hw.extra_tx_headroom); + + if (!skb) { + printk(KERN_DEBUG "%s: failed to allocate buffer " + "for addba resp frame\n", sdata->dev->name); + return; + } + + skb_reserve(skb, local->hw.extra_tx_headroom); + mgmt = (struct ieee80211_mgmt *) skb_put(skb, 24); + memset(mgmt, 0, 24); + memcpy(mgmt->da, da, ETH_ALEN); + memcpy(mgmt->sa, sdata->dev->dev_addr, ETH_ALEN); + if (sdata->vif.type == NL80211_IFTYPE_AP) + memcpy(mgmt->bssid, sdata->dev->dev_addr, ETH_ALEN); + else + memcpy(mgmt->bssid, ifsta->bssid, ETH_ALEN); + mgmt->frame_control = cpu_to_le16(IEEE80211_FTYPE_MGMT | + IEEE80211_STYPE_ACTION); + + skb_put(skb, 1 + sizeof(mgmt->u.action.u.addba_resp)); + mgmt->u.action.category = WLAN_CATEGORY_BACK; + mgmt->u.action.u.addba_resp.action_code = WLAN_ACTION_ADDBA_RESP; + mgmt->u.action.u.addba_resp.dialog_token = dialog_token; + + capab = (u16)(policy << 1); /* bit 1 aggregation policy */ + capab |= (u16)(tid << 2); /* bit 5:2 TID number */ + capab |= (u16)(buf_size << 6); /* bit 15:6 max size of aggregation */ + + mgmt->u.action.u.addba_resp.capab = cpu_to_le16(capab); + mgmt->u.action.u.addba_resp.timeout = cpu_to_le16(timeout); + mgmt->u.action.u.addba_resp.status = cpu_to_le16(status); + + ieee80211_tx_skb(sdata, skb, 0); +} + +static void ieee80211_send_delba(struct ieee80211_sub_if_data *sdata, + const u8 *da, u16 tid, + u16 initiator, u16 reason_code) +{ + struct ieee80211_local *local = sdata->local; + struct ieee80211_if_sta *ifsta = &sdata->u.sta; + struct sk_buff *skb; + struct ieee80211_mgmt *mgmt; + u16 params; + + skb = dev_alloc_skb(sizeof(*mgmt) + local->hw.extra_tx_headroom); + + if (!skb) { + printk(KERN_ERR "%s: failed to allocate buffer " + "for delba frame\n", sdata->dev->name); + return; + } + + skb_reserve(skb, local->hw.extra_tx_headroom); + mgmt = (struct ieee80211_mgmt *) skb_put(skb, 24); + memset(mgmt, 0, 24); + memcpy(mgmt->da, da, ETH_ALEN); + memcpy(mgmt->sa, sdata->dev->dev_addr, ETH_ALEN); + if (sdata->vif.type == NL80211_IFTYPE_AP) + memcpy(mgmt->bssid, sdata->dev->dev_addr, ETH_ALEN); + else + memcpy(mgmt->bssid, ifsta->bssid, ETH_ALEN); + mgmt->frame_control = cpu_to_le16(IEEE80211_FTYPE_MGMT | + IEEE80211_STYPE_ACTION); + + skb_put(skb, 1 + sizeof(mgmt->u.action.u.delba)); + + mgmt->u.action.category = WLAN_CATEGORY_BACK; + mgmt->u.action.u.delba.action_code = WLAN_ACTION_DELBA; + params = (u16)(initiator << 11); /* bit 11 initiator */ + params |= (u16)(tid << 12); /* bit 15:12 TID number */ + + mgmt->u.action.u.delba.params = cpu_to_le16(params); + mgmt->u.action.u.delba.reason_code = cpu_to_le16(reason_code); + + ieee80211_tx_skb(sdata, skb, 0); +} + +void ieee80211_send_bar(struct ieee80211_sub_if_data *sdata, u8 *ra, u16 tid, u16 ssn) +{ + struct ieee80211_local *local = sdata->local; + struct sk_buff *skb; + struct ieee80211_bar *bar; + u16 bar_control = 0; + + skb = dev_alloc_skb(sizeof(*bar) + local->hw.extra_tx_headroom); + if (!skb) { + printk(KERN_ERR "%s: failed to allocate buffer for " + "bar frame\n", sdata->dev->name); + return; + } + skb_reserve(skb, local->hw.extra_tx_headroom); + bar = (struct ieee80211_bar *)skb_put(skb, sizeof(*bar)); + memset(bar, 0, sizeof(*bar)); + bar->frame_control = cpu_to_le16(IEEE80211_FTYPE_CTL | + IEEE80211_STYPE_BACK_REQ); + memcpy(bar->ra, ra, ETH_ALEN); + memcpy(bar->ta, sdata->dev->dev_addr, ETH_ALEN); + bar_control |= (u16)IEEE80211_BAR_CTRL_ACK_POLICY_NORMAL; + bar_control |= (u16)IEEE80211_BAR_CTRL_CBMTID_COMPRESSED_BA; + bar_control |= (u16)(tid << 12); + bar->control = cpu_to_le16(bar_control); + bar->start_seq_num = cpu_to_le16(ssn); + + ieee80211_tx_skb(sdata, skb, 0); +} + +void ieee80211_sta_stop_rx_ba_session(struct ieee80211_sub_if_data *sdata, u8 *ra, u16 tid, + u16 initiator, u16 reason) +{ + struct ieee80211_local *local = sdata->local; + struct ieee80211_hw *hw = &local->hw; + struct sta_info *sta; + int ret, i; + DECLARE_MAC_BUF(mac); + + rcu_read_lock(); + + sta = sta_info_get(local, ra); + if (!sta) { + rcu_read_unlock(); + return; + } + + /* check if TID is in operational state */ + spin_lock_bh(&sta->lock); + if (sta->ampdu_mlme.tid_state_rx[tid] + != HT_AGG_STATE_OPERATIONAL) { + spin_unlock_bh(&sta->lock); + rcu_read_unlock(); + return; + } + sta->ampdu_mlme.tid_state_rx[tid] = + HT_AGG_STATE_REQ_STOP_BA_MSK | + (initiator << HT_AGG_STATE_INITIATOR_SHIFT); + spin_unlock_bh(&sta->lock); + + /* stop HW Rx aggregation. ampdu_action existence + * already verified in session init so we add the BUG_ON */ + BUG_ON(!local->ops->ampdu_action); + +#ifdef CONFIG_MAC80211_HT_DEBUG + printk(KERN_DEBUG "Rx BA session stop requested for %s tid %u\n", + print_mac(mac, ra), tid); +#endif /* CONFIG_MAC80211_HT_DEBUG */ + + ret = local->ops->ampdu_action(hw, IEEE80211_AMPDU_RX_STOP, + &sta->sta, tid, NULL); + if (ret) + printk(KERN_DEBUG "HW problem - can not stop rx " + "aggregation for tid %d\n", tid); + + /* shutdown timer has not expired */ + if (initiator != WLAN_BACK_TIMER) + del_timer_sync(&sta->ampdu_mlme.tid_rx[tid]->session_timer); + + /* check if this is a self generated aggregation halt */ + if (initiator == WLAN_BACK_RECIPIENT || initiator == WLAN_BACK_TIMER) + ieee80211_send_delba(sdata, ra, tid, 0, reason); + + /* free the reordering buffer */ + for (i = 0; i < sta->ampdu_mlme.tid_rx[tid]->buf_size; i++) { + if (sta->ampdu_mlme.tid_rx[tid]->reorder_buf[i]) { + /* release the reordered frames */ + dev_kfree_skb(sta->ampdu_mlme.tid_rx[tid]->reorder_buf[i]); + sta->ampdu_mlme.tid_rx[tid]->stored_mpdu_num--; + sta->ampdu_mlme.tid_rx[tid]->reorder_buf[i] = NULL; + } + } + /* free resources */ + kfree(sta->ampdu_mlme.tid_rx[tid]->reorder_buf); + kfree(sta->ampdu_mlme.tid_rx[tid]); + sta->ampdu_mlme.tid_rx[tid] = NULL; + sta->ampdu_mlme.tid_state_rx[tid] = HT_AGG_STATE_IDLE; + + rcu_read_unlock(); +} + + +/* + * After sending add Block Ack request we activated a timer until + * add Block Ack response will arrive from the recipient. + * If this timer expires sta_addba_resp_timer_expired will be executed. + */ +static void sta_addba_resp_timer_expired(unsigned long data) +{ + /* not an elegant detour, but there is no choice as the timer passes + * only one argument, and both sta_info and TID are needed, so init + * flow in sta_info_create gives the TID as data, while the timer_to_id + * array gives the sta through container_of */ + u16 tid = *(u8 *)data; + struct sta_info *temp_sta = container_of((void *)data, + struct sta_info, timer_to_tid[tid]); + + struct ieee80211_local *local = temp_sta->local; + struct ieee80211_hw *hw = &local->hw; + struct sta_info *sta; + u8 *state; + + rcu_read_lock(); + + sta = sta_info_get(local, temp_sta->sta.addr); + if (!sta) { + rcu_read_unlock(); + return; + } + + state = &sta->ampdu_mlme.tid_state_tx[tid]; + /* check if the TID waits for addBA response */ + spin_lock_bh(&sta->lock); + if (!(*state & HT_ADDBA_REQUESTED_MSK)) { + spin_unlock_bh(&sta->lock); + *state = HT_AGG_STATE_IDLE; +#ifdef CONFIG_MAC80211_HT_DEBUG + printk(KERN_DEBUG "timer expired on tid %d but we are not " + "expecting addBA response there", tid); +#endif + goto timer_expired_exit; + } + +#ifdef CONFIG_MAC80211_HT_DEBUG + printk(KERN_DEBUG "addBA response timer expired on tid %d\n", tid); +#endif + + /* go through the state check in stop_BA_session */ + *state = HT_AGG_STATE_OPERATIONAL; + spin_unlock_bh(&sta->lock); + ieee80211_stop_tx_ba_session(hw, temp_sta->sta.addr, tid, + WLAN_BACK_INITIATOR); + +timer_expired_exit: + rcu_read_unlock(); +} + +void ieee80211_sta_tear_down_BA_sessions(struct ieee80211_sub_if_data *sdata, u8 *addr) +{ + struct ieee80211_local *local = sdata->local; + int i; + + for (i = 0; i < STA_TID_NUM; i++) { + ieee80211_stop_tx_ba_session(&local->hw, addr, i, + WLAN_BACK_INITIATOR); + ieee80211_sta_stop_rx_ba_session(sdata, addr, i, + WLAN_BACK_RECIPIENT, + WLAN_REASON_QSTA_LEAVE_QBSS); + } +} + +int ieee80211_start_tx_ba_session(struct ieee80211_hw *hw, u8 *ra, u16 tid) +{ + struct ieee80211_local *local = hw_to_local(hw); + struct sta_info *sta; + struct ieee80211_sub_if_data *sdata; + u16 start_seq_num; + u8 *state; + int ret; + DECLARE_MAC_BUF(mac); + + if (tid >= STA_TID_NUM) + return -EINVAL; + +#ifdef CONFIG_MAC80211_HT_DEBUG + printk(KERN_DEBUG "Open BA session requested for %s tid %u\n", + print_mac(mac, ra), tid); +#endif /* CONFIG_MAC80211_HT_DEBUG */ + + rcu_read_lock(); + + sta = sta_info_get(local, ra); + if (!sta) { +#ifdef CONFIG_MAC80211_HT_DEBUG + printk(KERN_DEBUG "Could not find the station\n"); +#endif + ret = -ENOENT; + goto exit; + } + + spin_lock_bh(&sta->lock); + + /* we have tried too many times, receiver does not want A-MPDU */ + if (sta->ampdu_mlme.addba_req_num[tid] > HT_AGG_MAX_RETRIES) { + ret = -EBUSY; + goto err_unlock_sta; + } + + state = &sta->ampdu_mlme.tid_state_tx[tid]; + /* check if the TID is not in aggregation flow already */ + if (*state != HT_AGG_STATE_IDLE) { +#ifdef CONFIG_MAC80211_HT_DEBUG + printk(KERN_DEBUG "BA request denied - session is not " + "idle on tid %u\n", tid); +#endif /* CONFIG_MAC80211_HT_DEBUG */ + ret = -EAGAIN; + goto err_unlock_sta; + } + + /* prepare A-MPDU MLME for Tx aggregation */ + sta->ampdu_mlme.tid_tx[tid] = + kmalloc(sizeof(struct tid_ampdu_tx), GFP_ATOMIC); + if (!sta->ampdu_mlme.tid_tx[tid]) { +#ifdef CONFIG_MAC80211_HT_DEBUG + if (net_ratelimit()) + printk(KERN_ERR "allocate tx mlme to tid %d failed\n", + tid); +#endif + ret = -ENOMEM; + goto err_unlock_sta; + } + /* Tx timer */ + sta->ampdu_mlme.tid_tx[tid]->addba_resp_timer.function = + sta_addba_resp_timer_expired; + sta->ampdu_mlme.tid_tx[tid]->addba_resp_timer.data = + (unsigned long)&sta->timer_to_tid[tid]; + init_timer(&sta->ampdu_mlme.tid_tx[tid]->addba_resp_timer); + + /* create a new queue for this aggregation */ + ret = ieee80211_ht_agg_queue_add(local, sta, tid); + + /* case no queue is available to aggregation + * don't switch to aggregation */ + if (ret) { +#ifdef CONFIG_MAC80211_HT_DEBUG + printk(KERN_DEBUG "BA request denied - queue unavailable for" + " tid %d\n", tid); +#endif /* CONFIG_MAC80211_HT_DEBUG */ + goto err_unlock_queue; + } + sdata = sta->sdata; + + /* Ok, the Addba frame hasn't been sent yet, but if the driver calls the + * call back right away, it must see that the flow has begun */ + *state |= HT_ADDBA_REQUESTED_MSK; + + /* This is slightly racy because the queue isn't stopped */ + start_seq_num = sta->tid_seq[tid]; + + if (local->ops->ampdu_action) + ret = local->ops->ampdu_action(hw, IEEE80211_AMPDU_TX_START, + &sta->sta, tid, &start_seq_num); + + if (ret) { + /* No need to requeue the packets in the agg queue, since we + * held the tx lock: no packet could be enqueued to the newly + * allocated queue */ + ieee80211_ht_agg_queue_remove(local, sta, tid, 0); +#ifdef CONFIG_MAC80211_HT_DEBUG + printk(KERN_DEBUG "BA request denied - HW unavailable for" + " tid %d\n", tid); +#endif /* CONFIG_MAC80211_HT_DEBUG */ + *state = HT_AGG_STATE_IDLE; + goto err_unlock_queue; + } + + /* Will put all the packets in the new SW queue */ + ieee80211_requeue(local, ieee802_1d_to_ac[tid]); + spin_unlock_bh(&sta->lock); + + /* send an addBA request */ + sta->ampdu_mlme.dialog_token_allocator++; + sta->ampdu_mlme.tid_tx[tid]->dialog_token = + sta->ampdu_mlme.dialog_token_allocator; + sta->ampdu_mlme.tid_tx[tid]->ssn = start_seq_num; + + + ieee80211_send_addba_request(sta->sdata, ra, tid, + sta->ampdu_mlme.tid_tx[tid]->dialog_token, + sta->ampdu_mlme.tid_tx[tid]->ssn, + 0x40, 5000); + /* activate the timer for the recipient's addBA response */ + sta->ampdu_mlme.tid_tx[tid]->addba_resp_timer.expires = + jiffies + ADDBA_RESP_INTERVAL; + add_timer(&sta->ampdu_mlme.tid_tx[tid]->addba_resp_timer); +#ifdef CONFIG_MAC80211_HT_DEBUG + printk(KERN_DEBUG "activated addBA response timer on tid %d\n", tid); +#endif + goto exit; + +err_unlock_queue: + kfree(sta->ampdu_mlme.tid_tx[tid]); + sta->ampdu_mlme.tid_tx[tid] = NULL; + ret = -EBUSY; +err_unlock_sta: + spin_unlock_bh(&sta->lock); +exit: + rcu_read_unlock(); + return ret; +} +EXPORT_SYMBOL(ieee80211_start_tx_ba_session); + +int ieee80211_stop_tx_ba_session(struct ieee80211_hw *hw, + u8 *ra, u16 tid, + enum ieee80211_back_parties initiator) +{ + struct ieee80211_local *local = hw_to_local(hw); + struct sta_info *sta; + u8 *state; + int ret = 0; + DECLARE_MAC_BUF(mac); + + if (tid >= STA_TID_NUM) + return -EINVAL; + + rcu_read_lock(); + sta = sta_info_get(local, ra); + if (!sta) { + rcu_read_unlock(); + return -ENOENT; + } + + /* check if the TID is in aggregation */ + state = &sta->ampdu_mlme.tid_state_tx[tid]; + spin_lock_bh(&sta->lock); + + if (*state != HT_AGG_STATE_OPERATIONAL) { + ret = -ENOENT; + goto stop_BA_exit; + } + +#ifdef CONFIG_MAC80211_HT_DEBUG + printk(KERN_DEBUG "Tx BA session stop requested for %s tid %u\n", + print_mac(mac, ra), tid); +#endif /* CONFIG_MAC80211_HT_DEBUG */ + + ieee80211_stop_queue(hw, sta->tid_to_tx_q[tid]); + + *state = HT_AGG_STATE_REQ_STOP_BA_MSK | + (initiator << HT_AGG_STATE_INITIATOR_SHIFT); + + if (local->ops->ampdu_action) + ret = local->ops->ampdu_action(hw, IEEE80211_AMPDU_TX_STOP, + &sta->sta, tid, NULL); + + /* case HW denied going back to legacy */ + if (ret) { + WARN_ON(ret != -EBUSY); + *state = HT_AGG_STATE_OPERATIONAL; + ieee80211_wake_queue(hw, sta->tid_to_tx_q[tid]); + goto stop_BA_exit; + } + +stop_BA_exit: + spin_unlock_bh(&sta->lock); + rcu_read_unlock(); + return ret; +} +EXPORT_SYMBOL(ieee80211_stop_tx_ba_session); + +void ieee80211_start_tx_ba_cb(struct ieee80211_hw *hw, u8 *ra, u16 tid) +{ + struct ieee80211_local *local = hw_to_local(hw); + struct sta_info *sta; + u8 *state; + DECLARE_MAC_BUF(mac); + + if (tid >= STA_TID_NUM) { +#ifdef CONFIG_MAC80211_HT_DEBUG + printk(KERN_DEBUG "Bad TID value: tid = %d (>= %d)\n", + tid, STA_TID_NUM); +#endif + return; + } + + rcu_read_lock(); + sta = sta_info_get(local, ra); + if (!sta) { + rcu_read_unlock(); +#ifdef CONFIG_MAC80211_HT_DEBUG + printk(KERN_DEBUG "Could not find station: %s\n", + print_mac(mac, ra)); +#endif + return; + } + + state = &sta->ampdu_mlme.tid_state_tx[tid]; + spin_lock_bh(&sta->lock); + + if (!(*state & HT_ADDBA_REQUESTED_MSK)) { +#ifdef CONFIG_MAC80211_HT_DEBUG + printk(KERN_DEBUG "addBA was not requested yet, state is %d\n", + *state); +#endif + spin_unlock_bh(&sta->lock); + rcu_read_unlock(); + return; + } + + WARN_ON_ONCE(*state & HT_ADDBA_DRV_READY_MSK); + + *state |= HT_ADDBA_DRV_READY_MSK; + + if (*state == HT_AGG_STATE_OPERATIONAL) { +#ifdef CONFIG_MAC80211_HT_DEBUG + printk(KERN_DEBUG "Aggregation is on for tid %d \n", tid); +#endif + ieee80211_wake_queue(hw, sta->tid_to_tx_q[tid]); + } + spin_unlock_bh(&sta->lock); + rcu_read_unlock(); +} +EXPORT_SYMBOL(ieee80211_start_tx_ba_cb); + +void ieee80211_stop_tx_ba_cb(struct ieee80211_hw *hw, u8 *ra, u8 tid) +{ + struct ieee80211_local *local = hw_to_local(hw); + struct sta_info *sta; + u8 *state; + int agg_queue; + DECLARE_MAC_BUF(mac); + + if (tid >= STA_TID_NUM) { +#ifdef CONFIG_MAC80211_HT_DEBUG + printk(KERN_DEBUG "Bad TID value: tid = %d (>= %d)\n", + tid, STA_TID_NUM); +#endif + return; + } + +#ifdef CONFIG_MAC80211_HT_DEBUG + printk(KERN_DEBUG "Stopping Tx BA session for %s tid %d\n", + print_mac(mac, ra), tid); +#endif /* CONFIG_MAC80211_HT_DEBUG */ + + rcu_read_lock(); + sta = sta_info_get(local, ra); + if (!sta) { +#ifdef CONFIG_MAC80211_HT_DEBUG + printk(KERN_DEBUG "Could not find station: %s\n", + print_mac(mac, ra)); +#endif + rcu_read_unlock(); + return; + } + state = &sta->ampdu_mlme.tid_state_tx[tid]; + + /* NOTE: no need to use sta->lock in this state check, as + * ieee80211_stop_tx_ba_session will let only one stop call to + * pass through per sta/tid + */ + if ((*state & HT_AGG_STATE_REQ_STOP_BA_MSK) == 0) { +#ifdef CONFIG_MAC80211_HT_DEBUG + printk(KERN_DEBUG "unexpected callback to A-MPDU stop\n"); +#endif + rcu_read_unlock(); + return; + } + + if (*state & HT_AGG_STATE_INITIATOR_MSK) + ieee80211_send_delba(sta->sdata, ra, tid, + WLAN_BACK_INITIATOR, WLAN_REASON_QSTA_NOT_USE); + + agg_queue = sta->tid_to_tx_q[tid]; + + ieee80211_ht_agg_queue_remove(local, sta, tid, 1); + + /* We just requeued the all the frames that were in the + * removed queue, and since we might miss a softirq we do + * netif_schedule_queue. ieee80211_wake_queue is not used + * here as this queue is not necessarily stopped + */ + netif_schedule_queue(netdev_get_tx_queue(local->mdev, agg_queue)); + spin_lock_bh(&sta->lock); + *state = HT_AGG_STATE_IDLE; + sta->ampdu_mlme.addba_req_num[tid] = 0; + kfree(sta->ampdu_mlme.tid_tx[tid]); + sta->ampdu_mlme.tid_tx[tid] = NULL; + spin_unlock_bh(&sta->lock); + + rcu_read_unlock(); +} +EXPORT_SYMBOL(ieee80211_stop_tx_ba_cb); + +void ieee80211_start_tx_ba_cb_irqsafe(struct ieee80211_hw *hw, + const u8 *ra, u16 tid) +{ + struct ieee80211_local *local = hw_to_local(hw); + struct ieee80211_ra_tid *ra_tid; + struct sk_buff *skb = dev_alloc_skb(0); + + if (unlikely(!skb)) { +#ifdef CONFIG_MAC80211_HT_DEBUG + if (net_ratelimit()) + printk(KERN_WARNING "%s: Not enough memory, " + "dropping start BA session", skb->dev->name); +#endif + return; + } + ra_tid = (struct ieee80211_ra_tid *) &skb->cb; + memcpy(&ra_tid->ra, ra, ETH_ALEN); + ra_tid->tid = tid; + + skb->pkt_type = IEEE80211_ADDBA_MSG; + skb_queue_tail(&local->skb_queue, skb); + tasklet_schedule(&local->tasklet); +} +EXPORT_SYMBOL(ieee80211_start_tx_ba_cb_irqsafe); + +void ieee80211_stop_tx_ba_cb_irqsafe(struct ieee80211_hw *hw, + const u8 *ra, u16 tid) +{ + struct ieee80211_local *local = hw_to_local(hw); + struct ieee80211_ra_tid *ra_tid; + struct sk_buff *skb = dev_alloc_skb(0); + + if (unlikely(!skb)) { +#ifdef CONFIG_MAC80211_HT_DEBUG + if (net_ratelimit()) + printk(KERN_WARNING "%s: Not enough memory, " + "dropping stop BA session", skb->dev->name); +#endif + return; + } + ra_tid = (struct ieee80211_ra_tid *) &skb->cb; + memcpy(&ra_tid->ra, ra, ETH_ALEN); + ra_tid->tid = tid; + + skb->pkt_type = IEEE80211_DELBA_MSG; + skb_queue_tail(&local->skb_queue, skb); + tasklet_schedule(&local->tasklet); +} +EXPORT_SYMBOL(ieee80211_stop_tx_ba_cb_irqsafe); + +/* + * After accepting the AddBA Request we activated a timer, + * resetting it after each frame that arrives from the originator. + * if this timer expires ieee80211_sta_stop_rx_ba_session will be executed. + */ +static void sta_rx_agg_session_timer_expired(unsigned long data) +{ + /* not an elegant detour, but there is no choice as the timer passes + * only one argument, and various sta_info are needed here, so init + * flow in sta_info_create gives the TID as data, while the timer_to_id + * array gives the sta through container_of */ + u8 *ptid = (u8 *)data; + u8 *timer_to_id = ptid - *ptid; + struct sta_info *sta = container_of(timer_to_id, struct sta_info, + timer_to_tid[0]); + +#ifdef CONFIG_MAC80211_HT_DEBUG + printk(KERN_DEBUG "rx session timer expired on tid %d\n", (u16)*ptid); +#endif + ieee80211_sta_stop_rx_ba_session(sta->sdata, sta->sta.addr, + (u16)*ptid, WLAN_BACK_TIMER, + WLAN_REASON_QSTA_TIMEOUT); +} + +void ieee80211_process_addba_request(struct ieee80211_local *local, + struct sta_info *sta, + struct ieee80211_mgmt *mgmt, + size_t len) +{ + struct ieee80211_hw *hw = &local->hw; + struct ieee80211_conf *conf = &hw->conf; + struct tid_ampdu_rx *tid_agg_rx; + u16 capab, tid, timeout, ba_policy, buf_size, start_seq_num, status; + u8 dialog_token; + int ret = -EOPNOTSUPP; + DECLARE_MAC_BUF(mac); + + /* extract session parameters from addba request frame */ + dialog_token = mgmt->u.action.u.addba_req.dialog_token; + timeout = le16_to_cpu(mgmt->u.action.u.addba_req.timeout); + start_seq_num = + le16_to_cpu(mgmt->u.action.u.addba_req.start_seq_num) >> 4; + + capab = le16_to_cpu(mgmt->u.action.u.addba_req.capab); + ba_policy = (capab & IEEE80211_ADDBA_PARAM_POLICY_MASK) >> 1; + tid = (capab & IEEE80211_ADDBA_PARAM_TID_MASK) >> 2; + buf_size = (capab & IEEE80211_ADDBA_PARAM_BUF_SIZE_MASK) >> 6; + + status = WLAN_STATUS_REQUEST_DECLINED; + + /* sanity check for incoming parameters: + * check if configuration can support the BA policy + * and if buffer size does not exceeds max value */ + if (((ba_policy != 1) + && (!(conf->ht_conf.cap & IEEE80211_HT_CAP_DELAY_BA))) + || (buf_size > IEEE80211_MAX_AMPDU_BUF)) { + status = WLAN_STATUS_INVALID_QOS_PARAM; +#ifdef CONFIG_MAC80211_HT_DEBUG + if (net_ratelimit()) + printk(KERN_DEBUG "AddBA Req with bad params from " + "%s on tid %u. policy %d, buffer size %d\n", + print_mac(mac, mgmt->sa), tid, ba_policy, + buf_size); +#endif /* CONFIG_MAC80211_HT_DEBUG */ + goto end_no_lock; + } + /* determine default buffer size */ + if (buf_size == 0) { + struct ieee80211_supported_band *sband; + + sband = local->hw.wiphy->bands[conf->channel->band]; + buf_size = IEEE80211_MIN_AMPDU_BUF; + buf_size = buf_size << sband->ht_info.ampdu_factor; + } + + + /* examine state machine */ + spin_lock_bh(&sta->lock); + + if (sta->ampdu_mlme.tid_state_rx[tid] != HT_AGG_STATE_IDLE) { +#ifdef CONFIG_MAC80211_HT_DEBUG + if (net_ratelimit()) + printk(KERN_DEBUG "unexpected AddBA Req from " + "%s on tid %u\n", + print_mac(mac, mgmt->sa), tid); +#endif /* CONFIG_MAC80211_HT_DEBUG */ + goto end; + } + + /* prepare A-MPDU MLME for Rx aggregation */ + sta->ampdu_mlme.tid_rx[tid] = + kmalloc(sizeof(struct tid_ampdu_rx), GFP_ATOMIC); + if (!sta->ampdu_mlme.tid_rx[tid]) { +#ifdef CONFIG_MAC80211_HT_DEBUG + if (net_ratelimit()) + printk(KERN_ERR "allocate rx mlme to tid %d failed\n", + tid); +#endif + goto end; + } + /* rx timer */ + sta->ampdu_mlme.tid_rx[tid]->session_timer.function = + sta_rx_agg_session_timer_expired; + sta->ampdu_mlme.tid_rx[tid]->session_timer.data = + (unsigned long)&sta->timer_to_tid[tid]; + init_timer(&sta->ampdu_mlme.tid_rx[tid]->session_timer); + + tid_agg_rx = sta->ampdu_mlme.tid_rx[tid]; + + /* prepare reordering buffer */ + tid_agg_rx->reorder_buf = + kmalloc(buf_size * sizeof(struct sk_buff *), GFP_ATOMIC); + if (!tid_agg_rx->reorder_buf) { +#ifdef CONFIG_MAC80211_HT_DEBUG + if (net_ratelimit()) + printk(KERN_ERR "can not allocate reordering buffer " + "to tid %d\n", tid); +#endif + kfree(sta->ampdu_mlme.tid_rx[tid]); + goto end; + } + memset(tid_agg_rx->reorder_buf, 0, + buf_size * sizeof(struct sk_buff *)); + + if (local->ops->ampdu_action) + ret = local->ops->ampdu_action(hw, IEEE80211_AMPDU_RX_START, + &sta->sta, tid, &start_seq_num); +#ifdef CONFIG_MAC80211_HT_DEBUG + printk(KERN_DEBUG "Rx A-MPDU request on tid %d result %d\n", tid, ret); +#endif /* CONFIG_MAC80211_HT_DEBUG */ + + if (ret) { + kfree(tid_agg_rx->reorder_buf); + kfree(tid_agg_rx); + sta->ampdu_mlme.tid_rx[tid] = NULL; + goto end; + } + + /* change state and send addba resp */ + sta->ampdu_mlme.tid_state_rx[tid] = HT_AGG_STATE_OPERATIONAL; + tid_agg_rx->dialog_token = dialog_token; + tid_agg_rx->ssn = start_seq_num; + tid_agg_rx->head_seq_num = start_seq_num; + tid_agg_rx->buf_size = buf_size; + tid_agg_rx->timeout = timeout; + tid_agg_rx->stored_mpdu_num = 0; + status = WLAN_STATUS_SUCCESS; +end: + spin_unlock_bh(&sta->lock); + +end_no_lock: + ieee80211_send_addba_resp(sta->sdata, sta->sta.addr, tid, + dialog_token, status, 1, buf_size, timeout); +} + +void ieee80211_process_addba_resp(struct ieee80211_local *local, + struct sta_info *sta, + struct ieee80211_mgmt *mgmt, + size_t len) +{ + struct ieee80211_hw *hw = &local->hw; + u16 capab; + u16 tid; + u8 *state; + + capab = le16_to_cpu(mgmt->u.action.u.addba_resp.capab); + tid = (capab & IEEE80211_ADDBA_PARAM_TID_MASK) >> 2; + + state = &sta->ampdu_mlme.tid_state_tx[tid]; + + spin_lock_bh(&sta->lock); + + if (!(*state & HT_ADDBA_REQUESTED_MSK)) { + spin_unlock_bh(&sta->lock); + return; + } + + if (mgmt->u.action.u.addba_resp.dialog_token != + sta->ampdu_mlme.tid_tx[tid]->dialog_token) { + spin_unlock_bh(&sta->lock); +#ifdef CONFIG_MAC80211_HT_DEBUG + printk(KERN_DEBUG "wrong addBA response token, tid %d\n", tid); +#endif /* CONFIG_MAC80211_HT_DEBUG */ + return; + } + + del_timer_sync(&sta->ampdu_mlme.tid_tx[tid]->addba_resp_timer); +#ifdef CONFIG_MAC80211_HT_DEBUG + printk(KERN_DEBUG "switched off addBA timer for tid %d \n", tid); +#endif /* CONFIG_MAC80211_HT_DEBUG */ + if (le16_to_cpu(mgmt->u.action.u.addba_resp.status) + == WLAN_STATUS_SUCCESS) { + *state |= HT_ADDBA_RECEIVED_MSK; + sta->ampdu_mlme.addba_req_num[tid] = 0; + + if (*state == HT_AGG_STATE_OPERATIONAL) + ieee80211_wake_queue(hw, sta->tid_to_tx_q[tid]); + + spin_unlock_bh(&sta->lock); + } else { + sta->ampdu_mlme.addba_req_num[tid]++; + /* this will allow the state check in stop_BA_session */ + *state = HT_AGG_STATE_OPERATIONAL; + spin_unlock_bh(&sta->lock); + ieee80211_stop_tx_ba_session(hw, sta->sta.addr, tid, + WLAN_BACK_INITIATOR); + } +} + +void ieee80211_process_delba(struct ieee80211_sub_if_data *sdata, + struct sta_info *sta, + struct ieee80211_mgmt *mgmt, size_t len) +{ + struct ieee80211_local *local = sdata->local; + u16 tid, params; + u16 initiator; + DECLARE_MAC_BUF(mac); + + params = le16_to_cpu(mgmt->u.action.u.delba.params); + tid = (params & IEEE80211_DELBA_PARAM_TID_MASK) >> 12; + initiator = (params & IEEE80211_DELBA_PARAM_INITIATOR_MASK) >> 11; + +#ifdef CONFIG_MAC80211_HT_DEBUG + if (net_ratelimit()) + printk(KERN_DEBUG "delba from %s (%s) tid %d reason code %d\n", + print_mac(mac, mgmt->sa), + initiator ? "initiator" : "recipient", tid, + mgmt->u.action.u.delba.reason_code); +#endif /* CONFIG_MAC80211_HT_DEBUG */ + + if (initiator == WLAN_BACK_INITIATOR) + ieee80211_sta_stop_rx_ba_session(sdata, sta->sta.addr, tid, + WLAN_BACK_INITIATOR, 0); + else { /* WLAN_BACK_RECIPIENT */ + spin_lock_bh(&sta->lock); + sta->ampdu_mlme.tid_state_tx[tid] = + HT_AGG_STATE_OPERATIONAL; + spin_unlock_bh(&sta->lock); + ieee80211_stop_tx_ba_session(&local->hw, sta->sta.addr, tid, + WLAN_BACK_RECIPIENT); + } +} diff --git a/net/mac80211/ieee80211_i.h b/net/mac80211/ieee80211_i.h index 006486b26726..8025b294588b 100644 --- a/net/mac80211/ieee80211_i.h +++ b/net/mac80211/ieee80211_i.h @@ -2,6 +2,7 @@ * Copyright 2002-2005, Instant802 Networks, Inc. * Copyright 2005, Devicescape Software, Inc. * Copyright 2006-2007 Jiri Benc <jbenc@suse.cz> + * Copyright 2007-2008 Johannes Berg <johannes@sipsolutions.net> * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License version 2 as @@ -23,20 +24,11 @@ #include <linux/spinlock.h> #include <linux/etherdevice.h> #include <net/wireless.h> +#include <net/iw_handler.h> +#include <net/mac80211.h> #include "key.h" #include "sta_info.h" -/* ieee80211.o internal definitions, etc. These are not included into - * low-level drivers. */ - -#ifndef ETH_P_PAE -#define ETH_P_PAE 0x888E /* Port Access Entity (IEEE 802.1X) */ -#endif /* ETH_P_PAE */ - -#define WLAN_FC_DATA_PRESENT(fc) (((fc) & 0x4c) == 0x08) - -#define IEEE80211_FC(type, subtype) cpu_to_le16(type | subtype) - struct ieee80211_local; /* Maximum number of broadcast/multicast frames to buffer when some of the @@ -58,6 +50,12 @@ struct ieee80211_local; * increased memory use (about 2 kB of RAM per entry). */ #define IEEE80211_FRAGMENT_MAX 4 +/* + * Time after which we ignore scan results and no longer report/use + * them in any way. + */ +#define IEEE80211_SCAN_RESULT_EXPIRE (10 * HZ) + struct ieee80211_fragment_entry { unsigned long first_frag_time; unsigned int seq; @@ -70,27 +68,25 @@ struct ieee80211_fragment_entry { }; -struct ieee80211_sta_bss { +struct ieee80211_bss { struct list_head list; - struct ieee80211_sta_bss *hnext; + struct ieee80211_bss *hnext; size_t ssid_len; atomic_t users; u8 bssid[ETH_ALEN]; u8 ssid[IEEE80211_MAX_SSID_LEN]; + u8 dtim_period; u16 capability; /* host byte order */ enum ieee80211_band band; int freq; - int rssi, signal, noise; - u8 *wpa_ie; - size_t wpa_ie_len; - u8 *rsn_ie; - size_t rsn_ie_len; - u8 *wmm_ie; - size_t wmm_ie_len; - u8 *ht_ie; - size_t ht_ie_len; + int signal, noise, qual; + u8 *ies; /* all information elements from the last Beacon or Probe + * Response frames; note Beacon frame is not allowed to + * override values from Probe Response */ + size_t ies_len; + bool wmm_used; #ifdef CONFIG_MAC80211_MESH u8 *mesh_id; size_t mesh_id_len; @@ -102,7 +98,7 @@ struct ieee80211_sta_bss { u64 timestamp; int beacon_int; - bool probe_resp; + unsigned long last_probe_resp; unsigned long last_update; /* during assocation, we save an ERP value from a probe response so @@ -113,7 +109,7 @@ struct ieee80211_sta_bss { u8 erp_value; }; -static inline u8 *bss_mesh_cfg(struct ieee80211_sta_bss *bss) +static inline u8 *bss_mesh_cfg(struct ieee80211_bss *bss) { #ifdef CONFIG_MAC80211_MESH return bss->mesh_cfg; @@ -121,7 +117,7 @@ static inline u8 *bss_mesh_cfg(struct ieee80211_sta_bss *bss) return NULL; } -static inline u8 *bss_mesh_id(struct ieee80211_sta_bss *bss) +static inline u8 *bss_mesh_id(struct ieee80211_bss *bss) { #ifdef CONFIG_MAC80211_MESH return bss->mesh_id; @@ -129,7 +125,7 @@ static inline u8 *bss_mesh_id(struct ieee80211_sta_bss *bss) return NULL; } -static inline u8 bss_mesh_id_len(struct ieee80211_sta_bss *bss) +static inline u8 bss_mesh_id_len(struct ieee80211_bss *bss) { #ifdef CONFIG_MAC80211_MESH return bss->mesh_id_len; @@ -147,7 +143,6 @@ typedef unsigned __bitwise__ ieee80211_tx_result; #define IEEE80211_TX_UNICAST BIT(1) #define IEEE80211_TX_PS_BUFFERED BIT(2) #define IEEE80211_TX_PROBE_LAST_FRAG BIT(3) -#define IEEE80211_TX_INJECTED BIT(4) struct ieee80211_tx_data { struct sk_buff *skb; @@ -157,20 +152,19 @@ struct ieee80211_tx_data { struct sta_info *sta; struct ieee80211_key *key; - struct ieee80211_tx_control *control; struct ieee80211_channel *channel; - struct ieee80211_rate *rate; + s8 rate_idx; /* use this rate (if set) for last fragment; rate can * be set to lower rate for the first fragments, e.g., * when using CTS protection with IEEE 802.11g. */ - struct ieee80211_rate *last_frag_rate; + s8 last_frag_rate_idx; /* Extra fragments (in addition to the first fragment * in skb) */ struct sk_buff **extra_frag; int num_extra_frag; - u16 fc, ethertype; + u16 ethertype; unsigned int flags; }; @@ -198,36 +192,20 @@ struct ieee80211_rx_data { struct ieee80211_rx_status *status; struct ieee80211_rate *rate; - u16 fc, ethertype; + u16 ethertype; unsigned int flags; int sent_ps_buffered; int queue; - int load; u32 tkip_iv32; u16 tkip_iv16; }; -/* flags used in struct ieee80211_tx_packet_data.flags */ -#define IEEE80211_TXPD_REQ_TX_STATUS BIT(0) -#define IEEE80211_TXPD_DO_NOT_ENCRYPT BIT(1) -#define IEEE80211_TXPD_REQUEUE BIT(2) -#define IEEE80211_TXPD_EAPOL_FRAME BIT(3) -#define IEEE80211_TXPD_AMPDU BIT(4) -/* Stored in sk_buff->cb */ -struct ieee80211_tx_packet_data { - int ifindex; - unsigned long jiffies; - unsigned int flags; - u8 queue; -}; - struct ieee80211_tx_stored_packet { - struct ieee80211_tx_control control; struct sk_buff *skb; struct sk_buff **extra_frag; - struct ieee80211_rate *last_frag_rate; + s8 last_frag_rate_idx; int num_extra_frag; - unsigned int last_frag_rate_ctrl_probe; + bool last_frag_rate_ctrl_probe; }; struct beacon_data { @@ -251,9 +229,6 @@ struct ieee80211_if_ap { struct sk_buff_head ps_bc_buf; atomic_t num_sta_ps; /* number of stations in PS mode */ int dtim_count; - int force_unicast_rateidx; /* forced TX rateidx for unicast frames */ - int max_ratectrl_rateidx; /* max TX rateidx for rate control */ - int num_beacons; /* number of TXed beacon frames for this BSS */ }; struct ieee80211_if_wds { @@ -262,7 +237,6 @@ struct ieee80211_if_wds { }; struct ieee80211_if_vlan { - struct ieee80211_sub_if_data *ap; struct list_head list; }; @@ -315,48 +289,37 @@ struct mesh_config { #define IEEE80211_STA_AUTO_BSSID_SEL BIT(11) #define IEEE80211_STA_AUTO_CHANNEL_SEL BIT(12) #define IEEE80211_STA_PRIVACY_INVOKED BIT(13) +/* flags for MLME request */ +#define IEEE80211_STA_REQ_SCAN 0 +#define IEEE80211_STA_REQ_DIRECT_PROBE 1 +#define IEEE80211_STA_REQ_AUTH 2 +#define IEEE80211_STA_REQ_RUN 3 + +/* STA/IBSS MLME states */ +enum ieee80211_sta_mlme_state { + IEEE80211_STA_MLME_DISABLED, + IEEE80211_STA_MLME_DIRECT_PROBE, + IEEE80211_STA_MLME_AUTHENTICATE, + IEEE80211_STA_MLME_ASSOCIATE, + IEEE80211_STA_MLME_ASSOCIATED, + IEEE80211_STA_MLME_IBSS_SEARCH, + IEEE80211_STA_MLME_IBSS_JOINED, +}; + +/* bitfield of allowed auth algs */ +#define IEEE80211_AUTH_ALG_OPEN BIT(0) +#define IEEE80211_AUTH_ALG_SHARED_KEY BIT(1) +#define IEEE80211_AUTH_ALG_LEAP BIT(2) + struct ieee80211_if_sta { struct timer_list timer; struct work_struct work; u8 bssid[ETH_ALEN], prev_bssid[ETH_ALEN]; u8 ssid[IEEE80211_MAX_SSID_LEN]; - enum { - IEEE80211_DISABLED, IEEE80211_AUTHENTICATE, - IEEE80211_ASSOCIATE, IEEE80211_ASSOCIATED, - IEEE80211_IBSS_SEARCH, IEEE80211_IBSS_JOINED, - IEEE80211_MESH_UP - } state; + enum ieee80211_sta_mlme_state state; size_t ssid_len; u8 scan_ssid[IEEE80211_MAX_SSID_LEN]; size_t scan_ssid_len; -#ifdef CONFIG_MAC80211_MESH - struct timer_list mesh_path_timer; - u8 mesh_id[IEEE80211_MAX_MESH_ID_LEN]; - size_t mesh_id_len; - /* Active Path Selection Protocol Identifier */ - u8 mesh_pp_id[4]; - /* Active Path Selection Metric Identifier */ - u8 mesh_pm_id[4]; - /* Congestion Control Mode Identifier */ - u8 mesh_cc_id[4]; - /* Local mesh Destination Sequence Number */ - u32 dsn; - /* Last used PREQ ID */ - u32 preq_id; - atomic_t mpaths; - /* Timestamp of last DSN update */ - unsigned long last_dsn_update; - /* Timestamp of last DSN sent */ - unsigned long last_preq; - struct mesh_rmc *rmc; - spinlock_t mesh_preq_queue_lock; - struct mesh_preq_queue preq_queue; - int preq_queue_len; - struct mesh_stats mshstats; - struct mesh_config mshcfg; - u32 mesh_seqnum; - bool accepting_plinks; -#endif u16 aid; u16 ap_capab, capab; u8 *extra_ie; /* to be added to the end of AssocReq */ @@ -368,20 +331,17 @@ struct ieee80211_if_sta { struct sk_buff_head skb_queue; - int auth_tries, assoc_tries; + int assoc_scan_tries; /* number of scans done pre-association */ + int direct_probe_tries; /* retries for direct probes */ + int auth_tries; /* retries for auth req */ + int assoc_tries; /* retries for assoc req */ unsigned long request; unsigned long last_probe; unsigned int flags; -#define IEEE80211_STA_REQ_SCAN 0 -#define IEEE80211_STA_REQ_AUTH 1 -#define IEEE80211_STA_REQ_RUN 2 -#define IEEE80211_AUTH_ALG_OPEN BIT(0) -#define IEEE80211_AUTH_ALG_SHARED_KEY BIT(1) -#define IEEE80211_AUTH_ALG_LEAP BIT(2) unsigned int auth_algs; /* bitfield of allowed auth algs */ int auth_alg; /* currently used IEEE 802.11 authentication algorithm */ int auth_transaction; @@ -391,31 +351,70 @@ struct ieee80211_if_sta { u32 supp_rates_bits[IEEE80211_NUM_BANDS]; int wmm_last_param_set; - int num_beacons; /* number of TXed beacon frames by this STA */ }; -static inline void ieee80211_if_sta_set_mesh_id(struct ieee80211_if_sta *ifsta, - u8 mesh_id_len, u8 *mesh_id) -{ -#ifdef CONFIG_MAC80211_MESH - ifsta->mesh_id_len = mesh_id_len; - memcpy(ifsta->mesh_id, mesh_id, mesh_id_len); -#endif -} +struct ieee80211_if_mesh { + struct work_struct work; + struct timer_list housekeeping_timer; + struct timer_list mesh_path_timer; + struct sk_buff_head skb_queue; + + bool housekeeping; + + u8 mesh_id[IEEE80211_MAX_MESH_ID_LEN]; + size_t mesh_id_len; + /* Active Path Selection Protocol Identifier */ + u8 mesh_pp_id[4]; + /* Active Path Selection Metric Identifier */ + u8 mesh_pm_id[4]; + /* Congestion Control Mode Identifier */ + u8 mesh_cc_id[4]; + /* Local mesh Destination Sequence Number */ + u32 dsn; + /* Last used PREQ ID */ + u32 preq_id; + atomic_t mpaths; + /* Timestamp of last DSN update */ + unsigned long last_dsn_update; + /* Timestamp of last DSN sent */ + unsigned long last_preq; + struct mesh_rmc *rmc; + spinlock_t mesh_preq_queue_lock; + struct mesh_preq_queue preq_queue; + int preq_queue_len; + struct mesh_stats mshstats; + struct mesh_config mshcfg; + u32 mesh_seqnum; + bool accepting_plinks; +}; #ifdef CONFIG_MAC80211_MESH -#define IEEE80211_IFSTA_MESH_CTR_INC(sta, name) \ - do { (sta)->mshstats.name++; } while (0) +#define IEEE80211_IFSTA_MESH_CTR_INC(msh, name) \ + do { (msh)->mshstats.name++; } while (0) #else -#define IEEE80211_IFSTA_MESH_CTR_INC(sta, name) \ +#define IEEE80211_IFSTA_MESH_CTR_INC(msh, name) \ do { } while (0) #endif -/* flags used in struct ieee80211_sub_if_data.flags */ -#define IEEE80211_SDATA_ALLMULTI BIT(0) -#define IEEE80211_SDATA_PROMISC BIT(1) -#define IEEE80211_SDATA_USERSPACE_MLME BIT(2) -#define IEEE80211_SDATA_OPERATING_GMODE BIT(3) +/** + * enum ieee80211_sub_if_data_flags - virtual interface flags + * + * @IEEE80211_SDATA_ALLMULTI: interface wants all multicast packets + * @IEEE80211_SDATA_PROMISC: interface is promisc + * @IEEE80211_SDATA_USERSPACE_MLME: userspace MLME is active + * @IEEE80211_SDATA_OPERATING_GMODE: operating in G-only mode + * @IEEE80211_SDATA_DONT_BRIDGE_PACKETS: bridge packets between + * associated stations and deliver multicast frames both + * back to wireless media and to the local net stack. + */ +enum ieee80211_sub_if_data_flags { + IEEE80211_SDATA_ALLMULTI = BIT(0), + IEEE80211_SDATA_PROMISC = BIT(1), + IEEE80211_SDATA_USERSPACE_MLME = BIT(2), + IEEE80211_SDATA_OPERATING_GMODE = BIT(3), + IEEE80211_SDATA_DONT_BRIDGE_PACKETS = BIT(4), +}; + struct ieee80211_sub_if_data { struct list_head list; @@ -431,13 +430,6 @@ struct ieee80211_sub_if_data { int drop_unencrypted; - /* - * basic rates of this AP or the AP we're associated to - */ - u64 basic_rates; - - u16 sequence; - /* Fragment table for host-based reassembly */ struct ieee80211_fragment_entry fragments[IEEE80211_FRAGMENT_MAX]; unsigned int fragment_next; @@ -446,32 +438,34 @@ struct ieee80211_sub_if_data { struct ieee80211_key *keys[NUM_DEFAULT_KEYS]; struct ieee80211_key *default_key; + /* BSS configuration for this interface. */ + struct ieee80211_bss_conf bss_conf; + /* - * BSS configuration for this interface. - * - * FIXME: I feel bad putting this here when we already have a - * bss pointer, but the bss pointer is just wrong when - * you have multiple virtual STA mode interfaces... - * This needs to be fixed. + * AP this belongs to: self in AP mode and + * corresponding AP in VLAN mode, NULL for + * all others (might be needed later in IBSS) */ - struct ieee80211_bss_conf bss_conf; - struct ieee80211_if_ap *bss; /* BSS that this device belongs to */ + struct ieee80211_if_ap *bss; + + int force_unicast_rateidx; /* forced TX rateidx for unicast frames */ + int max_ratectrl_rateidx; /* max TX rateidx for rate control */ union { struct ieee80211_if_ap ap; struct ieee80211_if_wds wds; struct ieee80211_if_vlan vlan; struct ieee80211_if_sta sta; +#ifdef CONFIG_MAC80211_MESH + struct ieee80211_if_mesh mesh; +#endif u32 mntr_flags; } u; - int channel_use; - int channel_use_raw; #ifdef CONFIG_MAC80211_DEBUGFS struct dentry *debugfsdir; union { struct { - struct dentry *channel_use; struct dentry *drop_unencrypted; struct dentry *state; struct dentry *bssid; @@ -487,32 +481,35 @@ struct ieee80211_sub_if_data { struct dentry *auth_alg; struct dentry *auth_transaction; struct dentry *flags; - struct dentry *num_beacons_sta; + struct dentry *force_unicast_rateidx; + struct dentry *max_ratectrl_rateidx; } sta; struct { - struct dentry *channel_use; struct dentry *drop_unencrypted; struct dentry *num_sta_ps; struct dentry *dtim_count; - struct dentry *num_beacons; struct dentry *force_unicast_rateidx; struct dentry *max_ratectrl_rateidx; struct dentry *num_buffered_multicast; } ap; struct { - struct dentry *channel_use; struct dentry *drop_unencrypted; struct dentry *peer; + struct dentry *force_unicast_rateidx; + struct dentry *max_ratectrl_rateidx; } wds; struct { - struct dentry *channel_use; struct dentry *drop_unencrypted; + struct dentry *force_unicast_rateidx; + struct dentry *max_ratectrl_rateidx; } vlan; struct { struct dentry *mode; } monitor; - struct dentry *default_key; } debugfs; + struct { + struct dentry *default_key; + } common_debugfs; #ifdef CONFIG_MAC80211_MESH struct dentry *mesh_stats_dir; @@ -553,7 +550,18 @@ struct ieee80211_sub_if_data *vif_to_sdata(struct ieee80211_vif *p) return container_of(p, struct ieee80211_sub_if_data, vif); } -#define IEEE80211_DEV_TO_SUB_IF(dev) netdev_priv(dev) +static inline void +ieee80211_sdata_set_mesh_id(struct ieee80211_sub_if_data *sdata, + u8 mesh_id_len, u8 *mesh_id) +{ +#ifdef CONFIG_MAC80211_MESH + struct ieee80211_if_mesh *ifmsh = &sdata->u.mesh; + ifmsh->mesh_id_len = mesh_id_len; + memcpy(ifmsh->mesh_id, mesh_id, mesh_id_len); +#else + WARN_ON(1); +#endif +} enum { IEEE80211_RX_MSG = 1, @@ -562,6 +570,13 @@ enum { IEEE80211_ADDBA_MSG = 4, }; +/* maximum number of hardware queues we support. */ +#define QD_MAX_QUEUES (IEEE80211_MAX_AMPDU_QUEUES + IEEE80211_MAX_QUEUES) + +struct ieee80211_master_priv { + struct ieee80211_local *local; +}; + struct ieee80211_local { /* embed the driver visible part. * don't cast (use the static inlines below), but we keep @@ -570,6 +585,8 @@ struct ieee80211_local { const struct ieee80211_ops *ops; + unsigned long queue_pool[BITS_TO_LONGS(QD_MAX_QUEUES)]; + struct net_device *mdev; /* wmaster# - "master" 802.11 device */ int open_count; int monitors, cooked_mntrs; @@ -581,12 +598,6 @@ struct ieee80211_local { bool tim_in_locked_section; /* see ieee80211_beacon_get() */ int tx_headroom; /* required headroom for hardware/radiotap */ - enum { - IEEE80211_DEV_UNINITIALIZED = 0, - IEEE80211_DEV_REGISTERED, - IEEE80211_DEV_UNREGISTERED, - } reg_state; - /* Tasklet and skb queue to process calls from IRQ mode. All frames * added to skb_queue will be processed, but frames in * skb_queue_unreliable may be dropped if the total length of these @@ -610,8 +621,9 @@ struct ieee80211_local { struct sta_info *sta_hash[STA_HASH_SIZE]; struct timer_list sta_cleanup; - unsigned long state[NUM_TX_DATA_QUEUES_AMPDU]; - struct ieee80211_tx_stored_packet pending_packet[NUM_TX_DATA_QUEUES_AMPDU]; + unsigned long queues_pending[BITS_TO_LONGS(IEEE80211_MAX_QUEUES)]; + unsigned long queues_pending_run[BITS_TO_LONGS(IEEE80211_MAX_QUEUES)]; + struct ieee80211_tx_stored_packet pending_packet[IEEE80211_MAX_QUEUES]; struct tasklet_struct tx_pending_tasklet; /* number of interfaces with corresponding IFF_ flags */ @@ -628,10 +640,6 @@ struct ieee80211_local { struct crypto_blkcipher *wep_rx_tfm; u32 wep_iv; - int bridge_packets; /* bridge packets between associated stations and - * deliver multicast frames both back to wireless - * media and to the local net stack */ - struct list_head interfaces; /* @@ -641,21 +649,21 @@ struct ieee80211_local { spinlock_t key_lock; - bool sta_sw_scanning; - bool sta_hw_scanning; + /* Scanning and BSS list */ + bool sw_scanning, hw_scanning; int scan_channel_idx; enum ieee80211_band scan_band; enum { SCAN_SET_CHANNEL, SCAN_SEND_PROBE } scan_state; unsigned long last_scan_completed; struct delayed_work scan_work; - struct net_device *scan_dev; + struct ieee80211_sub_if_data *scan_sdata; struct ieee80211_channel *oper_channel, *scan_channel; u8 scan_ssid[IEEE80211_MAX_SSID_LEN]; size_t scan_ssid_len; - struct list_head sta_bss_list; - struct ieee80211_sta_bss *sta_bss_hash[STA_HASH_SIZE]; - spinlock_t sta_bss_lock; + struct list_head bss_list; + struct ieee80211_bss *bss_hash[STA_HASH_SIZE]; + spinlock_t bss_lock; /* SNMP counters */ /* dot11CountersTable */ @@ -677,9 +685,6 @@ struct ieee80211_local { assoc_led_name[32], radio_led_name[32]; #endif - u32 channel_use; - u32 channel_use_raw; - #ifdef CONFIG_MAC80211_DEBUGFS struct work_struct sta_debugfs_add; #endif @@ -705,8 +710,6 @@ struct ieee80211_local { unsigned int rx_expand_skb_head2; unsigned int rx_handlers_fragments; unsigned int tx_status_drop; - unsigned int wme_rx_queue[NUM_RX_DATA_QUEUES]; - unsigned int wme_tx_queue[NUM_RX_DATA_QUEUES]; #define I802_DEBUG_INC(c) (c)++ #else /* CONFIG_MAC80211_DEBUG_COUNTERS */ #define I802_DEBUG_INC(c) do { } while (0) @@ -721,10 +724,11 @@ struct ieee80211_local { #ifdef CONFIG_MAC80211_DEBUGFS struct local_debugfsdentries { + struct dentry *rcdir; + struct dentry *rcname; struct dentry *frequency; struct dentry *antenna_sel_tx; struct dentry *antenna_sel_rx; - struct dentry *bridge_packets; struct dentry *rts_threshold; struct dentry *fragmentation_threshold; struct dentry *short_retry_limit; @@ -764,8 +768,6 @@ struct ieee80211_local { struct dentry *rx_expand_skb_head2; struct dentry *rx_handlers_fragments; struct dentry *tx_status_drop; - struct dentry *wme_tx_queue; - struct dentry *wme_rx_queue; #endif struct dentry *dot11ACKFailureCount; struct dentry *dot11RTSFailureCount; @@ -778,6 +780,16 @@ struct ieee80211_local { #endif }; +static inline struct ieee80211_sub_if_data * +IEEE80211_DEV_TO_SUB_IF(struct net_device *dev) +{ + struct ieee80211_local *local = wdev_priv(dev->ieee80211_ptr); + + BUG_ON(!local || local->mdev == dev); + + return netdev_priv(dev); +} + /* this struct represents 802.11n's RA/TID combination */ struct ieee80211_ra_tid { u8 ra[ETH_ALEN]; @@ -786,6 +798,9 @@ struct ieee80211_ra_tid { /* Parsed Information Elements */ struct ieee802_11_elems { + u8 *ie_start; + size_t total_len; + /* pointers to IEs */ u8 *ssid; u8 *supp_rates; @@ -809,6 +824,10 @@ struct ieee802_11_elems { u8 *preq; u8 *prep; u8 *perr; + u8 *ch_switch_elem; + u8 *country_elem; + u8 *pwr_constr_elem; + u8 *quiet_elem; /* first quite element */ /* length of them, respectively */ u8 ssid_len; @@ -833,6 +852,11 @@ struct ieee802_11_elems { u8 preq_len; u8 prep_len; u8 perr_len; + u8 ch_switch_elem_len; + u8 country_elem_len; + u8 pwr_constr_elem_len; + u8 quiet_elem_len; + u8 num_of_quiet_elem; /* can be more the one */ }; static inline struct ieee80211_local *hw_to_local( @@ -847,11 +871,6 @@ static inline struct ieee80211_hw *local_to_hw( return &local->hw; } -enum ieee80211_link_state_t { - IEEE80211_LINK_STATE_XOFF = 0, - IEEE80211_LINK_STATE_PENDING, -}; - struct sta_attribute { struct attribute attr; ssize_t (*show)(const struct sta_info *, char *buf); @@ -865,110 +884,83 @@ static inline int ieee80211_bssid_match(const u8 *raddr, const u8 *addr) } -/* ieee80211.c */ int ieee80211_hw_config(struct ieee80211_local *local); -int ieee80211_if_config(struct net_device *dev); -int ieee80211_if_config_beacon(struct net_device *dev); +int ieee80211_if_config(struct ieee80211_sub_if_data *sdata, u32 changed); void ieee80211_tx_set_protected(struct ieee80211_tx_data *tx); -void ieee80211_if_setup(struct net_device *dev); u32 ieee80211_handle_ht(struct ieee80211_local *local, int enable_ht, struct ieee80211_ht_info *req_ht_cap, struct ieee80211_ht_bss_info *req_bss_cap); +void ieee80211_bss_info_change_notify(struct ieee80211_sub_if_data *sdata, + u32 changed); +void ieee80211_configure_filter(struct ieee80211_local *local); -/* ieee80211_ioctl.c */ +/* wireless extensions */ extern const struct iw_handler_def ieee80211_iw_handler_def; - -/* Least common multiple of the used rates (in 100 kbps). This is used to - * calculate rate_inv values for each rate so that only integers are needed. */ -#define CHAN_UTIL_RATE_LCM 95040 -/* 1 usec is 1/8 * (95040/10) = 1188 */ -#define CHAN_UTIL_PER_USEC 1188 -/* Amount of bits to shift the result right to scale the total utilization - * to values that will not wrap around 32-bit integers. */ -#define CHAN_UTIL_SHIFT 9 -/* Theoretical maximum of channel utilization counter in 10 ms (stat_time=1): - * (CHAN_UTIL_PER_USEC * 10000) >> CHAN_UTIL_SHIFT = 23203. So dividing the - * raw value with about 23 should give utilization in 10th of a percentage - * (1/1000). However, utilization is only estimated and not all intervals - * between frames etc. are calculated. 18 seems to give numbers that are closer - * to the real maximum. */ -#define CHAN_UTIL_PER_10MS 18 -#define CHAN_UTIL_HDR_LONG (202 * CHAN_UTIL_PER_USEC) -#define CHAN_UTIL_HDR_SHORT (40 * CHAN_UTIL_PER_USEC) - - -/* ieee80211_ioctl.c */ -int ieee80211_set_freq(struct net_device *dev, int freq); -/* ieee80211_sta.c */ -void ieee80211_sta_timer(unsigned long data); -void ieee80211_sta_work(struct work_struct *work); -void ieee80211_sta_scan_work(struct work_struct *work); -void ieee80211_sta_rx_mgmt(struct net_device *dev, struct sk_buff *skb, +/* STA/IBSS code */ +void ieee80211_sta_setup_sdata(struct ieee80211_sub_if_data *sdata); +void ieee80211_scan_work(struct work_struct *work); +void ieee80211_sta_rx_mgmt(struct ieee80211_sub_if_data *sdata, struct sk_buff *skb, struct ieee80211_rx_status *rx_status); -int ieee80211_sta_set_ssid(struct net_device *dev, char *ssid, size_t len); -int ieee80211_sta_get_ssid(struct net_device *dev, char *ssid, size_t *len); -int ieee80211_sta_set_bssid(struct net_device *dev, u8 *bssid); -int ieee80211_sta_req_scan(struct net_device *dev, u8 *ssid, size_t ssid_len); -void ieee80211_sta_req_auth(struct net_device *dev, +int ieee80211_sta_set_ssid(struct ieee80211_sub_if_data *sdata, char *ssid, size_t len); +int ieee80211_sta_get_ssid(struct ieee80211_sub_if_data *sdata, char *ssid, size_t *len); +int ieee80211_sta_set_bssid(struct ieee80211_sub_if_data *sdata, u8 *bssid); +void ieee80211_sta_req_auth(struct ieee80211_sub_if_data *sdata, struct ieee80211_if_sta *ifsta); -int ieee80211_sta_scan_results(struct net_device *dev, char *buf, size_t len); -ieee80211_rx_result ieee80211_sta_rx_scan( - struct net_device *dev, struct sk_buff *skb, - struct ieee80211_rx_status *rx_status); -void ieee80211_rx_bss_list_init(struct net_device *dev); -void ieee80211_rx_bss_list_deinit(struct net_device *dev); -int ieee80211_sta_set_extra_ie(struct net_device *dev, char *ie, size_t len); -struct sta_info * ieee80211_ibss_add_sta(struct net_device *dev, - struct sk_buff *skb, u8 *bssid, - u8 *addr); -int ieee80211_sta_deauthenticate(struct net_device *dev, u16 reason); -int ieee80211_sta_disassociate(struct net_device *dev, u16 reason); -void ieee80211_bss_info_change_notify(struct ieee80211_sub_if_data *sdata, - u32 changed); -void ieee80211_reset_erp_info(struct net_device *dev); -int ieee80211_ht_cap_ie_to_ht_info(struct ieee80211_ht_cap *ht_cap_ie, - struct ieee80211_ht_info *ht_info); -int ieee80211_ht_addt_info_ie_to_ht_bss_info( - struct ieee80211_ht_addt_info *ht_add_info_ie, - struct ieee80211_ht_bss_info *bss_info); -void ieee80211_send_addba_request(struct net_device *dev, const u8 *da, - u16 tid, u8 dialog_token, u16 start_seq_num, - u16 agg_size, u16 timeout); -void ieee80211_send_delba(struct net_device *dev, const u8 *da, u16 tid, - u16 initiator, u16 reason_code); - -void ieee80211_sta_stop_rx_ba_session(struct net_device *dev, u8 *da, - u16 tid, u16 initiator, u16 reason); -void sta_rx_agg_session_timer_expired(unsigned long data); -void sta_addba_resp_timer_expired(unsigned long data); -void ieee80211_sta_tear_down_BA_sessions(struct net_device *dev, u8 *addr); +struct sta_info *ieee80211_ibss_add_sta(struct ieee80211_sub_if_data *sdata, + struct sk_buff *skb, u8 *bssid, + u8 *addr, u64 supp_rates); +int ieee80211_sta_deauthenticate(struct ieee80211_sub_if_data *sdata, u16 reason); +int ieee80211_sta_disassociate(struct ieee80211_sub_if_data *sdata, u16 reason); +u32 ieee80211_reset_erp_info(struct ieee80211_sub_if_data *sdata); u64 ieee80211_sta_get_rates(struct ieee80211_local *local, struct ieee802_11_elems *elems, enum ieee80211_band band); -void ieee80211_sta_tx(struct net_device *dev, struct sk_buff *skb, - int encrypt); -void ieee802_11_parse_elems(u8 *start, size_t len, - struct ieee802_11_elems *elems); - -#ifdef CONFIG_MAC80211_MESH -void ieee80211_start_mesh(struct net_device *dev); -#else -static inline void ieee80211_start_mesh(struct net_device *dev) -{} -#endif - -/* ieee80211_iface.c */ -int ieee80211_if_add(struct net_device *dev, const char *name, - struct net_device **new_dev, int type, +void ieee80211_send_probe_req(struct ieee80211_sub_if_data *sdata, u8 *dst, + u8 *ssid, size_t ssid_len); + +/* scan/BSS handling */ +int ieee80211_request_scan(struct ieee80211_sub_if_data *sdata, + u8 *ssid, size_t ssid_len); +int ieee80211_scan_results(struct ieee80211_local *local, + struct iw_request_info *info, + char *buf, size_t len); +ieee80211_rx_result +ieee80211_scan_rx(struct ieee80211_sub_if_data *sdata, + struct sk_buff *skb, + struct ieee80211_rx_status *rx_status); +void ieee80211_rx_bss_list_init(struct ieee80211_local *local); +void ieee80211_rx_bss_list_deinit(struct ieee80211_local *local); +int ieee80211_sta_set_extra_ie(struct ieee80211_sub_if_data *sdata, + char *ie, size_t len); + +void ieee80211_mlme_notify_scan_completed(struct ieee80211_local *local); +int ieee80211_start_scan(struct ieee80211_sub_if_data *scan_sdata, + u8 *ssid, size_t ssid_len); +struct ieee80211_bss * +ieee80211_bss_info_update(struct ieee80211_local *local, + struct ieee80211_rx_status *rx_status, + struct ieee80211_mgmt *mgmt, + size_t len, + struct ieee802_11_elems *elems, + int freq, bool beacon); +struct ieee80211_bss * +ieee80211_rx_bss_add(struct ieee80211_local *local, u8 *bssid, int freq, + u8 *ssid, u8 ssid_len); +struct ieee80211_bss * +ieee80211_rx_bss_get(struct ieee80211_local *local, u8 *bssid, int freq, + u8 *ssid, u8 ssid_len); +void ieee80211_rx_bss_put(struct ieee80211_local *local, + struct ieee80211_bss *bss); + +/* interface handling */ +int ieee80211_if_add(struct ieee80211_local *local, const char *name, + struct net_device **new_dev, enum nl80211_iftype type, struct vif_params *params); -void ieee80211_if_set_type(struct net_device *dev, int type); -void ieee80211_if_reinit(struct net_device *dev); -void __ieee80211_if_del(struct ieee80211_local *local, - struct ieee80211_sub_if_data *sdata); -int ieee80211_if_remove(struct net_device *dev, const char *name, int id); -void ieee80211_if_free(struct net_device *dev); -void ieee80211_if_sdata_init(struct ieee80211_sub_if_data *sdata); +int ieee80211_if_change_type(struct ieee80211_sub_if_data *sdata, + enum nl80211_iftype type); +void ieee80211_if_remove(struct ieee80211_sub_if_data *sdata); +void ieee80211_remove_interfaces(struct ieee80211_local *local); /* tx handling */ void ieee80211_clear_tx_pending(struct ieee80211_local *local); @@ -977,15 +969,57 @@ int ieee80211_master_start_xmit(struct sk_buff *skb, struct net_device *dev); int ieee80211_monitor_start_xmit(struct sk_buff *skb, struct net_device *dev); int ieee80211_subif_start_xmit(struct sk_buff *skb, struct net_device *dev); +/* HT */ +int ieee80211_ht_cap_ie_to_ht_info(struct ieee80211_ht_cap *ht_cap_ie, + struct ieee80211_ht_info *ht_info); +int ieee80211_ht_addt_info_ie_to_ht_bss_info( + struct ieee80211_ht_addt_info *ht_add_info_ie, + struct ieee80211_ht_bss_info *bss_info); +void ieee80211_send_bar(struct ieee80211_sub_if_data *sdata, u8 *ra, u16 tid, u16 ssn); + +void ieee80211_sta_stop_rx_ba_session(struct ieee80211_sub_if_data *sdata, u8 *da, + u16 tid, u16 initiator, u16 reason); +void ieee80211_sta_tear_down_BA_sessions(struct ieee80211_sub_if_data *sdata, u8 *addr); +void ieee80211_process_delba(struct ieee80211_sub_if_data *sdata, + struct sta_info *sta, + struct ieee80211_mgmt *mgmt, size_t len); +void ieee80211_process_addba_resp(struct ieee80211_local *local, + struct sta_info *sta, + struct ieee80211_mgmt *mgmt, + size_t len); +void ieee80211_process_addba_request(struct ieee80211_local *local, + struct sta_info *sta, + struct ieee80211_mgmt *mgmt, + size_t len); + +/* Spectrum management */ +void ieee80211_process_measurement_req(struct ieee80211_sub_if_data *sdata, + struct ieee80211_mgmt *mgmt, + size_t len); + /* utility functions/constants */ extern void *mac80211_wiphy_privid; /* for wiphy privid */ extern const unsigned char rfc1042_header[6]; extern const unsigned char bridge_tunnel_header[6]; u8 *ieee80211_get_bssid(struct ieee80211_hdr *hdr, size_t len, - enum ieee80211_if_types type); + enum nl80211_iftype type); int ieee80211_frame_duration(struct ieee80211_local *local, size_t len, int rate, int erp, int short_preamble); -void mac80211_ev_michael_mic_failure(struct net_device *dev, int keyidx, +void mac80211_ev_michael_mic_failure(struct ieee80211_sub_if_data *sdata, int keyidx, struct ieee80211_hdr *hdr); +void ieee80211_set_wmm_default(struct ieee80211_sub_if_data *sdata); +void ieee80211_tx_skb(struct ieee80211_sub_if_data *sdata, struct sk_buff *skb, + int encrypt); +void ieee802_11_parse_elems(u8 *start, size_t len, + struct ieee802_11_elems *elems); +int ieee80211_set_freq(struct ieee80211_sub_if_data *sdata, int freq); +u64 ieee80211_mandatory_rates(struct ieee80211_local *local, + enum ieee80211_band band); + +#ifdef CONFIG_MAC80211_NOINLINE +#define debug_noinline noinline +#else +#define debug_noinline +#endif #endif /* IEEE80211_I_H */ diff --git a/net/mac80211/iface.c b/net/mac80211/iface.c index 06e88a5a036d..8336fee68d3e 100644 --- a/net/mac80211/iface.c +++ b/net/mac80211/iface.c @@ -1,7 +1,10 @@ /* + * Interface handling (except master interface) + * * Copyright 2002-2005, Instant802 Networks, Inc. * Copyright 2005-2006, Devicescape Software, Inc. * Copyright (c) 2006 Jiri Benc <jbenc@suse.cz> + * Copyright 2008, Johannes Berg <johannes@sipsolutions.net> * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License version 2 as @@ -16,226 +19,563 @@ #include "sta_info.h" #include "debugfs_netdev.h" #include "mesh.h" +#include "led.h" -void ieee80211_if_sdata_init(struct ieee80211_sub_if_data *sdata) +static int ieee80211_change_mtu(struct net_device *dev, int new_mtu) { - int i; + int meshhdrlen; + struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev); - /* Default values for sub-interface parameters */ - sdata->drop_unencrypted = 0; - for (i = 0; i < IEEE80211_FRAGMENT_MAX; i++) - skb_queue_head_init(&sdata->fragments[i].skb_list); + meshhdrlen = (sdata->vif.type == NL80211_IFTYPE_MESH_POINT) ? 5 : 0; - INIT_LIST_HEAD(&sdata->key_list); + /* FIX: what would be proper limits for MTU? + * This interface uses 802.3 frames. */ + if (new_mtu < 256 || + new_mtu > IEEE80211_MAX_DATA_LEN - 24 - 6 - meshhdrlen) { + return -EINVAL; + } + +#ifdef CONFIG_MAC80211_VERBOSE_DEBUG + printk(KERN_DEBUG "%s: setting MTU %d\n", dev->name, new_mtu); +#endif /* CONFIG_MAC80211_VERBOSE_DEBUG */ + dev->mtu = new_mtu; + return 0; } -static void ieee80211_if_sdata_deinit(struct ieee80211_sub_if_data *sdata) +static inline int identical_mac_addr_allowed(int type1, int type2) { - int i; - - for (i = 0; i < IEEE80211_FRAGMENT_MAX; i++) { - __skb_queue_purge(&sdata->fragments[i].skb_list); - } + return type1 == NL80211_IFTYPE_MONITOR || + type2 == NL80211_IFTYPE_MONITOR || + (type1 == NL80211_IFTYPE_AP && type2 == NL80211_IFTYPE_WDS) || + (type1 == NL80211_IFTYPE_WDS && + (type2 == NL80211_IFTYPE_WDS || + type2 == NL80211_IFTYPE_AP)) || + (type1 == NL80211_IFTYPE_AP && type2 == NL80211_IFTYPE_AP_VLAN) || + (type1 == NL80211_IFTYPE_AP_VLAN && + (type2 == NL80211_IFTYPE_AP || + type2 == NL80211_IFTYPE_AP_VLAN)); } -/* Must be called with rtnl lock held. */ -int ieee80211_if_add(struct net_device *dev, const char *name, - struct net_device **new_dev, int type, - struct vif_params *params) +static int ieee80211_open(struct net_device *dev) { - struct net_device *ndev; - struct ieee80211_local *local = wdev_priv(dev->ieee80211_ptr); - struct ieee80211_sub_if_data *sdata = NULL; - int ret; + struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev); + struct ieee80211_sub_if_data *nsdata; + struct ieee80211_local *local = sdata->local; + struct sta_info *sta; + struct ieee80211_if_init_conf conf; + u32 changed = 0; + int res; + bool need_hw_reconfig = 0; + u8 null_addr[ETH_ALEN] = {0}; + + /* fail early if user set an invalid address */ + if (compare_ether_addr(dev->dev_addr, null_addr) && + !is_valid_ether_addr(dev->dev_addr)) + return -EADDRNOTAVAIL; + + /* we hold the RTNL here so can safely walk the list */ + list_for_each_entry(nsdata, &local->interfaces, list) { + struct net_device *ndev = nsdata->dev; + + if (ndev != dev && netif_running(ndev)) { + /* + * Allow only a single IBSS interface to be up at any + * time. This is restricted because beacon distribution + * cannot work properly if both are in the same IBSS. + * + * To remove this restriction we'd have to disallow them + * from setting the same SSID on different IBSS interfaces + * belonging to the same hardware. Then, however, we're + * faced with having to adopt two different TSF timers... + */ + if (sdata->vif.type == NL80211_IFTYPE_ADHOC && + nsdata->vif.type == NL80211_IFTYPE_ADHOC) + return -EBUSY; + + /* + * The remaining checks are only performed for interfaces + * with the same MAC address. + */ + if (compare_ether_addr(dev->dev_addr, ndev->dev_addr)) + continue; + + /* + * check whether it may have the same address + */ + if (!identical_mac_addr_allowed(sdata->vif.type, + nsdata->vif.type)) + return -ENOTUNIQ; + + /* + * can only add VLANs to enabled APs + */ + if (sdata->vif.type == NL80211_IFTYPE_AP_VLAN && + nsdata->vif.type == NL80211_IFTYPE_AP) + sdata->bss = &nsdata->u.ap; + } + } - ASSERT_RTNL(); - ndev = alloc_netdev(sizeof(*sdata) + local->hw.vif_data_size, - name, ieee80211_if_setup); - if (!ndev) - return -ENOMEM; + switch (sdata->vif.type) { + case NL80211_IFTYPE_WDS: + if (!is_valid_ether_addr(sdata->u.wds.remote_addr)) + return -ENOLINK; + break; + case NL80211_IFTYPE_AP_VLAN: + if (!sdata->bss) + return -ENOLINK; + list_add(&sdata->u.vlan.list, &sdata->bss->vlans); + break; + case NL80211_IFTYPE_AP: + sdata->bss = &sdata->u.ap; + break; + case NL80211_IFTYPE_MESH_POINT: + if (!ieee80211_vif_is_mesh(&sdata->vif)) + break; + /* mesh ifaces must set allmulti to forward mcast traffic */ + atomic_inc(&local->iff_allmultis); + break; + case NL80211_IFTYPE_STATION: + case NL80211_IFTYPE_MONITOR: + case NL80211_IFTYPE_ADHOC: + /* no special treatment */ + break; + case NL80211_IFTYPE_UNSPECIFIED: + case __NL80211_IFTYPE_AFTER_LAST: + /* cannot happen */ + WARN_ON(1); + break; + } - ndev->needed_headroom = local->tx_headroom + - 4*6 /* four MAC addresses */ - + 2 + 2 + 2 + 2 /* ctl, dur, seq, qos */ - + 6 /* mesh */ - + 8 /* rfc1042/bridge tunnel */ - - ETH_HLEN /* ethernet hard_header_len */ - + IEEE80211_ENCRYPT_HEADROOM; - ndev->needed_tailroom = IEEE80211_ENCRYPT_TAILROOM; + if (local->open_count == 0) { + res = 0; + if (local->ops->start) + res = local->ops->start(local_to_hw(local)); + if (res) + goto err_del_bss; + need_hw_reconfig = 1; + ieee80211_led_radio(local, local->hw.conf.radio_enabled); + } - ret = dev_alloc_name(ndev, ndev->name); - if (ret < 0) - goto fail; + /* + * Check all interfaces and copy the hopefully now-present + * MAC address to those that have the special null one. + */ + list_for_each_entry(nsdata, &local->interfaces, list) { + struct net_device *ndev = nsdata->dev; + + /* + * No need to check netif_running since we do not allow + * it to start up with this invalid address. + */ + if (compare_ether_addr(null_addr, ndev->dev_addr) == 0) + memcpy(ndev->dev_addr, + local->hw.wiphy->perm_addr, + ETH_ALEN); + } - memcpy(ndev->dev_addr, local->hw.wiphy->perm_addr, ETH_ALEN); - ndev->base_addr = dev->base_addr; - ndev->irq = dev->irq; - ndev->mem_start = dev->mem_start; - ndev->mem_end = dev->mem_end; - SET_NETDEV_DEV(ndev, wiphy_dev(local->hw.wiphy)); + if (compare_ether_addr(null_addr, local->mdev->dev_addr) == 0) + memcpy(local->mdev->dev_addr, local->hw.wiphy->perm_addr, + ETH_ALEN); - sdata = IEEE80211_DEV_TO_SUB_IF(ndev); - ndev->ieee80211_ptr = &sdata->wdev; - sdata->wdev.wiphy = local->hw.wiphy; - sdata->vif.type = IEEE80211_IF_TYPE_AP; - sdata->dev = ndev; - sdata->local = local; - ieee80211_if_sdata_init(sdata); + /* + * Validate the MAC address for this device. + */ + if (!is_valid_ether_addr(dev->dev_addr)) { + if (!local->open_count && local->ops->stop) + local->ops->stop(local_to_hw(local)); + return -EADDRNOTAVAIL; + } - ret = register_netdevice(ndev); - if (ret) - goto fail; + switch (sdata->vif.type) { + case NL80211_IFTYPE_AP_VLAN: + /* no need to tell driver */ + break; + case NL80211_IFTYPE_MONITOR: + if (sdata->u.mntr_flags & MONITOR_FLAG_COOK_FRAMES) { + local->cooked_mntrs++; + break; + } - ieee80211_debugfs_add_netdev(sdata); - ieee80211_if_set_type(ndev, type); + /* must be before the call to ieee80211_configure_filter */ + local->monitors++; + if (local->monitors == 1) + local->hw.conf.flags |= IEEE80211_CONF_RADIOTAP; + + if (sdata->u.mntr_flags & MONITOR_FLAG_FCSFAIL) + local->fif_fcsfail++; + if (sdata->u.mntr_flags & MONITOR_FLAG_PLCPFAIL) + local->fif_plcpfail++; + if (sdata->u.mntr_flags & MONITOR_FLAG_CONTROL) + local->fif_control++; + if (sdata->u.mntr_flags & MONITOR_FLAG_OTHER_BSS) + local->fif_other_bss++; + + netif_addr_lock_bh(local->mdev); + ieee80211_configure_filter(local); + netif_addr_unlock_bh(local->mdev); + break; + case NL80211_IFTYPE_STATION: + case NL80211_IFTYPE_ADHOC: + sdata->u.sta.flags &= ~IEEE80211_STA_PREV_BSSID_SET; + /* fall through */ + default: + conf.vif = &sdata->vif; + conf.type = sdata->vif.type; + conf.mac_addr = dev->dev_addr; + res = local->ops->add_interface(local_to_hw(local), &conf); + if (res) + goto err_stop; - if (ieee80211_vif_is_mesh(&sdata->vif) && - params && params->mesh_id_len) - ieee80211_if_sta_set_mesh_id(&sdata->u.sta, - params->mesh_id_len, - params->mesh_id); - - /* we're under RTNL so all this is fine */ - if (unlikely(local->reg_state == IEEE80211_DEV_UNREGISTERED)) { - __ieee80211_if_del(local, sdata); - return -ENODEV; + if (ieee80211_vif_is_mesh(&sdata->vif)) + ieee80211_start_mesh(sdata); + changed |= ieee80211_reset_erp_info(sdata); + ieee80211_bss_info_change_notify(sdata, changed); + ieee80211_enable_keys(sdata); + + if (sdata->vif.type == NL80211_IFTYPE_STATION && + !(sdata->flags & IEEE80211_SDATA_USERSPACE_MLME)) + netif_carrier_off(dev); + else + netif_carrier_on(dev); } - list_add_tail_rcu(&sdata->list, &local->interfaces); - if (new_dev) - *new_dev = ndev; + if (sdata->vif.type == NL80211_IFTYPE_WDS) { + /* Create STA entry for the WDS peer */ + sta = sta_info_alloc(sdata, sdata->u.wds.remote_addr, + GFP_KERNEL); + if (!sta) { + res = -ENOMEM; + goto err_del_interface; + } - return 0; + /* no locking required since STA is not live yet */ + sta->flags |= WLAN_STA_AUTHORIZED; -fail: - free_netdev(ndev); - return ret; + res = sta_info_insert(sta); + if (res) { + /* STA has been freed */ + goto err_del_interface; + } + } + + if (local->open_count == 0) { + res = dev_open(local->mdev); + WARN_ON(res); + if (res) + goto err_del_interface; + tasklet_enable(&local->tx_pending_tasklet); + tasklet_enable(&local->tasklet); + } + + /* + * set_multicast_list will be invoked by the networking core + * which will check whether any increments here were done in + * error and sync them down to the hardware as filter flags. + */ + if (sdata->flags & IEEE80211_SDATA_ALLMULTI) + atomic_inc(&local->iff_allmultis); + + if (sdata->flags & IEEE80211_SDATA_PROMISC) + atomic_inc(&local->iff_promiscs); + + local->open_count++; + if (need_hw_reconfig) { + ieee80211_hw_config(local); + /* + * set default queue parameters so drivers don't + * need to initialise the hardware if the hardware + * doesn't start up with sane defaults + */ + ieee80211_set_wmm_default(sdata); + } + + /* + * ieee80211_sta_work is disabled while network interface + * is down. Therefore, some configuration changes may not + * yet be effective. Trigger execution of ieee80211_sta_work + * to fix this. + */ + if (sdata->vif.type == NL80211_IFTYPE_STATION || + sdata->vif.type == NL80211_IFTYPE_ADHOC) { + struct ieee80211_if_sta *ifsta = &sdata->u.sta; + queue_work(local->hw.workqueue, &ifsta->work); + } + + netif_tx_start_all_queues(dev); + + return 0; + err_del_interface: + local->ops->remove_interface(local_to_hw(local), &conf); + err_stop: + if (!local->open_count && local->ops->stop) + local->ops->stop(local_to_hw(local)); + err_del_bss: + sdata->bss = NULL; + if (sdata->vif.type == NL80211_IFTYPE_AP_VLAN) + list_del(&sdata->u.vlan.list); + return res; } -void ieee80211_if_set_type(struct net_device *dev, int type) +static int ieee80211_stop(struct net_device *dev) { struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev); - int oldtype = sdata->vif.type; + struct ieee80211_local *local = sdata->local; + struct ieee80211_if_init_conf conf; + struct sta_info *sta; /* - * We need to call this function on the master interface - * which already has a hard_start_xmit routine assigned - * which must not be changed. + * Stop TX on this interface first. */ - if (dev != sdata->local->mdev) - dev->hard_start_xmit = ieee80211_subif_start_xmit; + netif_tx_stop_all_queues(dev); /* - * Called even when register_netdevice fails, it would - * oops if assigned before initialising the rest. + * Now delete all active aggregation sessions. */ - dev->uninit = ieee80211_if_reinit; + rcu_read_lock(); - /* most have no BSS pointer */ - sdata->bss = NULL; - sdata->vif.type = type; + list_for_each_entry_rcu(sta, &local->sta_list, list) { + if (sta->sdata == sdata) + ieee80211_sta_tear_down_BA_sessions(sdata, + sta->sta.addr); + } - sdata->basic_rates = 0; + rcu_read_unlock(); - switch (type) { - case IEEE80211_IF_TYPE_WDS: - /* nothing special */ - break; - case IEEE80211_IF_TYPE_VLAN: - sdata->u.vlan.ap = NULL; - break; - case IEEE80211_IF_TYPE_AP: - sdata->u.ap.force_unicast_rateidx = -1; - sdata->u.ap.max_ratectrl_rateidx = -1; - skb_queue_head_init(&sdata->u.ap.ps_bc_buf); - sdata->bss = &sdata->u.ap; - INIT_LIST_HEAD(&sdata->u.ap.vlans); - break; - case IEEE80211_IF_TYPE_MESH_POINT: - case IEEE80211_IF_TYPE_STA: - case IEEE80211_IF_TYPE_IBSS: { - struct ieee80211_sub_if_data *msdata; - struct ieee80211_if_sta *ifsta; - - ifsta = &sdata->u.sta; - INIT_WORK(&ifsta->work, ieee80211_sta_work); - setup_timer(&ifsta->timer, ieee80211_sta_timer, - (unsigned long) sdata); - skb_queue_head_init(&ifsta->skb_queue); - - ifsta->capab = WLAN_CAPABILITY_ESS; - ifsta->auth_algs = IEEE80211_AUTH_ALG_OPEN | - IEEE80211_AUTH_ALG_SHARED_KEY; - ifsta->flags |= IEEE80211_STA_CREATE_IBSS | - IEEE80211_STA_WMM_ENABLED | - IEEE80211_STA_AUTO_BSSID_SEL | - IEEE80211_STA_AUTO_CHANNEL_SEL; - - msdata = IEEE80211_DEV_TO_SUB_IF(sdata->local->mdev); - sdata->bss = &msdata->u.ap; + /* + * Remove all stations associated with this interface. + * + * This must be done before calling ops->remove_interface() + * because otherwise we can later invoke ops->sta_notify() + * whenever the STAs are removed, and that invalidates driver + * assumptions about always getting a vif pointer that is valid + * (because if we remove a STA after ops->remove_interface() + * the driver will have removed the vif info already!) + * + * We could relax this and only unlink the stations from the + * hash table and list but keep them on a per-sdata list that + * will be inserted back again when the interface is brought + * up again, but I don't currently see a use case for that, + * except with WDS which gets a STA entry created when it is + * brought up. + */ + sta_info_flush(local, sdata); - if (ieee80211_vif_is_mesh(&sdata->vif)) - ieee80211_mesh_init_sdata(sdata); - break; + /* + * Don't count this interface for promisc/allmulti while it + * is down. dev_mc_unsync() will invoke set_multicast_list + * on the master interface which will sync these down to the + * hardware as filter flags. + */ + if (sdata->flags & IEEE80211_SDATA_ALLMULTI) + atomic_dec(&local->iff_allmultis); + + if (sdata->flags & IEEE80211_SDATA_PROMISC) + atomic_dec(&local->iff_promiscs); + + dev_mc_unsync(local->mdev, dev); + + /* APs need special treatment */ + if (sdata->vif.type == NL80211_IFTYPE_AP) { + struct ieee80211_sub_if_data *vlan, *tmp; + struct beacon_data *old_beacon = sdata->u.ap.beacon; + + /* remove beacon */ + rcu_assign_pointer(sdata->u.ap.beacon, NULL); + synchronize_rcu(); + kfree(old_beacon); + + /* down all dependent devices, that is VLANs */ + list_for_each_entry_safe(vlan, tmp, &sdata->u.ap.vlans, + u.vlan.list) + dev_close(vlan->dev); + WARN_ON(!list_empty(&sdata->u.ap.vlans)); } - case IEEE80211_IF_TYPE_MNTR: - dev->type = ARPHRD_IEEE80211_RADIOTAP; - dev->hard_start_xmit = ieee80211_monitor_start_xmit; - sdata->u.mntr_flags = MONITOR_FLAG_CONTROL | - MONITOR_FLAG_OTHER_BSS; + + local->open_count--; + + switch (sdata->vif.type) { + case NL80211_IFTYPE_AP_VLAN: + list_del(&sdata->u.vlan.list); + /* no need to tell driver */ break; + case NL80211_IFTYPE_MONITOR: + if (sdata->u.mntr_flags & MONITOR_FLAG_COOK_FRAMES) { + local->cooked_mntrs--; + break; + } + + local->monitors--; + if (local->monitors == 0) + local->hw.conf.flags &= ~IEEE80211_CONF_RADIOTAP; + + if (sdata->u.mntr_flags & MONITOR_FLAG_FCSFAIL) + local->fif_fcsfail--; + if (sdata->u.mntr_flags & MONITOR_FLAG_PLCPFAIL) + local->fif_plcpfail--; + if (sdata->u.mntr_flags & MONITOR_FLAG_CONTROL) + local->fif_control--; + if (sdata->u.mntr_flags & MONITOR_FLAG_OTHER_BSS) + local->fif_other_bss--; + + netif_addr_lock_bh(local->mdev); + ieee80211_configure_filter(local); + netif_addr_unlock_bh(local->mdev); + break; + case NL80211_IFTYPE_STATION: + case NL80211_IFTYPE_ADHOC: + sdata->u.sta.state = IEEE80211_STA_MLME_DISABLED; + memset(sdata->u.sta.bssid, 0, ETH_ALEN); + del_timer_sync(&sdata->u.sta.timer); + /* + * If the timer fired while we waited for it, it will have + * requeued the work. Now the work will be running again + * but will not rearm the timer again because it checks + * whether the interface is running, which, at this point, + * it no longer is. + */ + cancel_work_sync(&sdata->u.sta.work); + /* + * When we get here, the interface is marked down. + * Call synchronize_rcu() to wait for the RX path + * should it be using the interface and enqueuing + * frames at this very time on another CPU. + */ + synchronize_rcu(); + skb_queue_purge(&sdata->u.sta.skb_queue); + + sdata->u.sta.flags &= ~IEEE80211_STA_PRIVACY_INVOKED; + kfree(sdata->u.sta.extra_ie); + sdata->u.sta.extra_ie = NULL; + sdata->u.sta.extra_ie_len = 0; + /* fall through */ + case NL80211_IFTYPE_MESH_POINT: + if (ieee80211_vif_is_mesh(&sdata->vif)) { + /* allmulti is always set on mesh ifaces */ + atomic_dec(&local->iff_allmultis); + ieee80211_stop_mesh(sdata); + } + /* fall through */ default: - printk(KERN_WARNING "%s: %s: Unknown interface type 0x%x", - dev->name, __func__, type); + if (local->scan_sdata == sdata) { + if (!local->ops->hw_scan) + cancel_delayed_work_sync(&local->scan_work); + /* + * The software scan can no longer run now, so we can + * clear out the scan_sdata reference. However, the + * hardware scan may still be running. The complete + * function must be prepared to handle a NULL value. + */ + local->scan_sdata = NULL; + /* + * The memory barrier guarantees that another CPU + * that is hardware-scanning will now see the fact + * that this interface is gone. + */ + smp_mb(); + /* + * If software scanning, complete the scan but since + * the scan_sdata is NULL already don't send out a + * scan event to userspace -- the scan is incomplete. + */ + if (local->sw_scanning) + ieee80211_scan_completed(&local->hw); + } + + conf.vif = &sdata->vif; + conf.type = sdata->vif.type; + conf.mac_addr = dev->dev_addr; + /* disable all keys for as long as this netdev is down */ + ieee80211_disable_keys(sdata); + local->ops->remove_interface(local_to_hw(local), &conf); + } + + sdata->bss = NULL; + + if (local->open_count == 0) { + if (netif_running(local->mdev)) + dev_close(local->mdev); + + if (local->ops->stop) + local->ops->stop(local_to_hw(local)); + + ieee80211_led_radio(local, 0); + + flush_workqueue(local->hw.workqueue); + + tasklet_disable(&local->tx_pending_tasklet); + tasklet_disable(&local->tasklet); } - ieee80211_debugfs_change_if_type(sdata, oldtype); + + return 0; } -/* Must be called with rtnl lock held. */ -void ieee80211_if_reinit(struct net_device *dev) +static void ieee80211_set_multicast_list(struct net_device *dev) { - struct ieee80211_local *local = wdev_priv(dev->ieee80211_ptr); struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev); + struct ieee80211_local *local = sdata->local; + int allmulti, promisc, sdata_allmulti, sdata_promisc; + + allmulti = !!(dev->flags & IFF_ALLMULTI); + promisc = !!(dev->flags & IFF_PROMISC); + sdata_allmulti = !!(sdata->flags & IEEE80211_SDATA_ALLMULTI); + sdata_promisc = !!(sdata->flags & IEEE80211_SDATA_PROMISC); + + if (allmulti != sdata_allmulti) { + if (dev->flags & IFF_ALLMULTI) + atomic_inc(&local->iff_allmultis); + else + atomic_dec(&local->iff_allmultis); + sdata->flags ^= IEEE80211_SDATA_ALLMULTI; + } + + if (promisc != sdata_promisc) { + if (dev->flags & IFF_PROMISC) + atomic_inc(&local->iff_promiscs); + else + atomic_dec(&local->iff_promiscs); + sdata->flags ^= IEEE80211_SDATA_PROMISC; + } + + dev_mc_sync(local->mdev, dev); +} + +static void ieee80211_if_setup(struct net_device *dev) +{ + ether_setup(dev); + dev->hard_start_xmit = ieee80211_subif_start_xmit; + dev->wireless_handlers = &ieee80211_iw_handler_def; + dev->set_multicast_list = ieee80211_set_multicast_list; + dev->change_mtu = ieee80211_change_mtu; + dev->open = ieee80211_open; + dev->stop = ieee80211_stop; + dev->destructor = free_netdev; + /* we will validate the address ourselves in ->open */ + dev->validate_addr = NULL; +} +/* + * Called when the netdev is removed or, by the code below, before + * the interface type changes. + */ +static void ieee80211_teardown_sdata(struct net_device *dev) +{ + struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev); + struct ieee80211_local *local = sdata->local; + struct beacon_data *beacon; struct sk_buff *skb; int flushed; + int i; - ASSERT_RTNL(); - + /* free extra data */ ieee80211_free_keys(sdata); - ieee80211_if_sdata_deinit(sdata); + ieee80211_debugfs_remove_netdev(sdata); - /* Need to handle mesh specially to allow eliding the function call */ - if (ieee80211_vif_is_mesh(&sdata->vif)) - mesh_rmc_free(dev); + for (i = 0; i < IEEE80211_FRAGMENT_MAX; i++) + __skb_queue_purge(&sdata->fragments[i].skb_list); + sdata->fragment_next = 0; switch (sdata->vif.type) { - case IEEE80211_IF_TYPE_INVALID: - /* cannot happen */ - WARN_ON(1); - break; - case IEEE80211_IF_TYPE_AP: { - /* Remove all virtual interfaces that use this BSS - * as their sdata->bss */ - struct ieee80211_sub_if_data *tsdata, *n; - struct beacon_data *beacon; - - list_for_each_entry_safe(tsdata, n, &local->interfaces, list) { - if (tsdata != sdata && tsdata->bss == &sdata->u.ap) { - printk(KERN_DEBUG "%s: removing virtual " - "interface %s because its BSS interface" - " is being removed\n", - sdata->dev->name, tsdata->dev->name); - list_del_rcu(&tsdata->list); - /* - * We have lots of time and can afford - * to sync for each interface - */ - synchronize_rcu(); - __ieee80211_if_del(local, tsdata); - } - } - + case NL80211_IFTYPE_AP: beacon = sdata->u.ap.beacon; rcu_assign_pointer(sdata->u.ap.beacon, NULL); synchronize_rcu(); @@ -247,77 +587,205 @@ void ieee80211_if_reinit(struct net_device *dev) } break; - } - case IEEE80211_IF_TYPE_WDS: - /* nothing to do */ + case NL80211_IFTYPE_MESH_POINT: + if (ieee80211_vif_is_mesh(&sdata->vif)) + mesh_rmc_free(sdata); break; - case IEEE80211_IF_TYPE_MESH_POINT: - case IEEE80211_IF_TYPE_STA: - case IEEE80211_IF_TYPE_IBSS: + case NL80211_IFTYPE_STATION: + case NL80211_IFTYPE_ADHOC: kfree(sdata->u.sta.extra_ie); - sdata->u.sta.extra_ie = NULL; kfree(sdata->u.sta.assocreq_ies); - sdata->u.sta.assocreq_ies = NULL; kfree(sdata->u.sta.assocresp_ies); - sdata->u.sta.assocresp_ies = NULL; - if (sdata->u.sta.probe_resp) { - dev_kfree_skb(sdata->u.sta.probe_resp); - sdata->u.sta.probe_resp = NULL; - } - + kfree_skb(sdata->u.sta.probe_resp); break; - case IEEE80211_IF_TYPE_MNTR: - dev->type = ARPHRD_ETHER; + case NL80211_IFTYPE_WDS: + case NL80211_IFTYPE_AP_VLAN: + case NL80211_IFTYPE_MONITOR: break; - case IEEE80211_IF_TYPE_VLAN: - sdata->u.vlan.ap = NULL; + case NL80211_IFTYPE_UNSPECIFIED: + case __NL80211_IFTYPE_AFTER_LAST: + BUG(); break; } flushed = sta_info_flush(local, sdata); WARN_ON(flushed); +} +/* + * Helper function to initialise an interface to a specific type. + */ +static void ieee80211_setup_sdata(struct ieee80211_sub_if_data *sdata, + enum nl80211_iftype type) +{ + /* clear type-dependent union */ memset(&sdata->u, 0, sizeof(sdata->u)); - ieee80211_if_sdata_init(sdata); + + /* and set some type-dependent values */ + sdata->vif.type = type; + sdata->dev->hard_start_xmit = ieee80211_subif_start_xmit; + sdata->wdev.iftype = type; + + /* only monitor differs */ + sdata->dev->type = ARPHRD_ETHER; + + switch (type) { + case NL80211_IFTYPE_AP: + skb_queue_head_init(&sdata->u.ap.ps_bc_buf); + INIT_LIST_HEAD(&sdata->u.ap.vlans); + break; + case NL80211_IFTYPE_STATION: + case NL80211_IFTYPE_ADHOC: + ieee80211_sta_setup_sdata(sdata); + break; + case NL80211_IFTYPE_MESH_POINT: + if (ieee80211_vif_is_mesh(&sdata->vif)) + ieee80211_mesh_init_sdata(sdata); + break; + case NL80211_IFTYPE_MONITOR: + sdata->dev->type = ARPHRD_IEEE80211_RADIOTAP; + sdata->dev->hard_start_xmit = ieee80211_monitor_start_xmit; + sdata->u.mntr_flags = MONITOR_FLAG_CONTROL | + MONITOR_FLAG_OTHER_BSS; + break; + case NL80211_IFTYPE_WDS: + case NL80211_IFTYPE_AP_VLAN: + break; + case NL80211_IFTYPE_UNSPECIFIED: + case __NL80211_IFTYPE_AFTER_LAST: + BUG(); + break; + } + + ieee80211_debugfs_add_netdev(sdata); } -/* Must be called with rtnl lock held. */ -void __ieee80211_if_del(struct ieee80211_local *local, - struct ieee80211_sub_if_data *sdata) +int ieee80211_if_change_type(struct ieee80211_sub_if_data *sdata, + enum nl80211_iftype type) { - struct net_device *dev = sdata->dev; + ASSERT_RTNL(); - ieee80211_debugfs_remove_netdev(sdata); - unregister_netdevice(dev); - /* Except master interface, the net_device will be freed by - * net_device->destructor (i. e. ieee80211_if_free). */ + if (type == sdata->vif.type) + return 0; + + /* + * We could, here, on changes between IBSS/STA/MESH modes, + * invoke an MLME function instead that disassociates etc. + * and goes into the requested mode. + */ + + if (netif_running(sdata->dev)) + return -EBUSY; + + /* Purge and reset type-dependent state. */ + ieee80211_teardown_sdata(sdata->dev); + ieee80211_setup_sdata(sdata, type); + + /* reset some values that shouldn't be kept across type changes */ + sdata->bss_conf.basic_rates = + ieee80211_mandatory_rates(sdata->local, + sdata->local->hw.conf.channel->band); + sdata->drop_unencrypted = 0; + + return 0; } -/* Must be called with rtnl lock held. */ -int ieee80211_if_remove(struct net_device *dev, const char *name, int id) +int ieee80211_if_add(struct ieee80211_local *local, const char *name, + struct net_device **new_dev, enum nl80211_iftype type, + struct vif_params *params) { - struct ieee80211_local *local = wdev_priv(dev->ieee80211_ptr); - struct ieee80211_sub_if_data *sdata, *n; + struct net_device *ndev; + struct ieee80211_sub_if_data *sdata = NULL; + int ret, i; ASSERT_RTNL(); - list_for_each_entry_safe(sdata, n, &local->interfaces, list) { - if ((sdata->vif.type == id || id == -1) && - strcmp(name, sdata->dev->name) == 0 && - sdata->dev != local->mdev) { - list_del_rcu(&sdata->list); - synchronize_rcu(); - __ieee80211_if_del(local, sdata); - return 0; - } - } - return -ENODEV; + ndev = alloc_netdev(sizeof(*sdata) + local->hw.vif_data_size, + name, ieee80211_if_setup); + if (!ndev) + return -ENOMEM; + + ndev->needed_headroom = local->tx_headroom + + 4*6 /* four MAC addresses */ + + 2 + 2 + 2 + 2 /* ctl, dur, seq, qos */ + + 6 /* mesh */ + + 8 /* rfc1042/bridge tunnel */ + - ETH_HLEN /* ethernet hard_header_len */ + + IEEE80211_ENCRYPT_HEADROOM; + ndev->needed_tailroom = IEEE80211_ENCRYPT_TAILROOM; + + ret = dev_alloc_name(ndev, ndev->name); + if (ret < 0) + goto fail; + + memcpy(ndev->dev_addr, local->hw.wiphy->perm_addr, ETH_ALEN); + SET_NETDEV_DEV(ndev, wiphy_dev(local->hw.wiphy)); + + /* don't use IEEE80211_DEV_TO_SUB_IF because it checks too much */ + sdata = netdev_priv(ndev); + ndev->ieee80211_ptr = &sdata->wdev; + + /* initialise type-independent data */ + sdata->wdev.wiphy = local->hw.wiphy; + sdata->local = local; + sdata->dev = ndev; + + for (i = 0; i < IEEE80211_FRAGMENT_MAX; i++) + skb_queue_head_init(&sdata->fragments[i].skb_list); + + INIT_LIST_HEAD(&sdata->key_list); + + sdata->force_unicast_rateidx = -1; + sdata->max_ratectrl_rateidx = -1; + + /* setup type-dependent data */ + ieee80211_setup_sdata(sdata, type); + + ret = register_netdevice(ndev); + if (ret) + goto fail; + + ndev->uninit = ieee80211_teardown_sdata; + + if (ieee80211_vif_is_mesh(&sdata->vif) && + params && params->mesh_id_len) + ieee80211_sdata_set_mesh_id(sdata, + params->mesh_id_len, + params->mesh_id); + + list_add_tail_rcu(&sdata->list, &local->interfaces); + + if (new_dev) + *new_dev = ndev; + + return 0; + + fail: + free_netdev(ndev); + return ret; } -void ieee80211_if_free(struct net_device *dev) +void ieee80211_if_remove(struct ieee80211_sub_if_data *sdata) { - struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev); + ASSERT_RTNL(); - ieee80211_if_sdata_deinit(sdata); - free_netdev(dev); + list_del_rcu(&sdata->list); + synchronize_rcu(); + unregister_netdevice(sdata->dev); +} + +/* + * Remove all interfaces, may only be called at hardware unregistration + * time because it doesn't do RCU-safe list removals. + */ +void ieee80211_remove_interfaces(struct ieee80211_local *local) +{ + struct ieee80211_sub_if_data *sdata, *tmp; + + ASSERT_RTNL(); + + list_for_each_entry_safe(sdata, tmp, &local->interfaces, list) { + list_del(&sdata->list); + unregister_netdevice(sdata->dev); + } } diff --git a/net/mac80211/key.c b/net/mac80211/key.c index 150d66dbda9d..a5b06fe71980 100644 --- a/net/mac80211/key.c +++ b/net/mac80211/key.c @@ -118,12 +118,12 @@ static const u8 *get_mac_for_key(struct ieee80211_key *key) * address to indicate a transmit-only key. */ if (key->conf.alg != ALG_WEP && - (key->sdata->vif.type == IEEE80211_IF_TYPE_AP || - key->sdata->vif.type == IEEE80211_IF_TYPE_VLAN)) + (key->sdata->vif.type == NL80211_IFTYPE_AP || + key->sdata->vif.type == NL80211_IFTYPE_AP_VLAN)) addr = zero_addr; if (key->sta) - addr = key->sta->addr; + addr = key->sta->sta.addr; return addr; } @@ -281,6 +281,20 @@ struct ieee80211_key *ieee80211_key_alloc(enum ieee80211_key_alg alg, key->conf.alg = alg; key->conf.keyidx = idx; key->conf.keylen = key_len; + switch (alg) { + case ALG_WEP: + key->conf.iv_len = WEP_IV_LEN; + key->conf.icv_len = WEP_ICV_LEN; + break; + case ALG_TKIP: + key->conf.iv_len = TKIP_IV_LEN; + key->conf.icv_len = TKIP_ICV_LEN; + break; + case ALG_CCMP: + key->conf.iv_len = CCMP_HDR_LEN; + key->conf.icv_len = CCMP_MIC_LEN; + break; + } memcpy(key->conf.key, key_data, key_len); INIT_LIST_HEAD(&key->list); INIT_LIST_HEAD(&key->todo); @@ -321,10 +335,17 @@ void ieee80211_key_link(struct ieee80211_key *key, * some hardware cannot handle TKIP with QoS, so * we indicate whether QoS could be in use. */ - if (sta->flags & WLAN_STA_WME) + if (test_sta_flags(sta, WLAN_STA_WME)) key->conf.flags |= IEEE80211_KEY_FLAG_WMM_STA; + + /* + * This key is for a specific sta interface, + * inform the driver that it should try to store + * this key as pairwise key. + */ + key->conf.flags |= IEEE80211_KEY_FLAG_PAIRWISE; } else { - if (sdata->vif.type == IEEE80211_IF_TYPE_STA) { + if (sdata->vif.type == NL80211_IFTYPE_STATION) { struct sta_info *ap; /* @@ -335,7 +356,7 @@ void ieee80211_key_link(struct ieee80211_key *key, /* same here, the AP could be using QoS */ ap = sta_info_get(key->local, key->sdata->u.sta.bssid); if (ap) { - if (ap->flags & WLAN_STA_WME) + if (test_sta_flags(ap, WLAN_STA_WME)) key->conf.flags |= IEEE80211_KEY_FLAG_WMM_STA; } @@ -380,6 +401,15 @@ void ieee80211_key_free(struct ieee80211_key *key) if (!key) return; + if (!key->sdata) { + /* The key has not been linked yet, simply free it + * and don't Oops */ + if (key->conf.alg == ALG_CCMP) + ieee80211_aes_key_free(key->u.ccmp.tfm); + kfree(key); + return; + } + spin_lock_irqsave(&key->sdata->local->key_lock, flags); __ieee80211_key_free(key); spin_unlock_irqrestore(&key->sdata->local->key_lock, flags); diff --git a/net/mac80211/key.h b/net/mac80211/key.h index f52c3df1fe9a..425816e0996c 100644 --- a/net/mac80211/key.h +++ b/net/mac80211/key.h @@ -16,31 +16,18 @@ #include <linux/rcupdate.h> #include <net/mac80211.h> -/* ALG_TKIP - * struct ieee80211_key::key is encoded as a 256-bit (32 byte) data block: - * Temporal Encryption Key (128 bits) - * Temporal Authenticator Tx MIC Key (64 bits) - * Temporal Authenticator Rx MIC Key (64 bits) - */ - -#define WEP_IV_LEN 4 -#define WEP_ICV_LEN 4 - -#define ALG_TKIP_KEY_LEN 32 -/* Starting offsets for each key */ -#define ALG_TKIP_TEMP_ENCR_KEY 0 -#define ALG_TKIP_TEMP_AUTH_TX_MIC_KEY 16 -#define ALG_TKIP_TEMP_AUTH_RX_MIC_KEY 24 -#define TKIP_IV_LEN 8 -#define TKIP_ICV_LEN 4 - -#define ALG_CCMP_KEY_LEN 16 -#define CCMP_HDR_LEN 8 -#define CCMP_MIC_LEN 8 -#define CCMP_TK_LEN 16 -#define CCMP_PN_LEN 6 - -#define NUM_RX_DATA_QUEUES 17 +#define WEP_IV_LEN 4 +#define WEP_ICV_LEN 4 +#define ALG_TKIP_KEY_LEN 32 +#define ALG_CCMP_KEY_LEN 16 +#define CCMP_HDR_LEN 8 +#define CCMP_MIC_LEN 8 +#define CCMP_TK_LEN 16 +#define CCMP_PN_LEN 6 +#define TKIP_IV_LEN 8 +#define TKIP_ICV_LEN 4 + +#define NUM_RX_DATA_QUEUES 17 struct ieee80211_local; struct ieee80211_sub_if_data; @@ -69,6 +56,13 @@ enum ieee80211_internal_key_flags { KEY_FLAG_TODO_ADD_DEBUGFS = BIT(5), }; +struct tkip_ctx { + u32 iv32; + u16 iv16; + u16 p1k[5]; + int initialized; +}; + struct ieee80211_key { struct ieee80211_local *local; struct ieee80211_sub_if_data *sdata; @@ -85,16 +79,10 @@ struct ieee80211_key { union { struct { /* last used TSC */ - u32 iv32; - u16 iv16; - u16 p1k[5]; - int tx_initialized; + struct tkip_ctx tx; /* last received RSC */ - u32 iv32_rx[NUM_RX_DATA_QUEUES]; - u16 iv16_rx[NUM_RX_DATA_QUEUES]; - u16 p1k_rx[NUM_RX_DATA_QUEUES][5]; - int rx_initialized[NUM_RX_DATA_QUEUES]; + struct tkip_ctx rx[NUM_RX_DATA_QUEUES]; } tkip; struct { u8 tx_pn[6]; diff --git a/net/mac80211/main.c b/net/mac80211/main.c index 98c0b5e56ecc..ae62ad40ad63 100644 --- a/net/mac80211/main.c +++ b/net/mac80211/main.c @@ -35,8 +35,6 @@ #include "debugfs.h" #include "debugfs_netdev.h" -#define SUPP_MCS_SET_LEN 16 - /* * For seeing transmitted packets on monitor interfaces * we have a radiotap header too. @@ -47,16 +45,9 @@ struct ieee80211_tx_status_rtap_hdr { u8 data_retries; } __attribute__ ((packed)); -/* common interface routines */ - -static int header_parse_80211(const struct sk_buff *skb, unsigned char *haddr) -{ - memcpy(haddr, skb_mac_header(skb) + 10, ETH_ALEN); /* addr2 */ - return ETH_ALEN; -} /* must be called under mdev tx lock */ -static void ieee80211_configure_filter(struct ieee80211_local *local) +void ieee80211_configure_filter(struct ieee80211_local *local) { unsigned int changed_flags; unsigned int new_flags = 0; @@ -99,30 +90,52 @@ static void ieee80211_configure_filter(struct ieee80211_local *local) /* master interface */ +static int header_parse_80211(const struct sk_buff *skb, unsigned char *haddr) +{ + memcpy(haddr, skb_mac_header(skb) + 10, ETH_ALEN); /* addr2 */ + return ETH_ALEN; +} + +static const struct header_ops ieee80211_header_ops = { + .create = eth_header, + .parse = header_parse_80211, + .rebuild = eth_rebuild_header, + .cache = eth_header_cache, + .cache_update = eth_header_cache_update, +}; + static int ieee80211_master_open(struct net_device *dev) { - struct ieee80211_local *local = wdev_priv(dev->ieee80211_ptr); + struct ieee80211_master_priv *mpriv = netdev_priv(dev); + struct ieee80211_local *local = mpriv->local; struct ieee80211_sub_if_data *sdata; int res = -EOPNOTSUPP; /* we hold the RTNL here so can safely walk the list */ list_for_each_entry(sdata, &local->interfaces, list) { - if (sdata->dev != dev && netif_running(sdata->dev)) { + if (netif_running(sdata->dev)) { res = 0; break; } } - return res; + + if (res) + return res; + + netif_tx_start_all_queues(local->mdev); + + return 0; } static int ieee80211_master_stop(struct net_device *dev) { - struct ieee80211_local *local = wdev_priv(dev->ieee80211_ptr); + struct ieee80211_master_priv *mpriv = netdev_priv(dev); + struct ieee80211_local *local = mpriv->local; struct ieee80211_sub_if_data *sdata; /* we hold the RTNL here so can safely walk the list */ list_for_each_entry(sdata, &local->interfaces, list) - if (sdata->dev != dev && netif_running(sdata->dev)) + if (netif_running(sdata->dev)) dev_close(sdata->dev); return 0; @@ -130,898 +143,58 @@ static int ieee80211_master_stop(struct net_device *dev) static void ieee80211_master_set_multicast_list(struct net_device *dev) { - struct ieee80211_local *local = wdev_priv(dev->ieee80211_ptr); + struct ieee80211_master_priv *mpriv = netdev_priv(dev); + struct ieee80211_local *local = mpriv->local; ieee80211_configure_filter(local); } -/* regular interfaces */ - -static int ieee80211_change_mtu(struct net_device *dev, int new_mtu) -{ - int meshhdrlen; - struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev); - - meshhdrlen = (sdata->vif.type == IEEE80211_IF_TYPE_MESH_POINT) ? 5 : 0; - - /* FIX: what would be proper limits for MTU? - * This interface uses 802.3 frames. */ - if (new_mtu < 256 || - new_mtu > IEEE80211_MAX_DATA_LEN - 24 - 6 - meshhdrlen) { - printk(KERN_WARNING "%s: invalid MTU %d\n", - dev->name, new_mtu); - return -EINVAL; - } - -#ifdef CONFIG_MAC80211_VERBOSE_DEBUG - printk(KERN_DEBUG "%s: setting MTU %d\n", dev->name, new_mtu); -#endif /* CONFIG_MAC80211_VERBOSE_DEBUG */ - dev->mtu = new_mtu; - return 0; -} - -static inline int identical_mac_addr_allowed(int type1, int type2) -{ - return (type1 == IEEE80211_IF_TYPE_MNTR || - type2 == IEEE80211_IF_TYPE_MNTR || - (type1 == IEEE80211_IF_TYPE_AP && - type2 == IEEE80211_IF_TYPE_WDS) || - (type1 == IEEE80211_IF_TYPE_WDS && - (type2 == IEEE80211_IF_TYPE_WDS || - type2 == IEEE80211_IF_TYPE_AP)) || - (type1 == IEEE80211_IF_TYPE_AP && - type2 == IEEE80211_IF_TYPE_VLAN) || - (type1 == IEEE80211_IF_TYPE_VLAN && - (type2 == IEEE80211_IF_TYPE_AP || - type2 == IEEE80211_IF_TYPE_VLAN))); -} - -static int ieee80211_open(struct net_device *dev) -{ - struct ieee80211_sub_if_data *sdata, *nsdata; - struct ieee80211_local *local = wdev_priv(dev->ieee80211_ptr); - struct ieee80211_if_init_conf conf; - int res; - bool need_hw_reconfig = 0; - struct sta_info *sta; - - sdata = IEEE80211_DEV_TO_SUB_IF(dev); - - /* we hold the RTNL here so can safely walk the list */ - list_for_each_entry(nsdata, &local->interfaces, list) { - struct net_device *ndev = nsdata->dev; - - if (ndev != dev && ndev != local->mdev && netif_running(ndev)) { - /* - * Allow only a single IBSS interface to be up at any - * time. This is restricted because beacon distribution - * cannot work properly if both are in the same IBSS. - * - * To remove this restriction we'd have to disallow them - * from setting the same SSID on different IBSS interfaces - * belonging to the same hardware. Then, however, we're - * faced with having to adopt two different TSF timers... - */ - if (sdata->vif.type == IEEE80211_IF_TYPE_IBSS && - nsdata->vif.type == IEEE80211_IF_TYPE_IBSS) - return -EBUSY; - - /* - * Disallow multiple IBSS/STA mode interfaces. - * - * This is a technical restriction, it is possible although - * most likely not IEEE 802.11 compliant to have multiple - * STAs with just a single hardware (the TSF timer will not - * be adjusted properly.) - * - * However, because mac80211 uses the master device's BSS - * information for each STA/IBSS interface, doing this will - * currently corrupt that BSS information completely, unless, - * a not very useful case, both STAs are associated to the - * same BSS. - * - * To remove this restriction, the BSS information needs to - * be embedded in the STA/IBSS mode sdata instead of using - * the master device's BSS structure. - */ - if ((sdata->vif.type == IEEE80211_IF_TYPE_STA || - sdata->vif.type == IEEE80211_IF_TYPE_IBSS) && - (nsdata->vif.type == IEEE80211_IF_TYPE_STA || - nsdata->vif.type == IEEE80211_IF_TYPE_IBSS)) - return -EBUSY; - - /* - * The remaining checks are only performed for interfaces - * with the same MAC address. - */ - if (compare_ether_addr(dev->dev_addr, ndev->dev_addr)) - continue; - - /* - * check whether it may have the same address - */ - if (!identical_mac_addr_allowed(sdata->vif.type, - nsdata->vif.type)) - return -ENOTUNIQ; - - /* - * can only add VLANs to enabled APs - */ - if (sdata->vif.type == IEEE80211_IF_TYPE_VLAN && - nsdata->vif.type == IEEE80211_IF_TYPE_AP) - sdata->u.vlan.ap = nsdata; - } - } - - switch (sdata->vif.type) { - case IEEE80211_IF_TYPE_WDS: - if (!is_valid_ether_addr(sdata->u.wds.remote_addr)) - return -ENOLINK; - break; - case IEEE80211_IF_TYPE_VLAN: - if (!sdata->u.vlan.ap) - return -ENOLINK; - break; - case IEEE80211_IF_TYPE_AP: - case IEEE80211_IF_TYPE_STA: - case IEEE80211_IF_TYPE_MNTR: - case IEEE80211_IF_TYPE_IBSS: - case IEEE80211_IF_TYPE_MESH_POINT: - /* no special treatment */ - break; - case IEEE80211_IF_TYPE_INVALID: - /* cannot happen */ - WARN_ON(1); - break; - } - - if (local->open_count == 0) { - res = 0; - if (local->ops->start) - res = local->ops->start(local_to_hw(local)); - if (res) - return res; - need_hw_reconfig = 1; - ieee80211_led_radio(local, local->hw.conf.radio_enabled); - } - - switch (sdata->vif.type) { - case IEEE80211_IF_TYPE_VLAN: - list_add(&sdata->u.vlan.list, &sdata->u.vlan.ap->u.ap.vlans); - /* no need to tell driver */ - break; - case IEEE80211_IF_TYPE_MNTR: - if (sdata->u.mntr_flags & MONITOR_FLAG_COOK_FRAMES) { - local->cooked_mntrs++; - break; - } - - /* must be before the call to ieee80211_configure_filter */ - local->monitors++; - if (local->monitors == 1) - local->hw.conf.flags |= IEEE80211_CONF_RADIOTAP; - - if (sdata->u.mntr_flags & MONITOR_FLAG_FCSFAIL) - local->fif_fcsfail++; - if (sdata->u.mntr_flags & MONITOR_FLAG_PLCPFAIL) - local->fif_plcpfail++; - if (sdata->u.mntr_flags & MONITOR_FLAG_CONTROL) - local->fif_control++; - if (sdata->u.mntr_flags & MONITOR_FLAG_OTHER_BSS) - local->fif_other_bss++; - - netif_tx_lock_bh(local->mdev); - ieee80211_configure_filter(local); - netif_tx_unlock_bh(local->mdev); - break; - case IEEE80211_IF_TYPE_STA: - case IEEE80211_IF_TYPE_IBSS: - sdata->u.sta.flags &= ~IEEE80211_STA_PREV_BSSID_SET; - /* fall through */ - default: - conf.vif = &sdata->vif; - conf.type = sdata->vif.type; - conf.mac_addr = dev->dev_addr; - res = local->ops->add_interface(local_to_hw(local), &conf); - if (res) - goto err_stop; - - ieee80211_if_config(dev); - ieee80211_reset_erp_info(dev); - ieee80211_enable_keys(sdata); - - if (sdata->vif.type == IEEE80211_IF_TYPE_STA && - !(sdata->flags & IEEE80211_SDATA_USERSPACE_MLME)) - netif_carrier_off(dev); - else - netif_carrier_on(dev); - } - - if (sdata->vif.type == IEEE80211_IF_TYPE_WDS) { - /* Create STA entry for the WDS peer */ - sta = sta_info_alloc(sdata, sdata->u.wds.remote_addr, - GFP_KERNEL); - if (!sta) { - res = -ENOMEM; - goto err_del_interface; - } - - sta->flags |= WLAN_STA_AUTHORIZED; - - res = sta_info_insert(sta); - if (res) { - /* STA has been freed */ - goto err_del_interface; - } - } - - if (local->open_count == 0) { - res = dev_open(local->mdev); - WARN_ON(res); - if (res) - goto err_del_interface; - tasklet_enable(&local->tx_pending_tasklet); - tasklet_enable(&local->tasklet); - } - - /* - * set_multicast_list will be invoked by the networking core - * which will check whether any increments here were done in - * error and sync them down to the hardware as filter flags. - */ - if (sdata->flags & IEEE80211_SDATA_ALLMULTI) - atomic_inc(&local->iff_allmultis); - - if (sdata->flags & IEEE80211_SDATA_PROMISC) - atomic_inc(&local->iff_promiscs); - - local->open_count++; - if (need_hw_reconfig) - ieee80211_hw_config(local); - - /* - * ieee80211_sta_work is disabled while network interface - * is down. Therefore, some configuration changes may not - * yet be effective. Trigger execution of ieee80211_sta_work - * to fix this. - */ - if(sdata->vif.type == IEEE80211_IF_TYPE_STA || - sdata->vif.type == IEEE80211_IF_TYPE_IBSS) { - struct ieee80211_if_sta *ifsta = &sdata->u.sta; - queue_work(local->hw.workqueue, &ifsta->work); - } - - netif_start_queue(dev); - - return 0; - err_del_interface: - local->ops->remove_interface(local_to_hw(local), &conf); - err_stop: - if (!local->open_count && local->ops->stop) - local->ops->stop(local_to_hw(local)); - return res; -} +/* everything else */ -static int ieee80211_stop(struct net_device *dev) +int ieee80211_if_config(struct ieee80211_sub_if_data *sdata, u32 changed) { - struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev); struct ieee80211_local *local = sdata->local; - struct ieee80211_if_init_conf conf; - struct sta_info *sta; - - /* - * Stop TX on this interface first. - */ - netif_stop_queue(dev); - - /* - * Now delete all active aggregation sessions. - */ - rcu_read_lock(); - - list_for_each_entry_rcu(sta, &local->sta_list, list) { - if (sta->sdata == sdata) - ieee80211_sta_tear_down_BA_sessions(dev, sta->addr); - } - - rcu_read_unlock(); - - /* - * Remove all stations associated with this interface. - * - * This must be done before calling ops->remove_interface() - * because otherwise we can later invoke ops->sta_notify() - * whenever the STAs are removed, and that invalidates driver - * assumptions about always getting a vif pointer that is valid - * (because if we remove a STA after ops->remove_interface() - * the driver will have removed the vif info already!) - * - * We could relax this and only unlink the stations from the - * hash table and list but keep them on a per-sdata list that - * will be inserted back again when the interface is brought - * up again, but I don't currently see a use case for that, - * except with WDS which gets a STA entry created when it is - * brought up. - */ - sta_info_flush(local, sdata); - - /* - * Don't count this interface for promisc/allmulti while it - * is down. dev_mc_unsync() will invoke set_multicast_list - * on the master interface which will sync these down to the - * hardware as filter flags. - */ - if (sdata->flags & IEEE80211_SDATA_ALLMULTI) - atomic_dec(&local->iff_allmultis); - - if (sdata->flags & IEEE80211_SDATA_PROMISC) - atomic_dec(&local->iff_promiscs); - - dev_mc_unsync(local->mdev, dev); - - /* APs need special treatment */ - if (sdata->vif.type == IEEE80211_IF_TYPE_AP) { - struct ieee80211_sub_if_data *vlan, *tmp; - struct beacon_data *old_beacon = sdata->u.ap.beacon; - - /* remove beacon */ - rcu_assign_pointer(sdata->u.ap.beacon, NULL); - synchronize_rcu(); - kfree(old_beacon); - - /* down all dependent devices, that is VLANs */ - list_for_each_entry_safe(vlan, tmp, &sdata->u.ap.vlans, - u.vlan.list) - dev_close(vlan->dev); - WARN_ON(!list_empty(&sdata->u.ap.vlans)); - } - - local->open_count--; - - switch (sdata->vif.type) { - case IEEE80211_IF_TYPE_VLAN: - list_del(&sdata->u.vlan.list); - sdata->u.vlan.ap = NULL; - /* no need to tell driver */ - break; - case IEEE80211_IF_TYPE_MNTR: - if (sdata->u.mntr_flags & MONITOR_FLAG_COOK_FRAMES) { - local->cooked_mntrs--; - break; - } - - local->monitors--; - if (local->monitors == 0) - local->hw.conf.flags &= ~IEEE80211_CONF_RADIOTAP; - - if (sdata->u.mntr_flags & MONITOR_FLAG_FCSFAIL) - local->fif_fcsfail--; - if (sdata->u.mntr_flags & MONITOR_FLAG_PLCPFAIL) - local->fif_plcpfail--; - if (sdata->u.mntr_flags & MONITOR_FLAG_CONTROL) - local->fif_control--; - if (sdata->u.mntr_flags & MONITOR_FLAG_OTHER_BSS) - local->fif_other_bss--; - - netif_tx_lock_bh(local->mdev); - ieee80211_configure_filter(local); - netif_tx_unlock_bh(local->mdev); - break; - case IEEE80211_IF_TYPE_MESH_POINT: - case IEEE80211_IF_TYPE_STA: - case IEEE80211_IF_TYPE_IBSS: - sdata->u.sta.state = IEEE80211_DISABLED; - memset(sdata->u.sta.bssid, 0, ETH_ALEN); - del_timer_sync(&sdata->u.sta.timer); - /* - * When we get here, the interface is marked down. - * Call synchronize_rcu() to wait for the RX path - * should it be using the interface and enqueuing - * frames at this very time on another CPU. - */ - synchronize_rcu(); - skb_queue_purge(&sdata->u.sta.skb_queue); - - if (local->scan_dev == sdata->dev) { - if (!local->ops->hw_scan) { - local->sta_sw_scanning = 0; - cancel_delayed_work(&local->scan_work); - } else - local->sta_hw_scanning = 0; - } - - flush_workqueue(local->hw.workqueue); - - sdata->u.sta.flags &= ~IEEE80211_STA_PRIVACY_INVOKED; - kfree(sdata->u.sta.extra_ie); - sdata->u.sta.extra_ie = NULL; - sdata->u.sta.extra_ie_len = 0; - /* fall through */ - default: - conf.vif = &sdata->vif; - conf.type = sdata->vif.type; - conf.mac_addr = dev->dev_addr; - /* disable all keys for as long as this netdev is down */ - ieee80211_disable_keys(sdata); - local->ops->remove_interface(local_to_hw(local), &conf); - } - - if (local->open_count == 0) { - if (netif_running(local->mdev)) - dev_close(local->mdev); - - if (local->ops->stop) - local->ops->stop(local_to_hw(local)); - - ieee80211_led_radio(local, 0); - - tasklet_disable(&local->tx_pending_tasklet); - tasklet_disable(&local->tasklet); - } - - return 0; -} - -int ieee80211_start_tx_ba_session(struct ieee80211_hw *hw, u8 *ra, u16 tid) -{ - struct ieee80211_local *local = hw_to_local(hw); - struct sta_info *sta; - struct ieee80211_sub_if_data *sdata; - u16 start_seq_num = 0; - u8 *state; - int ret; - DECLARE_MAC_BUF(mac); - - if (tid >= STA_TID_NUM) - return -EINVAL; - -#ifdef CONFIG_MAC80211_HT_DEBUG - printk(KERN_DEBUG "Open BA session requested for %s tid %u\n", - print_mac(mac, ra), tid); -#endif /* CONFIG_MAC80211_HT_DEBUG */ - - rcu_read_lock(); - - sta = sta_info_get(local, ra); - if (!sta) { - printk(KERN_DEBUG "Could not find the station\n"); - rcu_read_unlock(); - return -ENOENT; - } - - spin_lock_bh(&sta->ampdu_mlme.ampdu_tx); - - /* we have tried too many times, receiver does not want A-MPDU */ - if (sta->ampdu_mlme.addba_req_num[tid] > HT_AGG_MAX_RETRIES) { - ret = -EBUSY; - goto start_ba_exit; - } - - state = &sta->ampdu_mlme.tid_state_tx[tid]; - /* check if the TID is not in aggregation flow already */ - if (*state != HT_AGG_STATE_IDLE) { -#ifdef CONFIG_MAC80211_HT_DEBUG - printk(KERN_DEBUG "BA request denied - session is not " - "idle on tid %u\n", tid); -#endif /* CONFIG_MAC80211_HT_DEBUG */ - ret = -EAGAIN; - goto start_ba_exit; - } - - /* prepare A-MPDU MLME for Tx aggregation */ - sta->ampdu_mlme.tid_tx[tid] = - kmalloc(sizeof(struct tid_ampdu_tx), GFP_ATOMIC); - if (!sta->ampdu_mlme.tid_tx[tid]) { - if (net_ratelimit()) - printk(KERN_ERR "allocate tx mlme to tid %d failed\n", - tid); - ret = -ENOMEM; - goto start_ba_exit; - } - /* Tx timer */ - sta->ampdu_mlme.tid_tx[tid]->addba_resp_timer.function = - sta_addba_resp_timer_expired; - sta->ampdu_mlme.tid_tx[tid]->addba_resp_timer.data = - (unsigned long)&sta->timer_to_tid[tid]; - init_timer(&sta->ampdu_mlme.tid_tx[tid]->addba_resp_timer); - - /* ensure that TX flow won't interrupt us - * until the end of the call to requeue function */ - spin_lock_bh(&local->mdev->queue_lock); - - /* create a new queue for this aggregation */ - ret = ieee80211_ht_agg_queue_add(local, sta, tid); - - /* case no queue is available to aggregation - * don't switch to aggregation */ - if (ret) { -#ifdef CONFIG_MAC80211_HT_DEBUG - printk(KERN_DEBUG "BA request denied - queue unavailable for" - " tid %d\n", tid); -#endif /* CONFIG_MAC80211_HT_DEBUG */ - goto start_ba_err; - } - sdata = sta->sdata; - - /* Ok, the Addba frame hasn't been sent yet, but if the driver calls the - * call back right away, it must see that the flow has begun */ - *state |= HT_ADDBA_REQUESTED_MSK; - - if (local->ops->ampdu_action) - ret = local->ops->ampdu_action(hw, IEEE80211_AMPDU_TX_START, - ra, tid, &start_seq_num); - - if (ret) { - /* No need to requeue the packets in the agg queue, since we - * held the tx lock: no packet could be enqueued to the newly - * allocated queue */ - ieee80211_ht_agg_queue_remove(local, sta, tid, 0); -#ifdef CONFIG_MAC80211_HT_DEBUG - printk(KERN_DEBUG "BA request denied - HW unavailable for" - " tid %d\n", tid); -#endif /* CONFIG_MAC80211_HT_DEBUG */ - *state = HT_AGG_STATE_IDLE; - goto start_ba_err; - } - - /* Will put all the packets in the new SW queue */ - ieee80211_requeue(local, ieee802_1d_to_ac[tid]); - spin_unlock_bh(&local->mdev->queue_lock); - - /* send an addBA request */ - sta->ampdu_mlme.dialog_token_allocator++; - sta->ampdu_mlme.tid_tx[tid]->dialog_token = - sta->ampdu_mlme.dialog_token_allocator; - sta->ampdu_mlme.tid_tx[tid]->ssn = start_seq_num; - - ieee80211_send_addba_request(sta->sdata->dev, ra, tid, - sta->ampdu_mlme.tid_tx[tid]->dialog_token, - sta->ampdu_mlme.tid_tx[tid]->ssn, - 0x40, 5000); - - /* activate the timer for the recipient's addBA response */ - sta->ampdu_mlme.tid_tx[tid]->addba_resp_timer.expires = - jiffies + ADDBA_RESP_INTERVAL; - add_timer(&sta->ampdu_mlme.tid_tx[tid]->addba_resp_timer); - printk(KERN_DEBUG "activated addBA response timer on tid %d\n", tid); - goto start_ba_exit; - -start_ba_err: - kfree(sta->ampdu_mlme.tid_tx[tid]); - sta->ampdu_mlme.tid_tx[tid] = NULL; - spin_unlock_bh(&local->mdev->queue_lock); - ret = -EBUSY; -start_ba_exit: - spin_unlock_bh(&sta->ampdu_mlme.ampdu_tx); - rcu_read_unlock(); - return ret; -} -EXPORT_SYMBOL(ieee80211_start_tx_ba_session); + struct ieee80211_if_conf conf; -int ieee80211_stop_tx_ba_session(struct ieee80211_hw *hw, - u8 *ra, u16 tid, - enum ieee80211_back_parties initiator) -{ - struct ieee80211_local *local = hw_to_local(hw); - struct sta_info *sta; - u8 *state; - int ret = 0; - DECLARE_MAC_BUF(mac); + if (WARN_ON(!netif_running(sdata->dev))) + return 0; - if (tid >= STA_TID_NUM) + if (WARN_ON(sdata->vif.type == NL80211_IFTYPE_AP_VLAN)) return -EINVAL; - rcu_read_lock(); - sta = sta_info_get(local, ra); - if (!sta) { - rcu_read_unlock(); - return -ENOENT; - } - - /* check if the TID is in aggregation */ - state = &sta->ampdu_mlme.tid_state_tx[tid]; - spin_lock_bh(&sta->ampdu_mlme.ampdu_tx); - - if (*state != HT_AGG_STATE_OPERATIONAL) { - ret = -ENOENT; - goto stop_BA_exit; - } - -#ifdef CONFIG_MAC80211_HT_DEBUG - printk(KERN_DEBUG "Tx BA session stop requested for %s tid %u\n", - print_mac(mac, ra), tid); -#endif /* CONFIG_MAC80211_HT_DEBUG */ - - ieee80211_stop_queue(hw, sta->tid_to_tx_q[tid]); - - *state = HT_AGG_STATE_REQ_STOP_BA_MSK | - (initiator << HT_AGG_STATE_INITIATOR_SHIFT); - - if (local->ops->ampdu_action) - ret = local->ops->ampdu_action(hw, IEEE80211_AMPDU_TX_STOP, - ra, tid, NULL); - - /* case HW denied going back to legacy */ - if (ret) { - WARN_ON(ret != -EBUSY); - *state = HT_AGG_STATE_OPERATIONAL; - ieee80211_wake_queue(hw, sta->tid_to_tx_q[tid]); - goto stop_BA_exit; - } - -stop_BA_exit: - spin_unlock_bh(&sta->ampdu_mlme.ampdu_tx); - rcu_read_unlock(); - return ret; -} -EXPORT_SYMBOL(ieee80211_stop_tx_ba_session); - -void ieee80211_start_tx_ba_cb(struct ieee80211_hw *hw, u8 *ra, u16 tid) -{ - struct ieee80211_local *local = hw_to_local(hw); - struct sta_info *sta; - u8 *state; - DECLARE_MAC_BUF(mac); - - if (tid >= STA_TID_NUM) { - printk(KERN_DEBUG "Bad TID value: tid = %d (>= %d)\n", - tid, STA_TID_NUM); - return; - } - - rcu_read_lock(); - sta = sta_info_get(local, ra); - if (!sta) { - rcu_read_unlock(); - printk(KERN_DEBUG "Could not find station: %s\n", - print_mac(mac, ra)); - return; - } - - state = &sta->ampdu_mlme.tid_state_tx[tid]; - spin_lock_bh(&sta->ampdu_mlme.ampdu_tx); - - if (!(*state & HT_ADDBA_REQUESTED_MSK)) { - printk(KERN_DEBUG "addBA was not requested yet, state is %d\n", - *state); - spin_unlock_bh(&sta->ampdu_mlme.ampdu_tx); - rcu_read_unlock(); - return; - } - - WARN_ON_ONCE(*state & HT_ADDBA_DRV_READY_MSK); - - *state |= HT_ADDBA_DRV_READY_MSK; - - if (*state == HT_AGG_STATE_OPERATIONAL) { - printk(KERN_DEBUG "Aggregation is on for tid %d \n", tid); - ieee80211_wake_queue(hw, sta->tid_to_tx_q[tid]); - } - spin_unlock_bh(&sta->ampdu_mlme.ampdu_tx); - rcu_read_unlock(); -} -EXPORT_SYMBOL(ieee80211_start_tx_ba_cb); - -void ieee80211_stop_tx_ba_cb(struct ieee80211_hw *hw, u8 *ra, u8 tid) -{ - struct ieee80211_local *local = hw_to_local(hw); - struct sta_info *sta; - u8 *state; - int agg_queue; - DECLARE_MAC_BUF(mac); - - if (tid >= STA_TID_NUM) { - printk(KERN_DEBUG "Bad TID value: tid = %d (>= %d)\n", - tid, STA_TID_NUM); - return; - } - -#ifdef CONFIG_MAC80211_HT_DEBUG - printk(KERN_DEBUG "Stopping Tx BA session for %s tid %d\n", - print_mac(mac, ra), tid); -#endif /* CONFIG_MAC80211_HT_DEBUG */ - - rcu_read_lock(); - sta = sta_info_get(local, ra); - if (!sta) { - printk(KERN_DEBUG "Could not find station: %s\n", - print_mac(mac, ra)); - rcu_read_unlock(); - return; - } - state = &sta->ampdu_mlme.tid_state_tx[tid]; - - spin_lock_bh(&sta->ampdu_mlme.ampdu_tx); - if ((*state & HT_AGG_STATE_REQ_STOP_BA_MSK) == 0) { - printk(KERN_DEBUG "unexpected callback to A-MPDU stop\n"); - spin_unlock_bh(&sta->ampdu_mlme.ampdu_tx); - rcu_read_unlock(); - return; - } - - if (*state & HT_AGG_STATE_INITIATOR_MSK) - ieee80211_send_delba(sta->sdata->dev, ra, tid, - WLAN_BACK_INITIATOR, WLAN_REASON_QSTA_NOT_USE); - - agg_queue = sta->tid_to_tx_q[tid]; - - /* avoid ordering issues: we are the only one that can modify - * the content of the qdiscs */ - spin_lock_bh(&local->mdev->queue_lock); - /* remove the queue for this aggregation */ - ieee80211_ht_agg_queue_remove(local, sta, tid, 1); - spin_unlock_bh(&local->mdev->queue_lock); - - /* we just requeued the all the frames that were in the removed - * queue, and since we might miss a softirq we do netif_schedule. - * ieee80211_wake_queue is not used here as this queue is not - * necessarily stopped */ - netif_schedule(local->mdev); - *state = HT_AGG_STATE_IDLE; - sta->ampdu_mlme.addba_req_num[tid] = 0; - kfree(sta->ampdu_mlme.tid_tx[tid]); - sta->ampdu_mlme.tid_tx[tid] = NULL; - spin_unlock_bh(&sta->ampdu_mlme.ampdu_tx); - - rcu_read_unlock(); -} -EXPORT_SYMBOL(ieee80211_stop_tx_ba_cb); - -void ieee80211_start_tx_ba_cb_irqsafe(struct ieee80211_hw *hw, - const u8 *ra, u16 tid) -{ - struct ieee80211_local *local = hw_to_local(hw); - struct ieee80211_ra_tid *ra_tid; - struct sk_buff *skb = dev_alloc_skb(0); - - if (unlikely(!skb)) { - if (net_ratelimit()) - printk(KERN_WARNING "%s: Not enough memory, " - "dropping start BA session", skb->dev->name); - return; - } - ra_tid = (struct ieee80211_ra_tid *) &skb->cb; - memcpy(&ra_tid->ra, ra, ETH_ALEN); - ra_tid->tid = tid; - - skb->pkt_type = IEEE80211_ADDBA_MSG; - skb_queue_tail(&local->skb_queue, skb); - tasklet_schedule(&local->tasklet); -} -EXPORT_SYMBOL(ieee80211_start_tx_ba_cb_irqsafe); - -void ieee80211_stop_tx_ba_cb_irqsafe(struct ieee80211_hw *hw, - const u8 *ra, u16 tid) -{ - struct ieee80211_local *local = hw_to_local(hw); - struct ieee80211_ra_tid *ra_tid; - struct sk_buff *skb = dev_alloc_skb(0); - - if (unlikely(!skb)) { - if (net_ratelimit()) - printk(KERN_WARNING "%s: Not enough memory, " - "dropping stop BA session", skb->dev->name); - return; - } - ra_tid = (struct ieee80211_ra_tid *) &skb->cb; - memcpy(&ra_tid->ra, ra, ETH_ALEN); - ra_tid->tid = tid; - - skb->pkt_type = IEEE80211_DELBA_MSG; - skb_queue_tail(&local->skb_queue, skb); - tasklet_schedule(&local->tasklet); -} -EXPORT_SYMBOL(ieee80211_stop_tx_ba_cb_irqsafe); - -static void ieee80211_set_multicast_list(struct net_device *dev) -{ - struct ieee80211_local *local = wdev_priv(dev->ieee80211_ptr); - struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev); - int allmulti, promisc, sdata_allmulti, sdata_promisc; - - allmulti = !!(dev->flags & IFF_ALLMULTI); - promisc = !!(dev->flags & IFF_PROMISC); - sdata_allmulti = !!(sdata->flags & IEEE80211_SDATA_ALLMULTI); - sdata_promisc = !!(sdata->flags & IEEE80211_SDATA_PROMISC); - - if (allmulti != sdata_allmulti) { - if (dev->flags & IFF_ALLMULTI) - atomic_inc(&local->iff_allmultis); - else - atomic_dec(&local->iff_allmultis); - sdata->flags ^= IEEE80211_SDATA_ALLMULTI; - } - - if (promisc != sdata_promisc) { - if (dev->flags & IFF_PROMISC) - atomic_inc(&local->iff_promiscs); - else - atomic_dec(&local->iff_promiscs); - sdata->flags ^= IEEE80211_SDATA_PROMISC; - } - - dev_mc_sync(local->mdev, dev); -} - -static const struct header_ops ieee80211_header_ops = { - .create = eth_header, - .parse = header_parse_80211, - .rebuild = eth_rebuild_header, - .cache = eth_header_cache, - .cache_update = eth_header_cache_update, -}; - -/* Must not be called for mdev */ -void ieee80211_if_setup(struct net_device *dev) -{ - ether_setup(dev); - dev->hard_start_xmit = ieee80211_subif_start_xmit; - dev->wireless_handlers = &ieee80211_iw_handler_def; - dev->set_multicast_list = ieee80211_set_multicast_list; - dev->change_mtu = ieee80211_change_mtu; - dev->open = ieee80211_open; - dev->stop = ieee80211_stop; - dev->destructor = ieee80211_if_free; -} - -/* everything else */ - -static int __ieee80211_if_config(struct net_device *dev, - struct sk_buff *beacon, - struct ieee80211_tx_control *control) -{ - struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev); - struct ieee80211_local *local = wdev_priv(dev->ieee80211_ptr); - struct ieee80211_if_conf conf; - - if (!local->ops->config_interface || !netif_running(dev)) + if (!local->ops->config_interface) return 0; memset(&conf, 0, sizeof(conf)); - conf.type = sdata->vif.type; - if (sdata->vif.type == IEEE80211_IF_TYPE_STA || - sdata->vif.type == IEEE80211_IF_TYPE_IBSS) { + conf.changed = changed; + + if (sdata->vif.type == NL80211_IFTYPE_STATION || + sdata->vif.type == NL80211_IFTYPE_ADHOC) { conf.bssid = sdata->u.sta.bssid; conf.ssid = sdata->u.sta.ssid; conf.ssid_len = sdata->u.sta.ssid_len; - } else if (ieee80211_vif_is_mesh(&sdata->vif)) { - conf.beacon = beacon; - conf.beacon_control = control; - ieee80211_start_mesh(dev); - } else if (sdata->vif.type == IEEE80211_IF_TYPE_AP) { + } else if (sdata->vif.type == NL80211_IFTYPE_AP) { + conf.bssid = sdata->dev->dev_addr; conf.ssid = sdata->u.ap.ssid; conf.ssid_len = sdata->u.ap.ssid_len; - conf.beacon = beacon; - conf.beacon_control = control; + } else if (ieee80211_vif_is_mesh(&sdata->vif)) { + u8 zero[ETH_ALEN] = { 0 }; + conf.bssid = zero; + conf.ssid = zero; + conf.ssid_len = 0; + } else { + WARN_ON(1); + return -EINVAL; } - return local->ops->config_interface(local_to_hw(local), - &sdata->vif, &conf); -} -int ieee80211_if_config(struct net_device *dev) -{ - struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev); - struct ieee80211_local *local = wdev_priv(dev->ieee80211_ptr); - if (sdata->vif.type == IEEE80211_IF_TYPE_MESH_POINT && - (local->hw.flags & IEEE80211_HW_HOST_GEN_BEACON_TEMPLATE)) - return ieee80211_if_config_beacon(dev); - return __ieee80211_if_config(dev, NULL, NULL); -} + if (WARN_ON(!conf.bssid && (changed & IEEE80211_IFCC_BSSID))) + return -EINVAL; -int ieee80211_if_config_beacon(struct net_device *dev) -{ - struct ieee80211_local *local = wdev_priv(dev->ieee80211_ptr); - struct ieee80211_tx_control control; - struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev); - struct sk_buff *skb; + if (WARN_ON(!conf.ssid && (changed & IEEE80211_IFCC_SSID))) + return -EINVAL; - if (!(local->hw.flags & IEEE80211_HW_HOST_GEN_BEACON_TEMPLATE)) - return 0; - skb = ieee80211_beacon_get(local_to_hw(local), &sdata->vif, - &control); - if (!skb) - return -ENOMEM; - return __ieee80211_if_config(dev, skb, &control); + return local->ops->config_interface(local_to_hw(local), + &sdata->vif, &conf); } int ieee80211_hw_config(struct ieee80211_local *local) @@ -1029,7 +202,7 @@ int ieee80211_hw_config(struct ieee80211_local *local) struct ieee80211_channel *chan; int ret = 0; - if (local->sta_sw_scanning) + if (local->sw_scanning) chan = local->scan_channel; else chan = local->oper_channel; @@ -1068,56 +241,84 @@ u32 ieee80211_handle_ht(struct ieee80211_local *local, int enable_ht, struct ieee80211_supported_band *sband; struct ieee80211_ht_info ht_conf; struct ieee80211_ht_bss_info ht_bss_conf; - int i; u32 changed = 0; + int i; + u8 max_tx_streams = IEEE80211_HT_CAP_MAX_STREAMS; + u8 tx_mcs_set_cap; sband = local->hw.wiphy->bands[conf->channel->band]; + memset(&ht_conf, 0, sizeof(struct ieee80211_ht_info)); + memset(&ht_bss_conf, 0, sizeof(struct ieee80211_ht_bss_info)); + /* HT is not supported */ if (!sband->ht_info.ht_supported) { conf->flags &= ~IEEE80211_CONF_SUPPORT_HT_MODE; - return 0; + goto out; } - memset(&ht_conf, 0, sizeof(struct ieee80211_ht_info)); - memset(&ht_bss_conf, 0, sizeof(struct ieee80211_ht_bss_info)); - - if (enable_ht) { - if (!(conf->flags & IEEE80211_CONF_SUPPORT_HT_MODE)) + /* disable HT */ + if (!enable_ht) { + if (conf->flags & IEEE80211_CONF_SUPPORT_HT_MODE) changed |= BSS_CHANGED_HT; + conf->flags &= ~IEEE80211_CONF_SUPPORT_HT_MODE; + conf->ht_conf.ht_supported = 0; + goto out; + } - conf->flags |= IEEE80211_CONF_SUPPORT_HT_MODE; - ht_conf.ht_supported = 1; - ht_conf.cap = req_ht_cap->cap & sband->ht_info.cap; - ht_conf.cap &= ~(IEEE80211_HT_CAP_MIMO_PS); - ht_conf.cap |= sband->ht_info.cap & IEEE80211_HT_CAP_MIMO_PS; + if (!(conf->flags & IEEE80211_CONF_SUPPORT_HT_MODE)) + changed |= BSS_CHANGED_HT; - for (i = 0; i < SUPP_MCS_SET_LEN; i++) - ht_conf.supp_mcs_set[i] = - sband->ht_info.supp_mcs_set[i] & - req_ht_cap->supp_mcs_set[i]; + conf->flags |= IEEE80211_CONF_SUPPORT_HT_MODE; + ht_conf.ht_supported = 1; - ht_bss_conf.primary_channel = req_bss_cap->primary_channel; - ht_bss_conf.bss_cap = req_bss_cap->bss_cap; - ht_bss_conf.bss_op_mode = req_bss_cap->bss_op_mode; + ht_conf.cap = req_ht_cap->cap & sband->ht_info.cap; + ht_conf.cap &= ~(IEEE80211_HT_CAP_SM_PS); + ht_conf.cap |= sband->ht_info.cap & IEEE80211_HT_CAP_SM_PS; + ht_bss_conf.primary_channel = req_bss_cap->primary_channel; + ht_bss_conf.bss_cap = req_bss_cap->bss_cap; + ht_bss_conf.bss_op_mode = req_bss_cap->bss_op_mode; - ht_conf.ampdu_factor = req_ht_cap->ampdu_factor; - ht_conf.ampdu_density = req_ht_cap->ampdu_density; + ht_conf.ampdu_factor = req_ht_cap->ampdu_factor; + ht_conf.ampdu_density = req_ht_cap->ampdu_density; - /* if bss configuration changed store the new one */ - if (memcmp(&conf->ht_conf, &ht_conf, sizeof(ht_conf)) || - memcmp(&conf->ht_bss_conf, &ht_bss_conf, sizeof(ht_bss_conf))) { - changed |= BSS_CHANGED_HT; - memcpy(&conf->ht_conf, &ht_conf, sizeof(ht_conf)); - memcpy(&conf->ht_bss_conf, &ht_bss_conf, sizeof(ht_bss_conf)); - } - } else { - if (conf->flags & IEEE80211_CONF_SUPPORT_HT_MODE) - changed |= BSS_CHANGED_HT; - conf->flags &= ~IEEE80211_CONF_SUPPORT_HT_MODE; - } + /* Bits 96-100 */ + tx_mcs_set_cap = sband->ht_info.supp_mcs_set[12]; + + /* configure suppoerted Tx MCS according to requested MCS + * (based in most cases on Rx capabilities of peer) and self + * Tx MCS capabilities (as defined by low level driver HW + * Tx capabilities) */ + if (!(tx_mcs_set_cap & IEEE80211_HT_CAP_MCS_TX_DEFINED)) + goto check_changed; + + /* Counting from 0 therfore + 1 */ + if (tx_mcs_set_cap & IEEE80211_HT_CAP_MCS_TX_RX_DIFF) + max_tx_streams = ((tx_mcs_set_cap & + IEEE80211_HT_CAP_MCS_TX_STREAMS) >> 2) + 1; + + for (i = 0; i < max_tx_streams; i++) + ht_conf.supp_mcs_set[i] = + sband->ht_info.supp_mcs_set[i] & + req_ht_cap->supp_mcs_set[i]; + + if (tx_mcs_set_cap & IEEE80211_HT_CAP_MCS_TX_UEQM) + for (i = IEEE80211_SUPP_MCS_SET_UEQM; + i < IEEE80211_SUPP_MCS_SET_LEN; i++) + ht_conf.supp_mcs_set[i] = + sband->ht_info.supp_mcs_set[i] & + req_ht_cap->supp_mcs_set[i]; +check_changed: + /* if bss configuration changed store the new one */ + if (memcmp(&conf->ht_conf, &ht_conf, sizeof(ht_conf)) || + memcmp(&conf->ht_bss_conf, &ht_bss_conf, sizeof(ht_bss_conf))) { + changed |= BSS_CHANGED_HT; + memcpy(&conf->ht_conf, &ht_conf, sizeof(ht_conf)); + memcpy(&conf->ht_bss_conf, &ht_bss_conf, sizeof(ht_bss_conf)); + } +out: return changed; } @@ -1126,6 +327,9 @@ void ieee80211_bss_info_change_notify(struct ieee80211_sub_if_data *sdata, { struct ieee80211_local *local = sdata->local; + if (WARN_ON(sdata->vif.type == NL80211_IFTYPE_AP_VLAN)) + return; + if (!changed) return; @@ -1136,50 +340,28 @@ void ieee80211_bss_info_change_notify(struct ieee80211_sub_if_data *sdata, changed); } -void ieee80211_reset_erp_info(struct net_device *dev) +u32 ieee80211_reset_erp_info(struct ieee80211_sub_if_data *sdata) { - struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev); - sdata->bss_conf.use_cts_prot = 0; sdata->bss_conf.use_short_preamble = 0; - ieee80211_bss_info_change_notify(sdata, - BSS_CHANGED_ERP_CTS_PROT | - BSS_CHANGED_ERP_PREAMBLE); + return BSS_CHANGED_ERP_CTS_PROT | BSS_CHANGED_ERP_PREAMBLE; } void ieee80211_tx_status_irqsafe(struct ieee80211_hw *hw, - struct sk_buff *skb, - struct ieee80211_tx_status *status) + struct sk_buff *skb) { struct ieee80211_local *local = hw_to_local(hw); - struct ieee80211_tx_status *saved; + struct ieee80211_tx_info *info = IEEE80211_SKB_CB(skb); int tmp; skb->dev = local->mdev; - saved = kmalloc(sizeof(struct ieee80211_tx_status), GFP_ATOMIC); - if (unlikely(!saved)) { - if (net_ratelimit()) - printk(KERN_WARNING "%s: Not enough memory, " - "dropping tx status", skb->dev->name); - /* should be dev_kfree_skb_irq, but due to this function being - * named _irqsafe instead of just _irq we can't be sure that - * people won't call it from non-irq contexts */ - dev_kfree_skb_any(skb); - return; - } - memcpy(saved, status, sizeof(struct ieee80211_tx_status)); - /* copy pointer to saved status into skb->cb for use by tasklet */ - memcpy(skb->cb, &saved, sizeof(saved)); - skb->pkt_type = IEEE80211_TX_STATUS_MSG; - skb_queue_tail(status->control.flags & IEEE80211_TXCTL_REQ_TX_STATUS ? + skb_queue_tail(info->flags & IEEE80211_TX_CTL_REQ_TX_STATUS ? &local->skb_queue : &local->skb_queue_unreliable, skb); tmp = skb_queue_len(&local->skb_queue) + skb_queue_len(&local->skb_queue_unreliable); while (tmp > IEEE80211_IRQSAFE_QUEUE_LIMIT && (skb = skb_dequeue(&local->skb_queue_unreliable))) { - memcpy(&saved, skb->cb, sizeof(saved)); - kfree(saved); dev_kfree_skb_irq(skb); tmp--; I802_DEBUG_INC(local->tx_status_drop); @@ -1193,7 +375,6 @@ static void ieee80211_tasklet_handler(unsigned long data) struct ieee80211_local *local = (struct ieee80211_local *) data; struct sk_buff *skb; struct ieee80211_rx_status rx_status; - struct ieee80211_tx_status *tx_status; struct ieee80211_ra_tid *ra_tid; while ((skb = skb_dequeue(&local->skb_queue)) || @@ -1208,12 +389,8 @@ static void ieee80211_tasklet_handler(unsigned long data) __ieee80211_rx(local_to_hw(local), skb, &rx_status); break; case IEEE80211_TX_STATUS_MSG: - /* get pointer to saved status out of skb->cb */ - memcpy(&tx_status, skb->cb, sizeof(tx_status)); skb->pkt_type = 0; - ieee80211_tx_status(local_to_hw(local), - skb, tx_status); - kfree(tx_status); + ieee80211_tx_status(local_to_hw(local), skb); break; case IEEE80211_DELBA_MSG: ra_tid = (struct ieee80211_ra_tid *) &skb->cb; @@ -1227,9 +404,8 @@ static void ieee80211_tasklet_handler(unsigned long data) ra_tid->ra, ra_tid->tid); dev_kfree_skb(skb); break ; - default: /* should never get here! */ - printk(KERN_ERR "%s: Unknown message type (%d)\n", - wiphy_name(local->hw.wiphy), skb->pkt_type); + default: + WARN_ON(1); dev_kfree_skb(skb); break; } @@ -1239,29 +415,15 @@ static void ieee80211_tasklet_handler(unsigned long data) /* Remove added headers (e.g., QoS control), encryption header/MIC, etc. to * make a prepared TX frame (one that has been given to hw) to look like brand * new IEEE 802.11 frame that is ready to go through TX processing again. - * Also, tx_packet_data in cb is restored from tx_control. */ + */ static void ieee80211_remove_tx_extra(struct ieee80211_local *local, struct ieee80211_key *key, - struct sk_buff *skb, - struct ieee80211_tx_control *control) + struct sk_buff *skb) { - int hdrlen, iv_len, mic_len; - struct ieee80211_tx_packet_data *pkt_data; - - pkt_data = (struct ieee80211_tx_packet_data *)skb->cb; - pkt_data->ifindex = vif_to_sdata(control->vif)->dev->ifindex; - pkt_data->flags = 0; - if (control->flags & IEEE80211_TXCTL_REQ_TX_STATUS) - pkt_data->flags |= IEEE80211_TXPD_REQ_TX_STATUS; - if (control->flags & IEEE80211_TXCTL_DO_NOT_ENCRYPT) - pkt_data->flags |= IEEE80211_TXPD_DO_NOT_ENCRYPT; - if (control->flags & IEEE80211_TXCTL_REQUEUE) - pkt_data->flags |= IEEE80211_TXPD_REQUEUE; - if (control->flags & IEEE80211_TXCTL_EAPOL_FRAME) - pkt_data->flags |= IEEE80211_TXPD_EAPOL_FRAME; - pkt_data->queue = control->queue; - - hdrlen = ieee80211_get_hdrlen_from_skb(skb); + unsigned int hdrlen, iv_len, mic_len; + struct ieee80211_hdr *hdr = (struct ieee80211_hdr *)skb->data; + + hdrlen = ieee80211_hdrlen(hdr->frame_control); if (!key) goto no_key; @@ -1283,32 +445,29 @@ static void ieee80211_remove_tx_extra(struct ieee80211_local *local, goto no_key; } - if (skb->len >= mic_len && + if (skb->len >= hdrlen + mic_len && !(key->flags & KEY_FLAG_UPLOADED_TO_HARDWARE)) skb_trim(skb, skb->len - mic_len); - if (skb->len >= iv_len && skb->len > hdrlen) { + if (skb->len >= hdrlen + iv_len) { memmove(skb->data + iv_len, skb->data, hdrlen); - skb_pull(skb, iv_len); + hdr = (struct ieee80211_hdr *)skb_pull(skb, iv_len); } no_key: - { - struct ieee80211_hdr *hdr = (struct ieee80211_hdr *) skb->data; - u16 fc = le16_to_cpu(hdr->frame_control); - if ((fc & 0x8C) == 0x88) /* QoS Control Field */ { - fc &= ~IEEE80211_STYPE_QOS_DATA; - hdr->frame_control = cpu_to_le16(fc); - memmove(skb->data + 2, skb->data, hdrlen - 2); - skb_pull(skb, 2); - } + if (ieee80211_is_data_qos(hdr->frame_control)) { + hdr->frame_control &= ~cpu_to_le16(IEEE80211_STYPE_QOS_DATA); + memmove(skb->data + IEEE80211_QOS_CTL_LEN, skb->data, + hdrlen - IEEE80211_QOS_CTL_LEN); + skb_pull(skb, IEEE80211_QOS_CTL_LEN); } } static void ieee80211_handle_filtered_frame(struct ieee80211_local *local, struct sta_info *sta, - struct sk_buff *skb, - struct ieee80211_tx_status *status) + struct sk_buff *skb) { + struct ieee80211_tx_info *info = IEEE80211_SKB_CB(skb); + sta->tx_filtered_count++; /* @@ -1316,7 +475,7 @@ static void ieee80211_handle_filtered_frame(struct ieee80211_local *local, * packet. If the STA went to power save mode, this will happen * when it wakes up for the next time. */ - sta->flags |= WLAN_STA_CLEAR_PS_FILT; + set_sta_flags(sta, WLAN_STA_CLEAR_PS_FILT); /* * This code races in the following way: @@ -1348,84 +507,91 @@ static void ieee80211_handle_filtered_frame(struct ieee80211_local *local, * can be unknown, for example with different interrupt status * bits. */ - if (sta->flags & WLAN_STA_PS && + if (test_sta_flags(sta, WLAN_STA_PS) && skb_queue_len(&sta->tx_filtered) < STA_MAX_TX_BUFFER) { - ieee80211_remove_tx_extra(local, sta->key, skb, - &status->control); + ieee80211_remove_tx_extra(local, sta->key, skb); skb_queue_tail(&sta->tx_filtered, skb); return; } - if (!(sta->flags & WLAN_STA_PS) && - !(status->control.flags & IEEE80211_TXCTL_REQUEUE)) { + if (!test_sta_flags(sta, WLAN_STA_PS) && + !(info->flags & IEEE80211_TX_CTL_REQUEUE)) { /* Software retry the packet once */ - status->control.flags |= IEEE80211_TXCTL_REQUEUE; - ieee80211_remove_tx_extra(local, sta->key, skb, - &status->control); + info->flags |= IEEE80211_TX_CTL_REQUEUE; + ieee80211_remove_tx_extra(local, sta->key, skb); dev_queue_xmit(skb); return; } +#ifdef CONFIG_MAC80211_VERBOSE_DEBUG if (net_ratelimit()) printk(KERN_DEBUG "%s: dropped TX filtered frame, " "queue_len=%d PS=%d @%lu\n", wiphy_name(local->hw.wiphy), skb_queue_len(&sta->tx_filtered), - !!(sta->flags & WLAN_STA_PS), jiffies); + !!test_sta_flags(sta, WLAN_STA_PS), jiffies); +#endif dev_kfree_skb(skb); } -void ieee80211_tx_status(struct ieee80211_hw *hw, struct sk_buff *skb, - struct ieee80211_tx_status *status) +void ieee80211_tx_status(struct ieee80211_hw *hw, struct sk_buff *skb) { struct sk_buff *skb2; struct ieee80211_hdr *hdr = (struct ieee80211_hdr *) skb->data; struct ieee80211_local *local = hw_to_local(hw); + struct ieee80211_tx_info *info = IEEE80211_SKB_CB(skb); u16 frag, type; + __le16 fc; + struct ieee80211_supported_band *sband; struct ieee80211_tx_status_rtap_hdr *rthdr; struct ieee80211_sub_if_data *sdata; struct net_device *prev_dev = NULL; - - if (!status) { - printk(KERN_ERR - "%s: ieee80211_tx_status called with NULL status\n", - wiphy_name(local->hw.wiphy)); - dev_kfree_skb(skb); - return; - } + struct sta_info *sta; rcu_read_lock(); - if (status->excessive_retries) { - struct sta_info *sta; - sta = sta_info_get(local, hdr->addr1); - if (sta) { - if (sta->flags & WLAN_STA_PS) { - /* - * The STA is in power save mode, so assume - * that this TX packet failed because of that. - */ - status->excessive_retries = 0; - status->flags |= IEEE80211_TX_STATUS_TX_FILTERED; - ieee80211_handle_filtered_frame(local, sta, - skb, status); - rcu_read_unlock(); - return; - } + sta = sta_info_get(local, hdr->addr1); + + if (sta) { + if (info->status.excessive_retries && + test_sta_flags(sta, WLAN_STA_PS)) { + /* + * The STA is in power save mode, so assume + * that this TX packet failed because of that. + */ + ieee80211_handle_filtered_frame(local, sta, skb); + rcu_read_unlock(); + return; } - } - if (status->flags & IEEE80211_TX_STATUS_TX_FILTERED) { - struct sta_info *sta; - sta = sta_info_get(local, hdr->addr1); - if (sta) { - ieee80211_handle_filtered_frame(local, sta, skb, - status); + fc = hdr->frame_control; + + if ((info->flags & IEEE80211_TX_STAT_AMPDU_NO_BACK) && + (ieee80211_is_data_qos(fc))) { + u16 tid, ssn; + u8 *qc; + + qc = ieee80211_get_qos_ctl(hdr); + tid = qc[0] & 0xf; + ssn = ((le16_to_cpu(hdr->seq_ctrl) + 0x10) + & IEEE80211_SCTL_SEQ); + ieee80211_send_bar(sta->sdata, hdr->addr1, + tid, ssn); + } + + if (info->flags & IEEE80211_TX_STAT_TX_FILTERED) { + ieee80211_handle_filtered_frame(local, sta, skb); rcu_read_unlock(); return; + } else { + if (info->status.excessive_retries) + sta->tx_retry_failed++; + sta->tx_retry_count += info->status.retry_count; } - } else - rate_control_tx_status(local->mdev, skb, status); + + sband = local->hw.wiphy->bands[info->band]; + rate_control_tx_status(local, sband, sta, skb); + } rcu_read_unlock(); @@ -1439,14 +605,14 @@ void ieee80211_tx_status(struct ieee80211_hw *hw, struct sk_buff *skb, frag = le16_to_cpu(hdr->seq_ctrl) & IEEE80211_SCTL_FRAG; type = le16_to_cpu(hdr->frame_control) & IEEE80211_FCTL_FTYPE; - if (status->flags & IEEE80211_TX_STATUS_ACK) { + if (info->flags & IEEE80211_TX_STAT_ACK) { if (frag == 0) { local->dot11TransmittedFrameCount++; if (is_multicast_ether_addr(hdr->addr1)) local->dot11MulticastTransmittedFrameCount++; - if (status->retry_count > 0) + if (info->status.retry_count > 0) local->dot11RetryCount++; - if (status->retry_count > 1) + if (info->status.retry_count > 1) local->dot11MultipleRetryCount++; } @@ -1483,7 +649,7 @@ void ieee80211_tx_status(struct ieee80211_hw *hw, struct sk_buff *skb, return; } - rthdr = (struct ieee80211_tx_status_rtap_hdr*) + rthdr = (struct ieee80211_tx_status_rtap_hdr *) skb_push(skb, sizeof(*rthdr)); memset(rthdr, 0, sizeof(*rthdr)); @@ -1492,17 +658,17 @@ void ieee80211_tx_status(struct ieee80211_hw *hw, struct sk_buff *skb, cpu_to_le32((1 << IEEE80211_RADIOTAP_TX_FLAGS) | (1 << IEEE80211_RADIOTAP_DATA_RETRIES)); - if (!(status->flags & IEEE80211_TX_STATUS_ACK) && + if (!(info->flags & IEEE80211_TX_STAT_ACK) && !is_multicast_ether_addr(hdr->addr1)) rthdr->tx_flags |= cpu_to_le16(IEEE80211_RADIOTAP_F_TX_FAIL); - if ((status->control.flags & IEEE80211_TXCTL_USE_RTS_CTS) && - (status->control.flags & IEEE80211_TXCTL_USE_CTS_PROTECT)) + if ((info->flags & IEEE80211_TX_CTL_USE_RTS_CTS) && + (info->flags & IEEE80211_TX_CTL_USE_CTS_PROTECT)) rthdr->tx_flags |= cpu_to_le16(IEEE80211_RADIOTAP_F_TX_CTS); - else if (status->control.flags & IEEE80211_TXCTL_USE_RTS_CTS) + else if (info->flags & IEEE80211_TX_CTL_USE_RTS_CTS) rthdr->tx_flags |= cpu_to_le16(IEEE80211_RADIOTAP_F_TX_RTS); - rthdr->data_retries = status->retry_count; + rthdr->data_retries = info->status.retry_count; /* XXX: is this sufficient for BPF? */ skb_set_mac_header(skb, 0); @@ -1513,7 +679,7 @@ void ieee80211_tx_status(struct ieee80211_hw *hw, struct sk_buff *skb, rcu_read_lock(); list_for_each_entry_rcu(sdata, &local->interfaces, list) { - if (sdata->vif.type == IEEE80211_IF_TYPE_MNTR) { + if (sdata->vif.type == NL80211_IFTYPE_MONITOR) { if (!netif_running(sdata->dev)) continue; @@ -1589,8 +755,6 @@ struct ieee80211_hw *ieee80211_alloc_hw(size_t priv_data_len, local->hw.queues = 1; /* default */ - local->bridge_packets = 1; - local->rts_threshold = IEEE80211_MAX_RTS_THRESHOLD; local->fragmentation_threshold = IEEE80211_MAX_FRAG_THRESHOLD; local->short_retry_limit = 7; @@ -1601,7 +765,7 @@ struct ieee80211_hw *ieee80211_alloc_hw(size_t priv_data_len, spin_lock_init(&local->key_lock); - INIT_DELAYED_WORK(&local->scan_work, ieee80211_sta_scan_work); + INIT_DELAYED_WORK(&local->scan_work, ieee80211_scan_work); sta_info_init(local); @@ -1628,7 +792,7 @@ int ieee80211_register_hw(struct ieee80211_hw *hw) int result; enum ieee80211_band band; struct net_device *mdev; - struct ieee80211_sub_if_data *sdata; + struct ieee80211_master_priv *mpriv; /* * generic code guarantees at least one band, @@ -1648,23 +812,39 @@ int ieee80211_register_hw(struct ieee80211_hw *hw) } } + /* if low-level driver supports AP, we also support VLAN */ + if (local->hw.wiphy->interface_modes & BIT(NL80211_IFTYPE_AP)) + local->hw.wiphy->interface_modes |= BIT(NL80211_IFTYPE_AP_VLAN); + + /* mac80211 always supports monitor */ + local->hw.wiphy->interface_modes |= BIT(NL80211_IFTYPE_MONITOR); + result = wiphy_register(local->hw.wiphy); if (result < 0) return result; - /* for now, mdev needs sub_if_data :/ */ - mdev = alloc_netdev(sizeof(struct ieee80211_sub_if_data), - "wmaster%d", ether_setup); + /* + * We use the number of queues for feature tests (QoS, HT) internally + * so restrict them appropriately. + */ + if (hw->queues > IEEE80211_MAX_QUEUES) + hw->queues = IEEE80211_MAX_QUEUES; + if (hw->ampdu_queues > IEEE80211_MAX_AMPDU_QUEUES) + hw->ampdu_queues = IEEE80211_MAX_AMPDU_QUEUES; + if (hw->queues < 4) + hw->ampdu_queues = 0; + + mdev = alloc_netdev_mq(sizeof(struct ieee80211_master_priv), + "wmaster%d", ether_setup, + ieee80211_num_queues(hw)); if (!mdev) goto fail_mdev_alloc; - sdata = IEEE80211_DEV_TO_SUB_IF(mdev); - mdev->ieee80211_ptr = &sdata->wdev; - sdata->wdev.wiphy = local->hw.wiphy; - + mpriv = netdev_priv(mdev); + mpriv->local = local; local->mdev = mdev; - ieee80211_rx_bss_list_init(mdev); + ieee80211_rx_bss_list_init(local); mdev->hard_start_xmit = ieee80211_master_start_xmit; mdev->open = ieee80211_master_open; @@ -1673,18 +853,8 @@ int ieee80211_register_hw(struct ieee80211_hw *hw) mdev->header_ops = &ieee80211_header_ops; mdev->set_multicast_list = ieee80211_master_set_multicast_list; - sdata->vif.type = IEEE80211_IF_TYPE_AP; - sdata->dev = mdev; - sdata->local = local; - sdata->u.ap.force_unicast_rateidx = -1; - sdata->u.ap.max_ratectrl_rateidx = -1; - ieee80211_if_sdata_init(sdata); - - /* no RCU needed since we're still during init phase */ - list_add_tail(&sdata->list, &local->interfaces); - name = wiphy_dev(local->hw.wiphy)->driver->name; - local->hw.workqueue = create_singlethread_workqueue(name); + local->hw.workqueue = create_freezeable_workqueue(name); if (!local->hw.workqueue) { result = -ENOMEM; goto fail_workqueue; @@ -1700,15 +870,21 @@ int ieee80211_register_hw(struct ieee80211_hw *hw) debugfs_hw_add(local); - local->hw.conf.beacon_int = 1000; + if (local->hw.conf.beacon_int < 10) + local->hw.conf.beacon_int = 100; + + if (local->hw.max_listen_interval == 0) + local->hw.max_listen_interval = 1; + + local->hw.conf.listen_interval = local->hw.max_listen_interval; - local->wstats_flags |= local->hw.max_rssi ? - IW_QUAL_LEVEL_UPDATED : IW_QUAL_LEVEL_INVALID; - local->wstats_flags |= local->hw.max_signal ? + local->wstats_flags |= local->hw.flags & (IEEE80211_HW_SIGNAL_UNSPEC | + IEEE80211_HW_SIGNAL_DB | + IEEE80211_HW_SIGNAL_DBM) ? IW_QUAL_QUAL_UPDATED : IW_QUAL_QUAL_INVALID; - local->wstats_flags |= local->hw.max_noise ? + local->wstats_flags |= local->hw.flags & IEEE80211_HW_NOISE_DBM ? IW_QUAL_NOISE_UPDATED : IW_QUAL_NOISE_INVALID; - if (local->hw.max_rssi < 0 || local->hw.max_noise < 0) + if (local->hw.flags & IEEE80211_HW_SIGNAL_DBM) local->wstats_flags |= IW_QUAL_DBM; result = sta_info_start(local); @@ -1727,9 +903,6 @@ int ieee80211_register_hw(struct ieee80211_hw *hw) if (result < 0) goto fail_dev; - ieee80211_debugfs_add_netdev(IEEE80211_DEV_TO_SUB_IF(local->mdev)); - ieee80211_if_set_type(local->mdev, IEEE80211_IF_TYPE_AP); - result = ieee80211_init_rate_ctrl_alg(local, hw->rate_control_algorithm); if (result < 0) { @@ -1741,21 +914,20 @@ int ieee80211_register_hw(struct ieee80211_hw *hw) result = ieee80211_wep_init(local); if (result < 0) { - printk(KERN_DEBUG "%s: Failed to initialize wep\n", - wiphy_name(local->hw.wiphy)); + printk(KERN_DEBUG "%s: Failed to initialize wep: %d\n", + wiphy_name(local->hw.wiphy), result); goto fail_wep; } - ieee80211_install_qdisc(local->mdev); + local->mdev->select_queue = ieee80211_select_queue; /* add one default STA interface */ - result = ieee80211_if_add(local->mdev, "wlan%d", NULL, - IEEE80211_IF_TYPE_STA, NULL); + result = ieee80211_if_add(local, "wlan%d", NULL, + NL80211_IFTYPE_STATION, NULL); if (result) printk(KERN_WARNING "%s: Failed to add default virtual iface\n", wiphy_name(local->hw.wiphy)); - local->reg_state = IEEE80211_DEV_REGISTERED; rtnl_unlock(); ieee80211_led_init(local); @@ -1765,7 +937,6 @@ int ieee80211_register_hw(struct ieee80211_hw *hw) fail_wep: rate_control_deinitialize(local); fail_rate: - ieee80211_debugfs_remove_netdev(IEEE80211_DEV_TO_SUB_IF(local->mdev)); unregister_netdevice(local->mdev); local->mdev = NULL; fail_dev: @@ -1775,10 +946,8 @@ fail_sta_info: debugfs_hw_del(local); destroy_workqueue(local->hw.workqueue); fail_workqueue: - if (local->mdev != NULL) { - ieee80211_if_free(local->mdev); - local->mdev = NULL; - } + if (local->mdev) + free_netdev(local->mdev); fail_mdev_alloc: wiphy_unregister(local->hw.wiphy); return result; @@ -1788,42 +957,27 @@ EXPORT_SYMBOL(ieee80211_register_hw); void ieee80211_unregister_hw(struct ieee80211_hw *hw) { struct ieee80211_local *local = hw_to_local(hw); - struct ieee80211_sub_if_data *sdata, *tmp; tasklet_kill(&local->tx_pending_tasklet); tasklet_kill(&local->tasklet); rtnl_lock(); - BUG_ON(local->reg_state != IEEE80211_DEV_REGISTERED); - - local->reg_state = IEEE80211_DEV_UNREGISTERED; - /* * At this point, interface list manipulations are fine * because the driver cannot be handing us frames any * more and the tasklet is killed. */ - /* - * First, we remove all non-master interfaces. Do this because they - * may have bss pointer dependency on the master, and when we free - * the master these would be freed as well, breaking our list - * iteration completely. - */ - list_for_each_entry_safe(sdata, tmp, &local->interfaces, list) { - if (sdata->dev == local->mdev) - continue; - list_del(&sdata->list); - __ieee80211_if_del(local, sdata); - } + /* First, we remove all virtual interfaces. */ + ieee80211_remove_interfaces(local); /* then, finally, remove the master interface */ - __ieee80211_if_del(local, IEEE80211_DEV_TO_SUB_IF(local->mdev)); + unregister_netdevice(local->mdev); rtnl_unlock(); - ieee80211_rx_bss_list_deinit(local->mdev); + ieee80211_rx_bss_list_deinit(local); ieee80211_clear_tx_pending(local); sta_info_stop(local); rate_control_deinitialize(local); @@ -1840,8 +994,7 @@ void ieee80211_unregister_hw(struct ieee80211_hw *hw) wiphy_unregister(local->hw.wiphy); ieee80211_wep_free(local); ieee80211_led_exit(local); - ieee80211_if_free(local->mdev); - local->mdev = NULL; + free_netdev(local->mdev); } EXPORT_SYMBOL(ieee80211_unregister_hw); @@ -1858,32 +1011,27 @@ static int __init ieee80211_init(void) struct sk_buff *skb; int ret; - BUILD_BUG_ON(sizeof(struct ieee80211_tx_packet_data) > sizeof(skb->cb)); + BUILD_BUG_ON(sizeof(struct ieee80211_tx_info) > sizeof(skb->cb)); + BUILD_BUG_ON(offsetof(struct ieee80211_tx_info, driver_data) + + IEEE80211_TX_INFO_DRIVER_DATA_SIZE > sizeof(skb->cb)); - ret = rc80211_pid_init(); + ret = rc80211_minstrel_init(); if (ret) - goto out; + return ret; - ret = ieee80211_wme_register(); - if (ret) { - printk(KERN_DEBUG "ieee80211_init: failed to " - "initialize WME (err=%d)\n", ret); - goto out_cleanup_pid; - } + ret = rc80211_pid_init(); + if (ret) + return ret; ieee80211_debugfs_netdev_init(); return 0; - - out_cleanup_pid: - rc80211_pid_exit(); - out: - return ret; } static void __exit ieee80211_exit(void) { rc80211_pid_exit(); + rc80211_minstrel_exit(); /* * For key todo, it'll be empty by now but the work @@ -1894,7 +1042,6 @@ static void __exit ieee80211_exit(void) if (mesh_allocated) ieee80211s_stop(); - ieee80211_wme_unregister(); ieee80211_debugfs_netdev_exit(); } diff --git a/net/mac80211/mesh.c b/net/mac80211/mesh.c index 697ef67f96b6..8013277924f2 100644 --- a/net/mac80211/mesh.c +++ b/net/mac80211/mesh.c @@ -12,6 +12,9 @@ #include "ieee80211_i.h" #include "mesh.h" +#define IEEE80211_MESH_PEER_INACTIVITY_LIMIT (1800 * HZ) +#define IEEE80211_MESH_HOUSEKEEPING_INTERVAL (60 * HZ) + #define PP_OFFSET 1 /* Path Selection Protocol */ #define PM_OFFSET 5 /* Path Selection Metric */ #define CC_OFFSET 9 /* Congestion Control Mode */ @@ -35,19 +38,28 @@ void ieee80211s_stop(void) kmem_cache_destroy(rm_cache); } +static void ieee80211_mesh_housekeeping_timer(unsigned long data) +{ + struct ieee80211_sub_if_data *sdata = (void *) data; + struct ieee80211_local *local = sdata->local; + struct ieee80211_if_mesh *ifmsh = &sdata->u.mesh; + + ifmsh->housekeeping = true; + queue_work(local->hw.workqueue, &ifmsh->work); +} + /** * mesh_matches_local - check if the config of a mesh point matches ours * * @ie: information elements of a management frame from the mesh peer - * @dev: local mesh interface + * @sdata: local mesh subif * * This function checks if the mesh configuration of a mesh point matches the * local mesh configuration, i.e. if both nodes belong to the same mesh network. */ -bool mesh_matches_local(struct ieee802_11_elems *ie, struct net_device *dev) +bool mesh_matches_local(struct ieee802_11_elems *ie, struct ieee80211_sub_if_data *sdata) { - struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev); - struct ieee80211_if_sta *sta = &sdata->u.sta; + struct ieee80211_if_mesh *ifmsh = &sdata->u.mesh; /* * As support for each feature is added, check for matching @@ -59,11 +71,11 @@ bool mesh_matches_local(struct ieee802_11_elems *ie, struct net_device *dev) * - MDA enabled * - Power management control on fc */ - if (sta->mesh_id_len == ie->mesh_id_len && - memcmp(sta->mesh_id, ie->mesh_id, ie->mesh_id_len) == 0 && - memcmp(sta->mesh_pp_id, ie->mesh_config + PP_OFFSET, 4) == 0 && - memcmp(sta->mesh_pm_id, ie->mesh_config + PM_OFFSET, 4) == 0 && - memcmp(sta->mesh_cc_id, ie->mesh_config + CC_OFFSET, 4) == 0) + if (ifmsh->mesh_id_len == ie->mesh_id_len && + memcmp(ifmsh->mesh_id, ie->mesh_id, ie->mesh_id_len) == 0 && + memcmp(ifmsh->mesh_pp_id, ie->mesh_config + PP_OFFSET, 4) == 0 && + memcmp(ifmsh->mesh_pm_id, ie->mesh_config + PM_OFFSET, 4) == 0 && + memcmp(ifmsh->mesh_cc_id, ie->mesh_config + CC_OFFSET, 4) == 0) return true; return false; @@ -73,10 +85,8 @@ bool mesh_matches_local(struct ieee802_11_elems *ie, struct net_device *dev) * mesh_peer_accepts_plinks - check if an mp is willing to establish peer links * * @ie: information elements of a management frame from the mesh peer - * @dev: local mesh interface */ -bool mesh_peer_accepts_plinks(struct ieee802_11_elems *ie, - struct net_device *dev) +bool mesh_peer_accepts_plinks(struct ieee802_11_elems *ie) { return (*(ie->mesh_config + CAPAB_OFFSET) & ACCEPT_PLINKS) != 0; } @@ -98,11 +108,11 @@ void mesh_accept_plinks_update(struct ieee80211_sub_if_data *sdata) */ free_plinks = mesh_plink_availables(sdata); - if (free_plinks != sdata->u.sta.accepting_plinks) - ieee80211_sta_timer((unsigned long) sdata); + if (free_plinks != sdata->u.mesh.accepting_plinks) + ieee80211_mesh_housekeeping_timer((unsigned long) sdata); } -void mesh_ids_set_default(struct ieee80211_if_sta *sta) +void mesh_ids_set_default(struct ieee80211_if_mesh *sta) { u8 def_id[4] = {0x00, 0x0F, 0xAC, 0xff}; @@ -111,28 +121,26 @@ void mesh_ids_set_default(struct ieee80211_if_sta *sta) memcpy(sta->mesh_cc_id, def_id, 4); } -int mesh_rmc_init(struct net_device *dev) +int mesh_rmc_init(struct ieee80211_sub_if_data *sdata) { - struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev); int i; - sdata->u.sta.rmc = kmalloc(sizeof(struct mesh_rmc), GFP_KERNEL); - if (!sdata->u.sta.rmc) + sdata->u.mesh.rmc = kmalloc(sizeof(struct mesh_rmc), GFP_KERNEL); + if (!sdata->u.mesh.rmc) return -ENOMEM; - sdata->u.sta.rmc->idx_mask = RMC_BUCKETS - 1; + sdata->u.mesh.rmc->idx_mask = RMC_BUCKETS - 1; for (i = 0; i < RMC_BUCKETS; i++) - INIT_LIST_HEAD(&sdata->u.sta.rmc->bucket[i].list); + INIT_LIST_HEAD(&sdata->u.mesh.rmc->bucket[i].list); return 0; } -void mesh_rmc_free(struct net_device *dev) +void mesh_rmc_free(struct ieee80211_sub_if_data *sdata) { - struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev); - struct mesh_rmc *rmc = sdata->u.sta.rmc; + struct mesh_rmc *rmc = sdata->u.mesh.rmc; struct rmc_entry *p, *n; int i; - if (!sdata->u.sta.rmc) + if (!sdata->u.mesh.rmc) return; for (i = 0; i < RMC_BUCKETS; i++) @@ -142,7 +150,7 @@ void mesh_rmc_free(struct net_device *dev) } kfree(rmc); - sdata->u.sta.rmc = NULL; + sdata->u.mesh.rmc = NULL; } /** @@ -158,10 +166,9 @@ void mesh_rmc_free(struct net_device *dev) * it. */ int mesh_rmc_check(u8 *sa, struct ieee80211s_hdr *mesh_hdr, - struct net_device *dev) + struct ieee80211_sub_if_data *sdata) { - struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev); - struct mesh_rmc *rmc = sdata->u.sta.rmc; + struct mesh_rmc *rmc = sdata->u.mesh.rmc; u32 seqnum = 0; int entries = 0; u8 idx; @@ -194,10 +201,9 @@ int mesh_rmc_check(u8 *sa, struct ieee80211s_hdr *mesh_hdr, return 0; } -void mesh_mgmt_ies_add(struct sk_buff *skb, struct net_device *dev) +void mesh_mgmt_ies_add(struct sk_buff *skb, struct ieee80211_sub_if_data *sdata) { - struct ieee80211_local *local = wdev_priv(dev->ieee80211_ptr); - struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev); + struct ieee80211_local *local = sdata->local; struct ieee80211_supported_band *sband; u8 *pos; int len, i, rate; @@ -224,11 +230,11 @@ void mesh_mgmt_ies_add(struct sk_buff *skb, struct net_device *dev) } } - pos = skb_put(skb, 2 + sdata->u.sta.mesh_id_len); + pos = skb_put(skb, 2 + sdata->u.mesh.mesh_id_len); *pos++ = WLAN_EID_MESH_ID; - *pos++ = sdata->u.sta.mesh_id_len; - if (sdata->u.sta.mesh_id_len) - memcpy(pos, sdata->u.sta.mesh_id, sdata->u.sta.mesh_id_len); + *pos++ = sdata->u.mesh.mesh_id_len; + if (sdata->u.mesh.mesh_id_len) + memcpy(pos, sdata->u.mesh.mesh_id, sdata->u.mesh.mesh_id_len); pos = skb_put(skb, 21); *pos++ = WLAN_EID_MESH_CONFIG; @@ -237,15 +243,15 @@ void mesh_mgmt_ies_add(struct sk_buff *skb, struct net_device *dev) *pos++ = 1; /* Active path selection protocol ID */ - memcpy(pos, sdata->u.sta.mesh_pp_id, 4); + memcpy(pos, sdata->u.mesh.mesh_pp_id, 4); pos += 4; /* Active path selection metric ID */ - memcpy(pos, sdata->u.sta.mesh_pm_id, 4); + memcpy(pos, sdata->u.mesh.mesh_pm_id, 4); pos += 4; /* Congestion control mode identifier */ - memcpy(pos, sdata->u.sta.mesh_cc_id, 4); + memcpy(pos, sdata->u.mesh.mesh_cc_id, 4); pos += 4; /* Channel precedence: @@ -255,17 +261,17 @@ void mesh_mgmt_ies_add(struct sk_buff *skb, struct net_device *dev) pos += 4; /* Mesh capability */ - sdata->u.sta.accepting_plinks = mesh_plink_availables(sdata); - *pos++ = sdata->u.sta.accepting_plinks ? ACCEPT_PLINKS : 0x00; + sdata->u.mesh.accepting_plinks = mesh_plink_availables(sdata); + *pos++ = sdata->u.mesh.accepting_plinks ? ACCEPT_PLINKS : 0x00; *pos++ = 0x00; return; } -u32 mesh_table_hash(u8 *addr, struct net_device *dev, struct mesh_table *tbl) +u32 mesh_table_hash(u8 *addr, struct ieee80211_sub_if_data *sdata, struct mesh_table *tbl) { /* Use last four bytes of hw addr and interface index as hash index */ - return jhash_2words(*(u32 *)(addr+2), dev->ifindex, tbl->hash_rnd) + return jhash_2words(*(u32 *)(addr+2), sdata->dev->ifindex, tbl->hash_rnd) & tbl->hash_mask; } @@ -315,6 +321,13 @@ struct mesh_table *mesh_table_alloc(int size_order) return newtbl; } +static void __mesh_table_free(struct mesh_table *tbl) +{ + kfree(tbl->hash_buckets); + kfree(tbl->hashwlock); + kfree(tbl); +} + void mesh_table_free(struct mesh_table *tbl, bool free_leafs) { struct hlist_head *mesh_hash; @@ -330,40 +343,33 @@ void mesh_table_free(struct mesh_table *tbl, bool free_leafs) } spin_unlock(&tbl->hashwlock[i]); } - kfree(tbl->hash_buckets); - kfree(tbl->hashwlock); - kfree(tbl); + __mesh_table_free(tbl); } static void ieee80211_mesh_path_timer(unsigned long data) { struct ieee80211_sub_if_data *sdata = (struct ieee80211_sub_if_data *) data; - struct ieee80211_if_sta *ifsta = &sdata->u.sta; - struct ieee80211_local *local = wdev_priv(&sdata->wdev); + struct ieee80211_if_mesh *ifmsh = &sdata->u.mesh; + struct ieee80211_local *local = sdata->local; - queue_work(local->hw.workqueue, &ifsta->work); + queue_work(local->hw.workqueue, &ifmsh->work); } struct mesh_table *mesh_table_grow(struct mesh_table *tbl) { struct mesh_table *newtbl; struct hlist_head *oldhash; - struct hlist_node *p; - int err = 0; + struct hlist_node *p, *q; int i; if (atomic_read(&tbl->entries) - < tbl->mean_chain_len * (tbl->hash_mask + 1)) { - err = -EPERM; + < tbl->mean_chain_len * (tbl->hash_mask + 1)) goto endgrow; - } newtbl = mesh_table_alloc(tbl->size_order + 1); - if (!newtbl) { - err = -ENOMEM; + if (!newtbl) goto endgrow; - } newtbl->free_node = tbl->free_node; newtbl->mean_chain_len = tbl->mean_chain_len; @@ -373,13 +379,19 @@ struct mesh_table *mesh_table_grow(struct mesh_table *tbl) oldhash = tbl->hash_buckets; for (i = 0; i <= tbl->hash_mask; i++) hlist_for_each(p, &oldhash[i]) - tbl->copy_node(p, newtbl); + if (tbl->copy_node(p, newtbl) < 0) + goto errcopy; + return newtbl; + +errcopy: + for (i = 0; i <= newtbl->hash_mask; i++) { + hlist_for_each_safe(p, q, &newtbl->hash_buckets[i]) + tbl->free_node(p, 0); + } + __mesh_table_free(newtbl); endgrow: - if (err) - return NULL; - else - return newtbl; + return NULL; } /** @@ -393,50 +405,264 @@ int ieee80211_new_mesh_header(struct ieee80211s_hdr *meshhdr, struct ieee80211_sub_if_data *sdata) { meshhdr->flags = 0; - meshhdr->ttl = sdata->u.sta.mshcfg.dot11MeshTTL; - put_unaligned(cpu_to_le32(sdata->u.sta.mesh_seqnum), &meshhdr->seqnum); - sdata->u.sta.mesh_seqnum++; + meshhdr->ttl = sdata->u.mesh.mshcfg.dot11MeshTTL; + put_unaligned(cpu_to_le32(sdata->u.mesh.mesh_seqnum), &meshhdr->seqnum); + sdata->u.mesh.mesh_seqnum++; return 6; } +static void ieee80211_mesh_housekeeping(struct ieee80211_sub_if_data *sdata, + struct ieee80211_if_mesh *ifmsh) +{ + bool free_plinks; + +#ifdef CONFIG_MAC80211_VERBOSE_DEBUG + printk(KERN_DEBUG "%s: running mesh housekeeping\n", + sdata->dev->name); +#endif + + ieee80211_sta_expire(sdata, IEEE80211_MESH_PEER_INACTIVITY_LIMIT); + mesh_path_expire(sdata); + + free_plinks = mesh_plink_availables(sdata); + if (free_plinks != sdata->u.mesh.accepting_plinks) + ieee80211_if_config(sdata, IEEE80211_IFCC_BEACON); + + ifmsh->housekeeping = false; + mod_timer(&ifmsh->housekeeping_timer, + round_jiffies(jiffies + IEEE80211_MESH_HOUSEKEEPING_INTERVAL)); +} + + +void ieee80211_start_mesh(struct ieee80211_sub_if_data *sdata) +{ + struct ieee80211_if_mesh *ifmsh = &sdata->u.mesh; + struct ieee80211_local *local = sdata->local; + + ifmsh->housekeeping = true; + queue_work(local->hw.workqueue, &ifmsh->work); + ieee80211_if_config(sdata, IEEE80211_IFCC_BEACON); +} + +void ieee80211_stop_mesh(struct ieee80211_sub_if_data *sdata) +{ + del_timer_sync(&sdata->u.mesh.housekeeping_timer); + /* + * If the timer fired while we waited for it, it will have + * requeued the work. Now the work will be running again + * but will not rearm the timer again because it checks + * whether the interface is running, which, at this point, + * it no longer is. + */ + cancel_work_sync(&sdata->u.mesh.work); + + /* + * When we get here, the interface is marked down. + * Call synchronize_rcu() to wait for the RX path + * should it be using the interface and enqueuing + * frames at this very time on another CPU. + */ + synchronize_rcu(); + skb_queue_purge(&sdata->u.mesh.skb_queue); +} + +static void ieee80211_mesh_rx_bcn_presp(struct ieee80211_sub_if_data *sdata, + u16 stype, + struct ieee80211_mgmt *mgmt, + size_t len, + struct ieee80211_rx_status *rx_status) +{ + struct ieee80211_local *local= sdata->local; + struct ieee802_11_elems elems; + struct ieee80211_channel *channel; + u64 supp_rates = 0; + size_t baselen; + int freq; + enum ieee80211_band band = rx_status->band; + + /* ignore ProbeResp to foreign address */ + if (stype == IEEE80211_STYPE_PROBE_RESP && + compare_ether_addr(mgmt->da, sdata->dev->dev_addr)) + return; + + baselen = (u8 *) mgmt->u.probe_resp.variable - (u8 *) mgmt; + if (baselen > len) + return; + + ieee802_11_parse_elems(mgmt->u.probe_resp.variable, len - baselen, + &elems); + + if (elems.ds_params && elems.ds_params_len == 1) + freq = ieee80211_channel_to_frequency(elems.ds_params[0]); + else + freq = rx_status->freq; + + channel = ieee80211_get_channel(local->hw.wiphy, freq); + + if (!channel || channel->flags & IEEE80211_CHAN_DISABLED) + return; + + if (elems.mesh_id && elems.mesh_config && + mesh_matches_local(&elems, sdata)) { + supp_rates = ieee80211_sta_get_rates(local, &elems, band); + + mesh_neighbour_update(mgmt->sa, supp_rates, sdata, + mesh_peer_accepts_plinks(&elems)); + } +} + +static void ieee80211_mesh_rx_mgmt_action(struct ieee80211_sub_if_data *sdata, + struct ieee80211_mgmt *mgmt, + size_t len, + struct ieee80211_rx_status *rx_status) +{ + switch (mgmt->u.action.category) { + case PLINK_CATEGORY: + mesh_rx_plink_frame(sdata, mgmt, len, rx_status); + break; + case MESH_PATH_SEL_CATEGORY: + mesh_rx_path_sel_frame(sdata, mgmt, len); + break; + } +} + +static void ieee80211_mesh_rx_queued_mgmt(struct ieee80211_sub_if_data *sdata, + struct sk_buff *skb) +{ + struct ieee80211_rx_status *rx_status; + struct ieee80211_if_mesh *ifmsh; + struct ieee80211_mgmt *mgmt; + u16 stype; + + ifmsh = &sdata->u.mesh; + + rx_status = (struct ieee80211_rx_status *) skb->cb; + mgmt = (struct ieee80211_mgmt *) skb->data; + stype = le16_to_cpu(mgmt->frame_control) & IEEE80211_FCTL_STYPE; + + switch (stype) { + case IEEE80211_STYPE_PROBE_RESP: + case IEEE80211_STYPE_BEACON: + ieee80211_mesh_rx_bcn_presp(sdata, stype, mgmt, skb->len, + rx_status); + break; + case IEEE80211_STYPE_ACTION: + ieee80211_mesh_rx_mgmt_action(sdata, mgmt, skb->len, rx_status); + break; + } + + kfree_skb(skb); +} + +static void ieee80211_mesh_work(struct work_struct *work) +{ + struct ieee80211_sub_if_data *sdata = + container_of(work, struct ieee80211_sub_if_data, u.mesh.work); + struct ieee80211_local *local = sdata->local; + struct ieee80211_if_mesh *ifmsh = &sdata->u.mesh; + struct sk_buff *skb; + + if (!netif_running(sdata->dev)) + return; + + if (local->sw_scanning || local->hw_scanning) + return; + + while ((skb = skb_dequeue(&ifmsh->skb_queue))) + ieee80211_mesh_rx_queued_mgmt(sdata, skb); + + if (ifmsh->preq_queue_len && + time_after(jiffies, + ifmsh->last_preq + msecs_to_jiffies(ifmsh->mshcfg.dot11MeshHWMPpreqMinInterval))) + mesh_path_start_discovery(sdata); + + if (ifmsh->housekeeping) + ieee80211_mesh_housekeeping(sdata, ifmsh); +} + +void ieee80211_mesh_notify_scan_completed(struct ieee80211_local *local) +{ + struct ieee80211_sub_if_data *sdata; + + rcu_read_lock(); + list_for_each_entry_rcu(sdata, &local->interfaces, list) + if (ieee80211_vif_is_mesh(&sdata->vif)) + queue_work(local->hw.workqueue, &sdata->u.mesh.work); + rcu_read_unlock(); +} + void ieee80211_mesh_init_sdata(struct ieee80211_sub_if_data *sdata) { - struct ieee80211_if_sta *ifsta = &sdata->u.sta; - - ifsta->mshcfg.dot11MeshRetryTimeout = MESH_RET_T; - ifsta->mshcfg.dot11MeshConfirmTimeout = MESH_CONF_T; - ifsta->mshcfg.dot11MeshHoldingTimeout = MESH_HOLD_T; - ifsta->mshcfg.dot11MeshMaxRetries = MESH_MAX_RETR; - ifsta->mshcfg.dot11MeshTTL = MESH_TTL; - ifsta->mshcfg.auto_open_plinks = true; - ifsta->mshcfg.dot11MeshMaxPeerLinks = + struct ieee80211_if_mesh *ifmsh = &sdata->u.mesh; + + INIT_WORK(&ifmsh->work, ieee80211_mesh_work); + setup_timer(&ifmsh->housekeeping_timer, + ieee80211_mesh_housekeeping_timer, + (unsigned long) sdata); + skb_queue_head_init(&sdata->u.mesh.skb_queue); + + ifmsh->mshcfg.dot11MeshRetryTimeout = MESH_RET_T; + ifmsh->mshcfg.dot11MeshConfirmTimeout = MESH_CONF_T; + ifmsh->mshcfg.dot11MeshHoldingTimeout = MESH_HOLD_T; + ifmsh->mshcfg.dot11MeshMaxRetries = MESH_MAX_RETR; + ifmsh->mshcfg.dot11MeshTTL = MESH_TTL; + ifmsh->mshcfg.auto_open_plinks = true; + ifmsh->mshcfg.dot11MeshMaxPeerLinks = MESH_MAX_ESTAB_PLINKS; - ifsta->mshcfg.dot11MeshHWMPactivePathTimeout = + ifmsh->mshcfg.dot11MeshHWMPactivePathTimeout = MESH_PATH_TIMEOUT; - ifsta->mshcfg.dot11MeshHWMPpreqMinInterval = + ifmsh->mshcfg.dot11MeshHWMPpreqMinInterval = MESH_PREQ_MIN_INT; - ifsta->mshcfg.dot11MeshHWMPnetDiameterTraversalTime = + ifmsh->mshcfg.dot11MeshHWMPnetDiameterTraversalTime = MESH_DIAM_TRAVERSAL_TIME; - ifsta->mshcfg.dot11MeshHWMPmaxPREQretries = + ifmsh->mshcfg.dot11MeshHWMPmaxPREQretries = MESH_MAX_PREQ_RETRIES; - ifsta->mshcfg.path_refresh_time = + ifmsh->mshcfg.path_refresh_time = MESH_PATH_REFRESH_TIME; - ifsta->mshcfg.min_discovery_timeout = + ifmsh->mshcfg.min_discovery_timeout = MESH_MIN_DISCOVERY_TIMEOUT; - ifsta->accepting_plinks = true; - ifsta->preq_id = 0; - ifsta->dsn = 0; - atomic_set(&ifsta->mpaths, 0); - mesh_rmc_init(sdata->dev); - ifsta->last_preq = jiffies; + ifmsh->accepting_plinks = true; + ifmsh->preq_id = 0; + ifmsh->dsn = 0; + atomic_set(&ifmsh->mpaths, 0); + mesh_rmc_init(sdata); + ifmsh->last_preq = jiffies; /* Allocate all mesh structures when creating the first mesh interface. */ if (!mesh_allocated) ieee80211s_init(); - mesh_ids_set_default(ifsta); - setup_timer(&ifsta->mesh_path_timer, + mesh_ids_set_default(ifmsh); + setup_timer(&ifmsh->mesh_path_timer, ieee80211_mesh_path_timer, (unsigned long) sdata); - INIT_LIST_HEAD(&ifsta->preq_queue.list); - spin_lock_init(&ifsta->mesh_preq_queue_lock); + INIT_LIST_HEAD(&ifmsh->preq_queue.list); + spin_lock_init(&ifmsh->mesh_preq_queue_lock); +} + +ieee80211_rx_result +ieee80211_mesh_rx_mgmt(struct ieee80211_sub_if_data *sdata, struct sk_buff *skb, + struct ieee80211_rx_status *rx_status) +{ + struct ieee80211_local *local = sdata->local; + struct ieee80211_if_mesh *ifmsh = &sdata->u.mesh; + struct ieee80211_mgmt *mgmt; + u16 fc; + + if (skb->len < 24) + return RX_DROP_MONITOR; + + mgmt = (struct ieee80211_mgmt *) skb->data; + fc = le16_to_cpu(mgmt->frame_control); + + switch (fc & IEEE80211_FCTL_STYPE) { + case IEEE80211_STYPE_PROBE_RESP: + case IEEE80211_STYPE_BEACON: + case IEEE80211_STYPE_ACTION: + memcpy(skb->cb, rx_status, sizeof(*rx_status)); + skb_queue_tail(&ifmsh->skb_queue, skb); + queue_work(local->hw.workqueue, &ifmsh->work); + return RX_QUEUED; + } + + return RX_CONTINUE; } diff --git a/net/mac80211/mesh.h b/net/mac80211/mesh.h index 2e161f6d8288..e10471c6ba42 100644 --- a/net/mac80211/mesh.h +++ b/net/mac80211/mesh.h @@ -47,7 +47,7 @@ enum mesh_path_flags { * struct mesh_path - mac80211 mesh path structure * * @dst: mesh path destination mac address - * @dev: mesh path device + * @sdata: mesh subif * @next_hop: mesh neighbor to which frames for this destination will be * forwarded * @timer: mesh path discovery timer @@ -64,14 +64,15 @@ enum mesh_path_flags { * @state_lock: mesh pat state lock * * - * The combination of dst and dev is unique in the mesh path table. Since the + * The combination of dst and sdata is unique in the mesh path table. Since the * next_hop STA is only protected by RCU as well, deleting the STA must also * remove/substitute the mesh_path structure and wait until that is no longer * reachable before destroying the STA completely. */ struct mesh_path { u8 dst[ETH_ALEN]; - struct net_device *dev; + u8 mpp[ETH_ALEN]; /* used for MPP or MAP */ + struct ieee80211_sub_if_data *sdata; struct sta_info *next_hop; struct timer_list timer; struct sk_buff_head frame_queue; @@ -109,7 +110,7 @@ struct mesh_table { __u32 hash_rnd; /* Used for hash generation */ atomic_t entries; /* Up to MAX_MESH_NEIGHBOURS */ void (*free_node) (struct hlist_node *p, bool free_leafs); - void (*copy_node) (struct hlist_node *p, struct mesh_table *newtbl); + int (*copy_node) (struct hlist_node *p, struct mesh_table *newtbl); int size_order; int mean_chain_len; }; @@ -203,68 +204,82 @@ int ieee80211_get_mesh_hdrlen(struct ieee80211s_hdr *meshhdr); int ieee80211_new_mesh_header(struct ieee80211s_hdr *meshhdr, struct ieee80211_sub_if_data *sdata); int mesh_rmc_check(u8 *addr, struct ieee80211s_hdr *mesh_hdr, - struct net_device *dev); -bool mesh_matches_local(struct ieee802_11_elems *ie, struct net_device *dev); -void mesh_ids_set_default(struct ieee80211_if_sta *sta); -void mesh_mgmt_ies_add(struct sk_buff *skb, struct net_device *dev); -void mesh_rmc_free(struct net_device *dev); -int mesh_rmc_init(struct net_device *dev); + struct ieee80211_sub_if_data *sdata); +bool mesh_matches_local(struct ieee802_11_elems *ie, + struct ieee80211_sub_if_data *sdata); +void mesh_ids_set_default(struct ieee80211_if_mesh *mesh); +void mesh_mgmt_ies_add(struct sk_buff *skb, + struct ieee80211_sub_if_data *sdata); +void mesh_rmc_free(struct ieee80211_sub_if_data *sdata); +int mesh_rmc_init(struct ieee80211_sub_if_data *sdata); void ieee80211s_init(void); void ieee80211s_stop(void); void ieee80211_mesh_init_sdata(struct ieee80211_sub_if_data *sdata); +ieee80211_rx_result +ieee80211_mesh_rx_mgmt(struct ieee80211_sub_if_data *sdata, struct sk_buff *skb, + struct ieee80211_rx_status *rx_status); +void ieee80211_start_mesh(struct ieee80211_sub_if_data *sdata); +void ieee80211_stop_mesh(struct ieee80211_sub_if_data *sdata); /* Mesh paths */ -int mesh_nexthop_lookup(u8 *next_hop, struct sk_buff *skb, - struct net_device *dev); -void mesh_path_start_discovery(struct net_device *dev); -struct mesh_path *mesh_path_lookup(u8 *dst, struct net_device *dev); -struct mesh_path *mesh_path_lookup_by_idx(int idx, struct net_device *dev); +int mesh_nexthop_lookup(struct sk_buff *skb, + struct ieee80211_sub_if_data *sdata); +void mesh_path_start_discovery(struct ieee80211_sub_if_data *sdata); +struct mesh_path *mesh_path_lookup(u8 *dst, + struct ieee80211_sub_if_data *sdata); +struct mesh_path *mpp_path_lookup(u8 *dst, + struct ieee80211_sub_if_data *sdata); +int mpp_path_add(u8 *dst, u8 *mpp, struct ieee80211_sub_if_data *sdata); +struct mesh_path *mesh_path_lookup_by_idx(int idx, + struct ieee80211_sub_if_data *sdata); void mesh_path_fix_nexthop(struct mesh_path *mpath, struct sta_info *next_hop); -void mesh_path_expire(struct net_device *dev); -void mesh_path_flush(struct net_device *dev); -void mesh_rx_path_sel_frame(struct net_device *dev, struct ieee80211_mgmt *mgmt, - size_t len); -int mesh_path_add(u8 *dst, struct net_device *dev); +void mesh_path_expire(struct ieee80211_sub_if_data *sdata); +void mesh_path_flush(struct ieee80211_sub_if_data *sdata); +void mesh_rx_path_sel_frame(struct ieee80211_sub_if_data *sdata, + struct ieee80211_mgmt *mgmt, size_t len); +int mesh_path_add(u8 *dst, struct ieee80211_sub_if_data *sdata); /* Mesh plinks */ -void mesh_neighbour_update(u8 *hw_addr, u64 rates, struct net_device *dev, - bool add); -bool mesh_peer_accepts_plinks(struct ieee802_11_elems *ie, - struct net_device *dev); +void mesh_neighbour_update(u8 *hw_addr, u64 rates, + struct ieee80211_sub_if_data *sdata, bool add); +bool mesh_peer_accepts_plinks(struct ieee802_11_elems *ie); void mesh_accept_plinks_update(struct ieee80211_sub_if_data *sdata); void mesh_plink_broken(struct sta_info *sta); void mesh_plink_deactivate(struct sta_info *sta); int mesh_plink_open(struct sta_info *sta); int mesh_plink_close(struct sta_info *sta); void mesh_plink_block(struct sta_info *sta); -void mesh_rx_plink_frame(struct net_device *dev, struct ieee80211_mgmt *mgmt, - size_t len, struct ieee80211_rx_status *rx_status); +void mesh_rx_plink_frame(struct ieee80211_sub_if_data *sdata, + struct ieee80211_mgmt *mgmt, size_t len, + struct ieee80211_rx_status *rx_status); /* Private interfaces */ /* Mesh tables */ struct mesh_table *mesh_table_alloc(int size_order); void mesh_table_free(struct mesh_table *tbl, bool free_leafs); struct mesh_table *mesh_table_grow(struct mesh_table *tbl); -u32 mesh_table_hash(u8 *addr, struct net_device *dev, struct mesh_table *tbl); +u32 mesh_table_hash(u8 *addr, struct ieee80211_sub_if_data *sdata, + struct mesh_table *tbl); /* Mesh paths */ int mesh_path_error_tx(u8 *dest, __le32 dest_dsn, u8 *ra, - struct net_device *dev); + struct ieee80211_sub_if_data *sdata); void mesh_path_assign_nexthop(struct mesh_path *mpath, struct sta_info *sta); void mesh_path_flush_pending(struct mesh_path *mpath); void mesh_path_tx_pending(struct mesh_path *mpath); int mesh_pathtbl_init(void); void mesh_pathtbl_unregister(void); -int mesh_path_del(u8 *addr, struct net_device *dev); +int mesh_path_del(u8 *addr, struct ieee80211_sub_if_data *sdata); void mesh_path_timer(unsigned long data); void mesh_path_flush_by_nexthop(struct sta_info *sta); -void mesh_path_discard_frame(struct sk_buff *skb, struct net_device *dev); +void mesh_path_discard_frame(struct sk_buff *skb, + struct ieee80211_sub_if_data *sdata); #ifdef CONFIG_MAC80211_MESH extern int mesh_allocated; static inline int mesh_plink_free_count(struct ieee80211_sub_if_data *sdata) { - return sdata->u.sta.mshcfg.dot11MeshMaxPeerLinks - - atomic_read(&sdata->u.sta.mshstats.estab_plinks); + return sdata->u.mesh.mshcfg.dot11MeshMaxPeerLinks - + atomic_read(&sdata->u.mesh.mshstats.estab_plinks); } static inline bool mesh_plink_availables(struct ieee80211_sub_if_data *sdata) @@ -282,10 +297,12 @@ static inline void mesh_path_activate(struct mesh_path *mpath) for (i = 0; i <= x->hash_mask; i++) \ hlist_for_each_entry_rcu(node, p, &x->hash_buckets[i], list) +void ieee80211_mesh_notify_scan_completed(struct ieee80211_local *local); + #else #define mesh_allocated 0 +static inline void +ieee80211_mesh_notify_scan_completed(struct ieee80211_local *local) {} #endif -#define MESH_PREQ(skb) (skb->cb + 30) - #endif /* IEEE80211S_H */ diff --git a/net/mac80211/mesh_hwmp.c b/net/mac80211/mesh_hwmp.c index af0cd1e3e213..501c7831adb4 100644 --- a/net/mac80211/mesh_hwmp.c +++ b/net/mac80211/mesh_hwmp.c @@ -26,7 +26,7 @@ static inline u32 u32_field_get(u8 *preq_elem, int offset, bool ae) { if (ae) offset += 6; - return le32_to_cpu(get_unaligned((__le32 *) (preq_elem + offset))); + return get_unaligned_le32(preq_elem + offset); } /* HWMP IE processing macros */ @@ -64,14 +64,14 @@ static inline u32 u32_field_get(u8 *preq_elem, int offset, bool ae) #define DSN_LT(x, y) ((long) (x) - (long) (y) < 0) #define net_traversal_jiffies(s) \ - msecs_to_jiffies(s->u.sta.mshcfg.dot11MeshHWMPnetDiameterTraversalTime) + msecs_to_jiffies(s->u.mesh.mshcfg.dot11MeshHWMPnetDiameterTraversalTime) #define default_lifetime(s) \ - MSEC_TO_TU(s->u.sta.mshcfg.dot11MeshHWMPactivePathTimeout) + MSEC_TO_TU(s->u.mesh.mshcfg.dot11MeshHWMPactivePathTimeout) #define min_preq_int_jiff(s) \ - (msecs_to_jiffies(s->u.sta.mshcfg.dot11MeshHWMPpreqMinInterval)) -#define max_preq_retries(s) (s->u.sta.mshcfg.dot11MeshHWMPmaxPREQretries) + (msecs_to_jiffies(s->u.mesh.mshcfg.dot11MeshHWMPpreqMinInterval)) +#define max_preq_retries(s) (s->u.mesh.mshcfg.dot11MeshHWMPmaxPREQretries) #define disc_timeout_jiff(s) \ - msecs_to_jiffies(sdata->u.sta.mshcfg.min_discovery_timeout) + msecs_to_jiffies(sdata->u.mesh.mshcfg.min_discovery_timeout) enum mpath_frame_type { MPATH_PREQ = 0, @@ -82,9 +82,9 @@ enum mpath_frame_type { static int mesh_path_sel_frame_tx(enum mpath_frame_type action, u8 flags, u8 *orig_addr, __le32 orig_dsn, u8 dst_flags, u8 *dst, __le32 dst_dsn, u8 *da, u8 hop_count, u8 ttl, __le32 lifetime, - __le32 metric, __le32 preq_id, struct net_device *dev) + __le32 metric, __le32 preq_id, struct ieee80211_sub_if_data *sdata) { - struct ieee80211_local *local = wdev_priv(dev->ieee80211_ptr); + struct ieee80211_local *local = sdata->local; struct sk_buff *skb = dev_alloc_skb(local->hw.extra_tx_headroom + 400); struct ieee80211_mgmt *mgmt; u8 *pos; @@ -99,11 +99,11 @@ static int mesh_path_sel_frame_tx(enum mpath_frame_type action, u8 flags, mgmt = (struct ieee80211_mgmt *) skb_put(skb, 25 + sizeof(mgmt->u.action.u.mesh_action)); memset(mgmt, 0, 25 + sizeof(mgmt->u.action.u.mesh_action)); - mgmt->frame_control = IEEE80211_FC(IEEE80211_FTYPE_MGMT, - IEEE80211_STYPE_ACTION); + mgmt->frame_control = cpu_to_le16(IEEE80211_FTYPE_MGMT | + IEEE80211_STYPE_ACTION); memcpy(mgmt->da, da, ETH_ALEN); - memcpy(mgmt->sa, dev->dev_addr, ETH_ALEN); + memcpy(mgmt->sa, sdata->dev->dev_addr, ETH_ALEN); /* BSSID is left zeroed, wildcard value */ mgmt->u.action.category = MESH_PATH_SEL_CATEGORY; mgmt->u.action.u.mesh_action.action_code = action; @@ -149,7 +149,7 @@ static int mesh_path_sel_frame_tx(enum mpath_frame_type action, u8 flags, pos += ETH_ALEN; memcpy(pos, &dst_dsn, 4); - ieee80211_sta_tx(dev, skb, 0); + ieee80211_tx_skb(sdata, skb, 0); return 0; } @@ -161,9 +161,9 @@ static int mesh_path_sel_frame_tx(enum mpath_frame_type action, u8 flags, * @ra: node this frame is addressed to */ int mesh_path_error_tx(u8 *dst, __le32 dst_dsn, u8 *ra, - struct net_device *dev) + struct ieee80211_sub_if_data *sdata) { - struct ieee80211_local *local = wdev_priv(dev->ieee80211_ptr); + struct ieee80211_local *local = sdata->local; struct sk_buff *skb = dev_alloc_skb(local->hw.extra_tx_headroom + 400); struct ieee80211_mgmt *mgmt; u8 *pos; @@ -178,11 +178,11 @@ int mesh_path_error_tx(u8 *dst, __le32 dst_dsn, u8 *ra, mgmt = (struct ieee80211_mgmt *) skb_put(skb, 25 + sizeof(mgmt->u.action.u.mesh_action)); memset(mgmt, 0, 25 + sizeof(mgmt->u.action.u.mesh_action)); - mgmt->frame_control = IEEE80211_FC(IEEE80211_FTYPE_MGMT, - IEEE80211_STYPE_ACTION); + mgmt->frame_control = cpu_to_le16(IEEE80211_FTYPE_MGMT | + IEEE80211_STYPE_ACTION); memcpy(mgmt->da, ra, ETH_ALEN); - memcpy(mgmt->sa, dev->dev_addr, ETH_ALEN); + memcpy(mgmt->sa, sdata->dev->dev_addr, ETH_ALEN); /* BSSID is left zeroed, wildcard value */ mgmt->u.action.category = MESH_PATH_SEL_CATEGORY; mgmt->u.action.u.mesh_action.action_code = MPATH_PERR; @@ -198,7 +198,7 @@ int mesh_path_error_tx(u8 *dst, __le32 dst_dsn, u8 *ra, pos += ETH_ALEN; memcpy(pos, &dst_dsn, 4); - ieee80211_sta_tx(dev, skb, 0); + ieee80211_tx_skb(sdata, skb, 0); return 0; } @@ -223,7 +223,7 @@ static u32 airtime_link_metric_get(struct ieee80211_local *local, /* bitrate is in units of 100 Kbps, while we need rate in units of * 1Mbps. This will be corrected on tx_time computation. */ - rate = sband->bitrates[sta->txrate_idx].bitrate; + rate = sband->bitrates[sta->last_txrate_idx].bitrate; tx_time = (device_constant + 10 * test_frame_len / rate); estimated_retx = ((1 << (2 * ARITH_SHIFT)) / (s_unit - err)); result = (tx_time * estimated_retx) >> (2 * ARITH_SHIFT) ; @@ -233,7 +233,7 @@ static u32 airtime_link_metric_get(struct ieee80211_local *local, /** * hwmp_route_info_get - Update routing info to originator and transmitter * - * @dev: local mesh interface + * @sdata: local mesh subif * @mgmt: mesh management frame * @hwmp_ie: hwmp information element (PREP or PREQ) * @@ -246,11 +246,11 @@ static u32 airtime_link_metric_get(struct ieee80211_local *local, * Notes: this function is the only place (besides user-provided info) where * path routing information is updated. */ -static u32 hwmp_route_info_get(struct net_device *dev, +static u32 hwmp_route_info_get(struct ieee80211_sub_if_data *sdata, struct ieee80211_mgmt *mgmt, u8 *hwmp_ie) { - struct ieee80211_local *local = wdev_priv(dev->ieee80211_ptr); + struct ieee80211_local *local = sdata->local; struct mesh_path *mpath; struct sta_info *sta; bool fresh_info; @@ -301,14 +301,14 @@ static u32 hwmp_route_info_get(struct net_device *dev, new_metric = MAX_METRIC; exp_time = TU_TO_EXP_TIME(orig_lifetime); - if (memcmp(orig_addr, dev->dev_addr, ETH_ALEN) == 0) { + if (memcmp(orig_addr, sdata->dev->dev_addr, ETH_ALEN) == 0) { /* This MP is the originator, we are not interested in this * frame, except for updating transmitter's path info. */ process = false; fresh_info = false; } else { - mpath = mesh_path_lookup(orig_addr, dev); + mpath = mesh_path_lookup(orig_addr, sdata); if (mpath) { spin_lock_bh(&mpath->state_lock); if (mpath->flags & MESH_PATH_FIXED) @@ -324,8 +324,8 @@ static u32 hwmp_route_info_get(struct net_device *dev, } } } else { - mesh_path_add(orig_addr, dev); - mpath = mesh_path_lookup(orig_addr, dev); + mesh_path_add(orig_addr, sdata); + mpath = mesh_path_lookup(orig_addr, sdata); if (!mpath) { rcu_read_unlock(); return 0; @@ -357,7 +357,7 @@ static u32 hwmp_route_info_get(struct net_device *dev, else { fresh_info = true; - mpath = mesh_path_lookup(ta, dev); + mpath = mesh_path_lookup(ta, sdata); if (mpath) { spin_lock_bh(&mpath->state_lock); if ((mpath->flags & MESH_PATH_FIXED) || @@ -365,8 +365,8 @@ static u32 hwmp_route_info_get(struct net_device *dev, (last_hop_metric > mpath->metric))) fresh_info = false; } else { - mesh_path_add(ta, dev); - mpath = mesh_path_lookup(ta, dev); + mesh_path_add(ta, sdata); + mpath = mesh_path_lookup(ta, sdata); if (!mpath) { rcu_read_unlock(); return 0; @@ -392,11 +392,10 @@ static u32 hwmp_route_info_get(struct net_device *dev, return process ? new_metric : 0; } -static void hwmp_preq_frame_process(struct net_device *dev, +static void hwmp_preq_frame_process(struct ieee80211_sub_if_data *sdata, struct ieee80211_mgmt *mgmt, u8 *preq_elem, u32 metric) { - struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev); - struct ieee80211_if_sta *ifsta = &sdata->u.sta; + struct ieee80211_if_mesh *ifmsh = &sdata->u.mesh; struct mesh_path *mpath; u8 *dst_addr, *orig_addr; u8 dst_flags, ttl; @@ -411,19 +410,19 @@ static void hwmp_preq_frame_process(struct net_device *dev, orig_dsn = PREQ_IE_ORIG_DSN(preq_elem); dst_flags = PREQ_IE_DST_F(preq_elem); - if (memcmp(dst_addr, dev->dev_addr, ETH_ALEN) == 0) { + if (memcmp(dst_addr, sdata->dev->dev_addr, ETH_ALEN) == 0) { forward = false; reply = true; metric = 0; - if (time_after(jiffies, ifsta->last_dsn_update + + if (time_after(jiffies, ifmsh->last_dsn_update + net_traversal_jiffies(sdata)) || - time_before(jiffies, ifsta->last_dsn_update)) { - dst_dsn = ++ifsta->dsn; - ifsta->last_dsn_update = jiffies; + time_before(jiffies, ifmsh->last_dsn_update)) { + dst_dsn = ++ifmsh->dsn; + ifmsh->last_dsn_update = jiffies; } } else { rcu_read_lock(); - mpath = mesh_path_lookup(dst_addr, dev); + mpath = mesh_path_lookup(dst_addr, sdata); if (mpath) { if ((!(mpath->flags & MESH_PATH_DSN_VALID)) || DSN_LT(mpath->dsn, dst_dsn)) { @@ -445,15 +444,15 @@ static void hwmp_preq_frame_process(struct net_device *dev, if (reply) { lifetime = PREQ_IE_LIFETIME(preq_elem); - ttl = ifsta->mshcfg.dot11MeshTTL; + ttl = ifmsh->mshcfg.dot11MeshTTL; if (ttl != 0) mesh_path_sel_frame_tx(MPATH_PREP, 0, dst_addr, cpu_to_le32(dst_dsn), 0, orig_addr, cpu_to_le32(orig_dsn), mgmt->sa, 0, ttl, cpu_to_le32(lifetime), cpu_to_le32(metric), - 0, dev); + 0, sdata); else - ifsta->mshstats.dropped_frames_ttl++; + ifmsh->mshstats.dropped_frames_ttl++; } if (forward) { @@ -463,7 +462,7 @@ static void hwmp_preq_frame_process(struct net_device *dev, ttl = PREQ_IE_TTL(preq_elem); lifetime = PREQ_IE_LIFETIME(preq_elem); if (ttl <= 1) { - ifsta->mshstats.dropped_frames_ttl++; + ifmsh->mshstats.dropped_frames_ttl++; return; } --ttl; @@ -472,20 +471,19 @@ static void hwmp_preq_frame_process(struct net_device *dev, hopcount = PREQ_IE_HOPCOUNT(preq_elem) + 1; mesh_path_sel_frame_tx(MPATH_PREQ, flags, orig_addr, cpu_to_le32(orig_dsn), dst_flags, dst_addr, - cpu_to_le32(dst_dsn), dev->broadcast, + cpu_to_le32(dst_dsn), sdata->dev->broadcast, hopcount, ttl, cpu_to_le32(lifetime), cpu_to_le32(metric), cpu_to_le32(preq_id), - dev); - ifsta->mshstats.fwded_frames++; + sdata); + ifmsh->mshstats.fwded_frames++; } } -static void hwmp_prep_frame_process(struct net_device *dev, +static void hwmp_prep_frame_process(struct ieee80211_sub_if_data *sdata, struct ieee80211_mgmt *mgmt, u8 *prep_elem, u32 metric) { - struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev); struct mesh_path *mpath; u8 *dst_addr, *orig_addr; u8 ttl, hopcount, flags; @@ -499,18 +497,18 @@ static void hwmp_prep_frame_process(struct net_device *dev, * replies */ dst_addr = PREP_IE_DST_ADDR(prep_elem); - if (memcmp(dst_addr, dev->dev_addr, ETH_ALEN) == 0) + if (memcmp(dst_addr, sdata->dev->dev_addr, ETH_ALEN) == 0) /* destination, no forwarding required */ return; ttl = PREP_IE_TTL(prep_elem); if (ttl <= 1) { - sdata->u.sta.mshstats.dropped_frames_ttl++; + sdata->u.mesh.mshstats.dropped_frames_ttl++; return; } rcu_read_lock(); - mpath = mesh_path_lookup(dst_addr, dev); + mpath = mesh_path_lookup(dst_addr, sdata); if (mpath) spin_lock_bh(&mpath->state_lock); else @@ -519,7 +517,7 @@ static void hwmp_prep_frame_process(struct net_device *dev, spin_unlock_bh(&mpath->state_lock); goto fail; } - memcpy(next_hop, mpath->next_hop->addr, ETH_ALEN); + memcpy(next_hop, mpath->next_hop->sta.addr, ETH_ALEN); spin_unlock_bh(&mpath->state_lock); --ttl; flags = PREP_IE_FLAGS(prep_elem); @@ -531,20 +529,20 @@ static void hwmp_prep_frame_process(struct net_device *dev, mesh_path_sel_frame_tx(MPATH_PREP, flags, orig_addr, cpu_to_le32(orig_dsn), 0, dst_addr, - cpu_to_le32(dst_dsn), mpath->next_hop->addr, hopcount, ttl, + cpu_to_le32(dst_dsn), mpath->next_hop->sta.addr, hopcount, ttl, cpu_to_le32(lifetime), cpu_to_le32(metric), - 0, dev); + 0, sdata); rcu_read_unlock(); - sdata->u.sta.mshstats.fwded_frames++; + sdata->u.mesh.mshstats.fwded_frames++; return; fail: rcu_read_unlock(); - sdata->u.sta.mshstats.dropped_frames_no_route++; + sdata->u.mesh.mshstats.dropped_frames_no_route++; return; } -static void hwmp_perr_frame_process(struct net_device *dev, +static void hwmp_perr_frame_process(struct ieee80211_sub_if_data *sdata, struct ieee80211_mgmt *mgmt, u8 *perr_elem) { struct mesh_path *mpath; @@ -555,18 +553,18 @@ static void hwmp_perr_frame_process(struct net_device *dev, dst_addr = PERR_IE_DST_ADDR(perr_elem); dst_dsn = PERR_IE_DST_DSN(perr_elem); rcu_read_lock(); - mpath = mesh_path_lookup(dst_addr, dev); + mpath = mesh_path_lookup(dst_addr, sdata); if (mpath) { spin_lock_bh(&mpath->state_lock); if (mpath->flags & MESH_PATH_ACTIVE && - memcmp(ta, mpath->next_hop->addr, ETH_ALEN) == 0 && + memcmp(ta, mpath->next_hop->sta.addr, ETH_ALEN) == 0 && (!(mpath->flags & MESH_PATH_DSN_VALID) || DSN_GT(dst_dsn, mpath->dsn))) { mpath->flags &= ~MESH_PATH_ACTIVE; mpath->dsn = dst_dsn; spin_unlock_bh(&mpath->state_lock); mesh_path_error_tx(dst_addr, cpu_to_le32(dst_dsn), - dev->broadcast, dev); + sdata->dev->broadcast, sdata); } else spin_unlock_bh(&mpath->state_lock); } @@ -575,7 +573,7 @@ static void hwmp_perr_frame_process(struct net_device *dev, -void mesh_rx_path_sel_frame(struct net_device *dev, +void mesh_rx_path_sel_frame(struct ieee80211_sub_if_data *sdata, struct ieee80211_mgmt *mgmt, size_t len) { @@ -583,6 +581,10 @@ void mesh_rx_path_sel_frame(struct net_device *dev, size_t baselen; u32 last_hop_metric; + /* need action_code */ + if (len < IEEE80211_MIN_ACTION_SIZE + 1) + return; + baselen = (u8 *) mgmt->u.action.u.mesh_action.variable - (u8 *) mgmt; ieee802_11_parse_elems(mgmt->u.action.u.mesh_action.variable, len - baselen, &elems); @@ -592,25 +594,25 @@ void mesh_rx_path_sel_frame(struct net_device *dev, if (!elems.preq || elems.preq_len != 37) /* Right now we support just 1 destination and no AE */ return; - last_hop_metric = hwmp_route_info_get(dev, mgmt, elems.preq); + last_hop_metric = hwmp_route_info_get(sdata, mgmt, elems.preq); if (!last_hop_metric) return; - hwmp_preq_frame_process(dev, mgmt, elems.preq, last_hop_metric); + hwmp_preq_frame_process(sdata, mgmt, elems.preq, last_hop_metric); break; case MPATH_PREP: if (!elems.prep || elems.prep_len != 31) /* Right now we support no AE */ return; - last_hop_metric = hwmp_route_info_get(dev, mgmt, elems.prep); + last_hop_metric = hwmp_route_info_get(sdata, mgmt, elems.prep); if (!last_hop_metric) return; - hwmp_prep_frame_process(dev, mgmt, elems.prep, last_hop_metric); + hwmp_prep_frame_process(sdata, mgmt, elems.prep, last_hop_metric); break; case MPATH_PERR: if (!elems.perr || elems.perr_len != 12) /* Right now we support only one destination per PERR */ return; - hwmp_perr_frame_process(dev, mgmt, elems.perr); + hwmp_perr_frame_process(sdata, mgmt, elems.perr); default: return; } @@ -628,9 +630,8 @@ void mesh_rx_path_sel_frame(struct net_device *dev, */ static void mesh_queue_preq(struct mesh_path *mpath, u8 flags) { - struct ieee80211_sub_if_data *sdata = - IEEE80211_DEV_TO_SUB_IF(mpath->dev); - struct ieee80211_if_sta *ifsta = &sdata->u.sta; + struct ieee80211_sub_if_data *sdata = mpath->sdata; + struct ieee80211_if_mesh *ifmsh = &sdata->u.mesh; struct mesh_preq_queue *preq_node; preq_node = kmalloc(sizeof(struct mesh_preq_queue), GFP_KERNEL); @@ -639,9 +640,9 @@ static void mesh_queue_preq(struct mesh_path *mpath, u8 flags) return; } - spin_lock(&ifsta->mesh_preq_queue_lock); - if (ifsta->preq_queue_len == MAX_PREQ_QUEUE_LEN) { - spin_unlock(&ifsta->mesh_preq_queue_lock); + spin_lock(&ifmsh->mesh_preq_queue_lock); + if (ifmsh->preq_queue_len == MAX_PREQ_QUEUE_LEN) { + spin_unlock(&ifmsh->mesh_preq_queue_lock); kfree(preq_node); if (printk_ratelimit()) printk(KERN_DEBUG "Mesh HWMP: PREQ node queue full\n"); @@ -651,55 +652,53 @@ static void mesh_queue_preq(struct mesh_path *mpath, u8 flags) memcpy(preq_node->dst, mpath->dst, ETH_ALEN); preq_node->flags = flags; - list_add_tail(&preq_node->list, &ifsta->preq_queue.list); - ++ifsta->preq_queue_len; - spin_unlock(&ifsta->mesh_preq_queue_lock); + list_add_tail(&preq_node->list, &ifmsh->preq_queue.list); + ++ifmsh->preq_queue_len; + spin_unlock(&ifmsh->mesh_preq_queue_lock); - if (time_after(jiffies, ifsta->last_preq + min_preq_int_jiff(sdata))) - queue_work(sdata->local->hw.workqueue, &ifsta->work); + if (time_after(jiffies, ifmsh->last_preq + min_preq_int_jiff(sdata))) + queue_work(sdata->local->hw.workqueue, &ifmsh->work); - else if (time_before(jiffies, ifsta->last_preq)) { + else if (time_before(jiffies, ifmsh->last_preq)) { /* avoid long wait if did not send preqs for a long time * and jiffies wrapped around */ - ifsta->last_preq = jiffies - min_preq_int_jiff(sdata) - 1; - queue_work(sdata->local->hw.workqueue, &ifsta->work); + ifmsh->last_preq = jiffies - min_preq_int_jiff(sdata) - 1; + queue_work(sdata->local->hw.workqueue, &ifmsh->work); } else - mod_timer(&ifsta->mesh_path_timer, ifsta->last_preq + + mod_timer(&ifmsh->mesh_path_timer, ifmsh->last_preq + min_preq_int_jiff(sdata)); } /** * mesh_path_start_discovery - launch a path discovery from the PREQ queue * - * @dev: local mesh interface + * @sdata: local mesh subif */ -void mesh_path_start_discovery(struct net_device *dev) +void mesh_path_start_discovery(struct ieee80211_sub_if_data *sdata) { - struct ieee80211_sub_if_data *sdata = - IEEE80211_DEV_TO_SUB_IF(dev); - struct ieee80211_if_sta *ifsta = &sdata->u.sta; + struct ieee80211_if_mesh *ifmsh = &sdata->u.mesh; struct mesh_preq_queue *preq_node; struct mesh_path *mpath; u8 ttl, dst_flags; u32 lifetime; - spin_lock(&ifsta->mesh_preq_queue_lock); - if (!ifsta->preq_queue_len || - time_before(jiffies, ifsta->last_preq + + spin_lock(&ifmsh->mesh_preq_queue_lock); + if (!ifmsh->preq_queue_len || + time_before(jiffies, ifmsh->last_preq + min_preq_int_jiff(sdata))) { - spin_unlock(&ifsta->mesh_preq_queue_lock); + spin_unlock(&ifmsh->mesh_preq_queue_lock); return; } - preq_node = list_first_entry(&ifsta->preq_queue.list, + preq_node = list_first_entry(&ifmsh->preq_queue.list, struct mesh_preq_queue, list); list_del(&preq_node->list); - --ifsta->preq_queue_len; - spin_unlock(&ifsta->mesh_preq_queue_lock); + --ifmsh->preq_queue_len; + spin_unlock(&ifmsh->mesh_preq_queue_lock); rcu_read_lock(); - mpath = mesh_path_lookup(preq_node->dst, dev); + mpath = mesh_path_lookup(preq_node->dst, sdata); if (!mpath) goto enddiscovery; @@ -721,18 +720,18 @@ void mesh_path_start_discovery(struct net_device *dev) goto enddiscovery; } - ifsta->last_preq = jiffies; + ifmsh->last_preq = jiffies; - if (time_after(jiffies, ifsta->last_dsn_update + + if (time_after(jiffies, ifmsh->last_dsn_update + net_traversal_jiffies(sdata)) || - time_before(jiffies, ifsta->last_dsn_update)) { - ++ifsta->dsn; - sdata->u.sta.last_dsn_update = jiffies; + time_before(jiffies, ifmsh->last_dsn_update)) { + ++ifmsh->dsn; + sdata->u.mesh.last_dsn_update = jiffies; } lifetime = default_lifetime(sdata); - ttl = sdata->u.sta.mshcfg.dot11MeshTTL; + ttl = sdata->u.mesh.mshcfg.dot11MeshTTL; if (ttl == 0) { - sdata->u.sta.mshstats.dropped_frames_ttl++; + sdata->u.mesh.mshstats.dropped_frames_ttl++; spin_unlock_bh(&mpath->state_lock); goto enddiscovery; } @@ -743,11 +742,11 @@ void mesh_path_start_discovery(struct net_device *dev) dst_flags = MP_F_RF; spin_unlock_bh(&mpath->state_lock); - mesh_path_sel_frame_tx(MPATH_PREQ, 0, dev->dev_addr, - cpu_to_le32(ifsta->dsn), dst_flags, mpath->dst, - cpu_to_le32(mpath->dsn), dev->broadcast, 0, + mesh_path_sel_frame_tx(MPATH_PREQ, 0, sdata->dev->dev_addr, + cpu_to_le32(ifmsh->dsn), dst_flags, mpath->dst, + cpu_to_le32(mpath->dsn), sdata->dev->broadcast, 0, ttl, cpu_to_le32(lifetime), 0, - cpu_to_le32(ifsta->preq_id++), dev); + cpu_to_le32(ifmsh->preq_id++), sdata); mod_timer(&mpath->timer, jiffies + mpath->discovery_timeout); enddiscovery: @@ -758,32 +757,33 @@ enddiscovery: /** * ieee80211s_lookup_nexthop - put the appropriate next hop on a mesh frame * - * @next_hop: output argument for next hop address - * @skb: frame to be sent - * @dev: network device the frame will be sent through + * @skb: 802.11 frame to be sent + * @sdata: network subif the frame will be sent through + * @fwd_frame: true if this frame was originally from a different host * * Returns: 0 if the next hop was found. Nonzero otherwise. If no next hop is * found, the function will start a path discovery and queue the frame so it is * sent when the path is resolved. This means the caller must not free the skb * in this case. */ -int mesh_nexthop_lookup(u8 *next_hop, struct sk_buff *skb, - struct net_device *dev) +int mesh_nexthop_lookup(struct sk_buff *skb, + struct ieee80211_sub_if_data *sdata) { - struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev); struct sk_buff *skb_to_free = NULL; struct mesh_path *mpath; + struct ieee80211_hdr *hdr = (struct ieee80211_hdr *) skb->data; + u8 *dst_addr = hdr->addr3; int err = 0; rcu_read_lock(); - mpath = mesh_path_lookup(skb->data, dev); + mpath = mesh_path_lookup(dst_addr, sdata); if (!mpath) { - mesh_path_add(skb->data, dev); - mpath = mesh_path_lookup(skb->data, dev); + mesh_path_add(dst_addr, sdata); + mpath = mesh_path_lookup(dst_addr, sdata); if (!mpath) { dev_kfree_skb(skb); - sdata->u.sta.mshstats.dropped_frames_no_route++; + sdata->u.mesh.mshstats.dropped_frames_no_route++; err = -ENOSPC; goto endlookup; } @@ -791,14 +791,15 @@ int mesh_nexthop_lookup(u8 *next_hop, struct sk_buff *skb, if (mpath->flags & MESH_PATH_ACTIVE) { if (time_after(jiffies, mpath->exp_time - - msecs_to_jiffies(sdata->u.sta.mshcfg.path_refresh_time)) - && skb->pkt_type != PACKET_OTHERHOST + msecs_to_jiffies(sdata->u.mesh.mshcfg.path_refresh_time)) + && !memcmp(sdata->dev->dev_addr, hdr->addr4, + ETH_ALEN) && !(mpath->flags & MESH_PATH_RESOLVING) && !(mpath->flags & MESH_PATH_FIXED)) { mesh_queue_preq(mpath, PREQ_Q_F_START | PREQ_Q_F_REFRESH); } - memcpy(next_hop, mpath->next_hop->addr, + memcpy(hdr->addr1, mpath->next_hop->sta.addr, ETH_ALEN); } else { if (!(mpath->flags & MESH_PATH_RESOLVING)) { @@ -814,7 +815,7 @@ int mesh_nexthop_lookup(u8 *next_hop, struct sk_buff *skb, skb_queue_tail(&mpath->frame_queue, skb); if (skb_to_free) - mesh_path_discard_frame(skb_to_free, dev); + mesh_path_discard_frame(skb_to_free, sdata); err = -ENOENT; } @@ -834,7 +835,7 @@ void mesh_path_timer(unsigned long data) if (!mpath) goto endmpathtimer; spin_lock_bh(&mpath->state_lock); - sdata = IEEE80211_DEV_TO_SUB_IF(mpath->dev); + sdata = mpath->sdata; if (mpath->flags & MESH_PATH_RESOLVED || (!(mpath->flags & MESH_PATH_RESOLVING))) mpath->flags &= ~(MESH_PATH_RESOLVING | MESH_PATH_RESOLVED); diff --git a/net/mac80211/mesh_pathtbl.c b/net/mac80211/mesh_pathtbl.c index 99c2d360888e..3c72557df45a 100644 --- a/net/mac80211/mesh_pathtbl.c +++ b/net/mac80211/mesh_pathtbl.c @@ -9,7 +9,6 @@ #include <linux/etherdevice.h> #include <linux/list.h> -#include <linux/netdevice.h> #include <linux/random.h> #include <linux/spinlock.h> #include <linux/string.h> @@ -37,6 +36,7 @@ struct mpath_node { }; static struct mesh_table *mesh_paths; +static struct mesh_table *mpp_paths; /* Store paths for MPP&MAP */ /* This lock will have the grow table function as writer and add / delete nodes * as readers. When reading the table (i.e. doing lookups) we are well protected @@ -62,13 +62,13 @@ void mesh_path_assign_nexthop(struct mesh_path *mpath, struct sta_info *sta) /** * mesh_path_lookup - look up a path in the mesh path table * @dst: hardware address (ETH_ALEN length) of destination - * @dev: local interface + * @sdata: local subif * * Returns: pointer to the mesh path structure, or NULL if not found * * Locking: must be called within a read rcu section. */ -struct mesh_path *mesh_path_lookup(u8 *dst, struct net_device *dev) +struct mesh_path *mesh_path_lookup(u8 *dst, struct ieee80211_sub_if_data *sdata) { struct mesh_path *mpath; struct hlist_node *n; @@ -78,10 +78,10 @@ struct mesh_path *mesh_path_lookup(u8 *dst, struct net_device *dev) tbl = rcu_dereference(mesh_paths); - bucket = &tbl->hash_buckets[mesh_table_hash(dst, dev, tbl)]; + bucket = &tbl->hash_buckets[mesh_table_hash(dst, sdata, tbl)]; hlist_for_each_entry_rcu(node, n, bucket, list) { mpath = node->mpath; - if (mpath->dev == dev && + if (mpath->sdata == sdata && memcmp(dst, mpath->dst, ETH_ALEN) == 0) { if (MPATH_EXPIRED(mpath)) { spin_lock_bh(&mpath->state_lock); @@ -95,16 +95,44 @@ struct mesh_path *mesh_path_lookup(u8 *dst, struct net_device *dev) return NULL; } +struct mesh_path *mpp_path_lookup(u8 *dst, struct ieee80211_sub_if_data *sdata) +{ + struct mesh_path *mpath; + struct hlist_node *n; + struct hlist_head *bucket; + struct mesh_table *tbl; + struct mpath_node *node; + + tbl = rcu_dereference(mpp_paths); + + bucket = &tbl->hash_buckets[mesh_table_hash(dst, sdata, tbl)]; + hlist_for_each_entry_rcu(node, n, bucket, list) { + mpath = node->mpath; + if (mpath->sdata == sdata && + memcmp(dst, mpath->dst, ETH_ALEN) == 0) { + if (MPATH_EXPIRED(mpath)) { + spin_lock_bh(&mpath->state_lock); + if (MPATH_EXPIRED(mpath)) + mpath->flags &= ~MESH_PATH_ACTIVE; + spin_unlock_bh(&mpath->state_lock); + } + return mpath; + } + } + return NULL; +} + + /** * mesh_path_lookup_by_idx - look up a path in the mesh path table by its index * @idx: index - * @dev: local interface, or NULL for all entries + * @sdata: local subif, or NULL for all entries * * Returns: pointer to the mesh path structure, or NULL if not found. * * Locking: must be called within a read rcu section. */ -struct mesh_path *mesh_path_lookup_by_idx(int idx, struct net_device *dev) +struct mesh_path *mesh_path_lookup_by_idx(int idx, struct ieee80211_sub_if_data *sdata) { struct mpath_node *node; struct hlist_node *p; @@ -112,7 +140,7 @@ struct mesh_path *mesh_path_lookup_by_idx(int idx, struct net_device *dev) int j = 0; for_each_mesh_entry(mesh_paths, p, node, i) { - if (dev && node->mpath->dev != dev) + if (sdata && node->mpath->sdata != sdata) continue; if (j++ == idx) { if (MPATH_EXPIRED(node->mpath)) { @@ -131,15 +159,14 @@ struct mesh_path *mesh_path_lookup_by_idx(int idx, struct net_device *dev) /** * mesh_path_add - allocate and add a new path to the mesh path table * @addr: destination address of the path (ETH_ALEN length) - * @dev: local interface + * @sdata: local subif * * Returns: 0 on sucess * * State: the initial state of the new path is set to 0 */ -int mesh_path_add(u8 *dst, struct net_device *dev) +int mesh_path_add(u8 *dst, struct ieee80211_sub_if_data *sdata) { - struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev); struct mesh_path *mpath, *new_mpath; struct mpath_node *node, *new_node; struct hlist_head *bucket; @@ -148,33 +175,28 @@ int mesh_path_add(u8 *dst, struct net_device *dev) int err = 0; u32 hash_idx; - if (memcmp(dst, dev->dev_addr, ETH_ALEN) == 0) + if (memcmp(dst, sdata->dev->dev_addr, ETH_ALEN) == 0) /* never add ourselves as neighbours */ return -ENOTSUPP; if (is_multicast_ether_addr(dst)) return -ENOTSUPP; - if (atomic_add_unless(&sdata->u.sta.mpaths, 1, MESH_MAX_MPATHS) == 0) + if (atomic_add_unless(&sdata->u.mesh.mpaths, 1, MESH_MAX_MPATHS) == 0) return -ENOSPC; + err = -ENOMEM; new_mpath = kzalloc(sizeof(struct mesh_path), GFP_KERNEL); - if (!new_mpath) { - atomic_dec(&sdata->u.sta.mpaths); - err = -ENOMEM; - goto endadd2; - } + if (!new_mpath) + goto err_path_alloc; + new_node = kmalloc(sizeof(struct mpath_node), GFP_KERNEL); - if (!new_node) { - kfree(new_mpath); - atomic_dec(&sdata->u.sta.mpaths); - err = -ENOMEM; - goto endadd2; - } + if (!new_node) + goto err_node_alloc; read_lock(&pathtbl_resize_lock); memcpy(new_mpath->dst, dst, ETH_ALEN); - new_mpath->dev = dev; + new_mpath->sdata = sdata; new_mpath->flags = 0; skb_queue_head_init(&new_mpath->frame_queue); new_node->mpath = new_mpath; @@ -184,21 +206,16 @@ int mesh_path_add(u8 *dst, struct net_device *dev) spin_lock_init(&new_mpath->state_lock); init_timer(&new_mpath->timer); - hash_idx = mesh_table_hash(dst, dev, mesh_paths); + hash_idx = mesh_table_hash(dst, sdata, mesh_paths); bucket = &mesh_paths->hash_buckets[hash_idx]; spin_lock(&mesh_paths->hashwlock[hash_idx]); + err = -EEXIST; hlist_for_each_entry(node, n, bucket, list) { mpath = node->mpath; - if (mpath->dev == dev && memcmp(dst, mpath->dst, ETH_ALEN) - == 0) { - err = -EEXIST; - atomic_dec(&sdata->u.sta.mpaths); - kfree(new_node); - kfree(new_mpath); - goto endadd; - } + if (mpath->sdata == sdata && memcmp(dst, mpath->dst, ETH_ALEN) == 0) + goto err_exists; } hlist_add_head_rcu(&new_node->list, bucket); @@ -206,10 +223,9 @@ int mesh_path_add(u8 *dst, struct net_device *dev) mesh_paths->mean_chain_len * (mesh_paths->hash_mask + 1)) grow = 1; -endadd: spin_unlock(&mesh_paths->hashwlock[hash_idx]); read_unlock(&pathtbl_resize_lock); - if (!err && grow) { + if (grow) { struct mesh_table *oldtbl, *newtbl; write_lock(&pathtbl_resize_lock); @@ -217,7 +233,7 @@ endadd: newtbl = mesh_table_grow(mesh_paths); if (!newtbl) { write_unlock(&pathtbl_resize_lock); - return -ENOMEM; + return 0; } rcu_assign_pointer(mesh_paths, newtbl); write_unlock(&pathtbl_resize_lock); @@ -225,7 +241,101 @@ endadd: synchronize_rcu(); mesh_table_free(oldtbl, false); } -endadd2: + return 0; + +err_exists: + spin_unlock(&mesh_paths->hashwlock[hash_idx]); + read_unlock(&pathtbl_resize_lock); + kfree(new_node); +err_node_alloc: + kfree(new_mpath); +err_path_alloc: + atomic_dec(&sdata->u.mesh.mpaths); + return err; +} + + +int mpp_path_add(u8 *dst, u8 *mpp, struct ieee80211_sub_if_data *sdata) +{ + struct mesh_path *mpath, *new_mpath; + struct mpath_node *node, *new_node; + struct hlist_head *bucket; + struct hlist_node *n; + int grow = 0; + int err = 0; + u32 hash_idx; + + + if (memcmp(dst, sdata->dev->dev_addr, ETH_ALEN) == 0) + /* never add ourselves as neighbours */ + return -ENOTSUPP; + + if (is_multicast_ether_addr(dst)) + return -ENOTSUPP; + + err = -ENOMEM; + new_mpath = kzalloc(sizeof(struct mesh_path), GFP_KERNEL); + if (!new_mpath) + goto err_path_alloc; + + new_node = kmalloc(sizeof(struct mpath_node), GFP_KERNEL); + if (!new_node) + goto err_node_alloc; + + read_lock(&pathtbl_resize_lock); + memcpy(new_mpath->dst, dst, ETH_ALEN); + memcpy(new_mpath->mpp, mpp, ETH_ALEN); + new_mpath->sdata = sdata; + new_mpath->flags = 0; + skb_queue_head_init(&new_mpath->frame_queue); + new_node->mpath = new_mpath; + new_mpath->exp_time = jiffies; + spin_lock_init(&new_mpath->state_lock); + + hash_idx = mesh_table_hash(dst, sdata, mpp_paths); + bucket = &mpp_paths->hash_buckets[hash_idx]; + + spin_lock(&mpp_paths->hashwlock[hash_idx]); + + err = -EEXIST; + hlist_for_each_entry(node, n, bucket, list) { + mpath = node->mpath; + if (mpath->sdata == sdata && memcmp(dst, mpath->dst, ETH_ALEN) == 0) + goto err_exists; + } + + hlist_add_head_rcu(&new_node->list, bucket); + if (atomic_inc_return(&mpp_paths->entries) >= + mpp_paths->mean_chain_len * (mpp_paths->hash_mask + 1)) + grow = 1; + + spin_unlock(&mpp_paths->hashwlock[hash_idx]); + read_unlock(&pathtbl_resize_lock); + if (grow) { + struct mesh_table *oldtbl, *newtbl; + + write_lock(&pathtbl_resize_lock); + oldtbl = mpp_paths; + newtbl = mesh_table_grow(mpp_paths); + if (!newtbl) { + write_unlock(&pathtbl_resize_lock); + return 0; + } + rcu_assign_pointer(mpp_paths, newtbl); + write_unlock(&pathtbl_resize_lock); + + synchronize_rcu(); + mesh_table_free(oldtbl, false); + } + return 0; + +err_exists: + spin_unlock(&mpp_paths->hashwlock[hash_idx]); + read_unlock(&pathtbl_resize_lock); + kfree(new_node); +err_node_alloc: + kfree(new_mpath); +err_path_alloc: return err; } @@ -243,7 +353,7 @@ void mesh_plink_broken(struct sta_info *sta) struct mesh_path *mpath; struct mpath_node *node; struct hlist_node *p; - struct net_device *dev = sta->sdata->dev; + struct ieee80211_sub_if_data *sdata = sta->sdata; int i; rcu_read_lock(); @@ -258,13 +368,12 @@ void mesh_plink_broken(struct sta_info *sta) spin_unlock_bh(&mpath->state_lock); mesh_path_error_tx(mpath->dst, cpu_to_le32(mpath->dsn), - dev->broadcast, dev); + sdata->dev->broadcast, sdata); } else spin_unlock_bh(&mpath->state_lock); } rcu_read_unlock(); } -EXPORT_SYMBOL(mesh_plink_broken); /** * mesh_path_flush_by_nexthop - Deletes mesh paths if their next hop matches @@ -287,11 +396,11 @@ void mesh_path_flush_by_nexthop(struct sta_info *sta) for_each_mesh_entry(mesh_paths, p, node, i) { mpath = node->mpath; if (mpath->next_hop == sta) - mesh_path_del(mpath->dst, mpath->dev); + mesh_path_del(mpath->dst, mpath->sdata); } } -void mesh_path_flush(struct net_device *dev) +void mesh_path_flush(struct ieee80211_sub_if_data *sdata) { struct mesh_path *mpath; struct mpath_node *node; @@ -300,19 +409,18 @@ void mesh_path_flush(struct net_device *dev) for_each_mesh_entry(mesh_paths, p, node, i) { mpath = node->mpath; - if (mpath->dev == dev) - mesh_path_del(mpath->dst, mpath->dev); + if (mpath->sdata == sdata) + mesh_path_del(mpath->dst, mpath->sdata); } } static void mesh_path_node_reclaim(struct rcu_head *rp) { struct mpath_node *node = container_of(rp, struct mpath_node, rcu); - struct ieee80211_sub_if_data *sdata = - IEEE80211_DEV_TO_SUB_IF(node->mpath->dev); + struct ieee80211_sub_if_data *sdata = node->mpath->sdata; del_timer_sync(&node->mpath->timer); - atomic_dec(&sdata->u.sta.mpaths); + atomic_dec(&sdata->u.mesh.mpaths); kfree(node->mpath); kfree(node); } @@ -321,11 +429,11 @@ static void mesh_path_node_reclaim(struct rcu_head *rp) * mesh_path_del - delete a mesh path from the table * * @addr: dst address (ETH_ALEN length) - * @dev: local interface + * @sdata: local subif * * Returns: 0 if succesful */ -int mesh_path_del(u8 *addr, struct net_device *dev) +int mesh_path_del(u8 *addr, struct ieee80211_sub_if_data *sdata) { struct mesh_path *mpath; struct mpath_node *node; @@ -335,13 +443,13 @@ int mesh_path_del(u8 *addr, struct net_device *dev) int err = 0; read_lock(&pathtbl_resize_lock); - hash_idx = mesh_table_hash(addr, dev, mesh_paths); + hash_idx = mesh_table_hash(addr, sdata, mesh_paths); bucket = &mesh_paths->hash_buckets[hash_idx]; spin_lock(&mesh_paths->hashwlock[hash_idx]); hlist_for_each_entry(node, n, bucket, list) { mpath = node->mpath; - if (mpath->dev == dev && + if (mpath->sdata == sdata && memcmp(addr, mpath->dst, ETH_ALEN) == 0) { spin_lock_bh(&mpath->state_lock); mpath->flags |= MESH_PATH_RESOLVING; @@ -381,36 +489,33 @@ void mesh_path_tx_pending(struct mesh_path *mpath) * mesh_path_discard_frame - discard a frame whose path could not be resolved * * @skb: frame to discard - * @dev: network device the frame was to be sent through + * @sdata: network subif the frame was to be sent through * * If the frame was beign forwarded from another MP, a PERR frame will be sent * to the precursor. * * Locking: the function must me called within a rcu_read_lock region */ -void mesh_path_discard_frame(struct sk_buff *skb, struct net_device *dev) +void mesh_path_discard_frame(struct sk_buff *skb, + struct ieee80211_sub_if_data *sdata) { - struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev); + struct ieee80211_hdr *hdr = (struct ieee80211_hdr *) skb->data; struct mesh_path *mpath; u32 dsn = 0; - if (skb->pkt_type == PACKET_OTHERHOST) { - struct ieee80211s_hdr *prev_meshhdr; - int mshhdrlen; + if (memcmp(hdr->addr4, sdata->dev->dev_addr, ETH_ALEN) != 0) { u8 *ra, *da; - prev_meshhdr = ((struct ieee80211s_hdr *)skb->cb); - mshhdrlen = ieee80211_get_mesh_hdrlen(prev_meshhdr); - da = skb->data; - ra = MESH_PREQ(skb); - mpath = mesh_path_lookup(da, dev); + da = hdr->addr3; + ra = hdr->addr2; + mpath = mesh_path_lookup(da, sdata); if (mpath) dsn = ++mpath->dsn; - mesh_path_error_tx(skb->data, cpu_to_le32(dsn), ra, dev); + mesh_path_error_tx(skb->data, cpu_to_le32(dsn), ra, sdata); } kfree_skb(skb); - sdata->u.sta.mshstats.dropped_frames_no_route++; + sdata->u.mesh.mshstats.dropped_frames_no_route++; } /** @@ -422,14 +527,11 @@ void mesh_path_discard_frame(struct sk_buff *skb, struct net_device *dev) */ void mesh_path_flush_pending(struct mesh_path *mpath) { - struct ieee80211_sub_if_data *sdata; struct sk_buff *skb; - sdata = IEEE80211_DEV_TO_SUB_IF(mpath->dev); - while ((skb = skb_dequeue(&mpath->frame_queue)) && (mpath->flags & MESH_PATH_ACTIVE)) - mesh_path_discard_frame(skb, mpath->dev); + mesh_path_discard_frame(skb, mpath->sdata); } /** @@ -460,39 +562,52 @@ static void mesh_path_node_free(struct hlist_node *p, bool free_leafs) struct mpath_node *node = hlist_entry(p, struct mpath_node, list); mpath = node->mpath; hlist_del_rcu(p); - synchronize_rcu(); if (free_leafs) kfree(mpath); kfree(node); } -static void mesh_path_node_copy(struct hlist_node *p, struct mesh_table *newtbl) +static int mesh_path_node_copy(struct hlist_node *p, struct mesh_table *newtbl) { struct mesh_path *mpath; struct mpath_node *node, *new_node; u32 hash_idx; + new_node = kmalloc(sizeof(struct mpath_node), GFP_ATOMIC); + if (new_node == NULL) + return -ENOMEM; + node = hlist_entry(p, struct mpath_node, list); mpath = node->mpath; - new_node = kmalloc(sizeof(struct mpath_node), GFP_KERNEL); new_node->mpath = mpath; - hash_idx = mesh_table_hash(mpath->dst, mpath->dev, newtbl); + hash_idx = mesh_table_hash(mpath->dst, mpath->sdata, newtbl); hlist_add_head(&new_node->list, &newtbl->hash_buckets[hash_idx]); + return 0; } int mesh_pathtbl_init(void) { mesh_paths = mesh_table_alloc(INIT_PATHS_SIZE_ORDER); + if (!mesh_paths) + return -ENOMEM; mesh_paths->free_node = &mesh_path_node_free; mesh_paths->copy_node = &mesh_path_node_copy; mesh_paths->mean_chain_len = MEAN_CHAIN_LEN; - if (!mesh_paths) + + mpp_paths = mesh_table_alloc(INIT_PATHS_SIZE_ORDER); + if (!mpp_paths) { + mesh_table_free(mesh_paths, true); return -ENOMEM; + } + mpp_paths->free_node = &mesh_path_node_free; + mpp_paths->copy_node = &mesh_path_node_copy; + mpp_paths->mean_chain_len = MEAN_CHAIN_LEN; + return 0; } -void mesh_path_expire(struct net_device *dev) +void mesh_path_expire(struct ieee80211_sub_if_data *sdata) { struct mesh_path *mpath; struct mpath_node *node; @@ -501,7 +616,7 @@ void mesh_path_expire(struct net_device *dev) read_lock(&pathtbl_resize_lock); for_each_mesh_entry(mesh_paths, p, node, i) { - if (node->mpath->dev != dev) + if (node->mpath->sdata != sdata) continue; mpath = node->mpath; spin_lock_bh(&mpath->state_lock); @@ -510,7 +625,7 @@ void mesh_path_expire(struct net_device *dev) time_after(jiffies, mpath->exp_time + MESH_PATH_EXPIRE)) { spin_unlock_bh(&mpath->state_lock); - mesh_path_del(mpath->dst, mpath->dev); + mesh_path_del(mpath->dst, mpath->sdata); } else spin_unlock_bh(&mpath->state_lock); } @@ -520,4 +635,5 @@ void mesh_path_expire(struct net_device *dev) void mesh_pathtbl_unregister(void) { mesh_table_free(mesh_paths, true); + mesh_table_free(mpp_paths, true); } diff --git a/net/mac80211/mesh_plink.c b/net/mac80211/mesh_plink.c index 37f0c2b94ae7..faac101c0f85 100644 --- a/net/mac80211/mesh_plink.c +++ b/net/mac80211/mesh_plink.c @@ -36,11 +36,11 @@ #define MESH_SECURITY_AUTHENTICATION_IMPOSSIBLE 9 #define MESH_SECURITY_FAILED_VERIFICATION 10 -#define dot11MeshMaxRetries(s) (s->u.sta.mshcfg.dot11MeshMaxRetries) -#define dot11MeshRetryTimeout(s) (s->u.sta.mshcfg.dot11MeshRetryTimeout) -#define dot11MeshConfirmTimeout(s) (s->u.sta.mshcfg.dot11MeshConfirmTimeout) -#define dot11MeshHoldingTimeout(s) (s->u.sta.mshcfg.dot11MeshHoldingTimeout) -#define dot11MeshMaxPeerLinks(s) (s->u.sta.mshcfg.dot11MeshMaxPeerLinks) +#define dot11MeshMaxRetries(s) (s->u.mesh.mshcfg.dot11MeshMaxRetries) +#define dot11MeshRetryTimeout(s) (s->u.mesh.mshcfg.dot11MeshRetryTimeout) +#define dot11MeshConfirmTimeout(s) (s->u.mesh.mshcfg.dot11MeshConfirmTimeout) +#define dot11MeshHoldingTimeout(s) (s->u.mesh.mshcfg.dot11MeshHoldingTimeout) +#define dot11MeshMaxPeerLinks(s) (s->u.mesh.mshcfg.dot11MeshMaxPeerLinks) enum plink_frame_type { PLINK_OPEN = 0, @@ -63,14 +63,14 @@ enum plink_event { static inline void mesh_plink_inc_estab_count(struct ieee80211_sub_if_data *sdata) { - atomic_inc(&sdata->u.sta.mshstats.estab_plinks); + atomic_inc(&sdata->u.mesh.mshstats.estab_plinks); mesh_accept_plinks_update(sdata); } static inline void mesh_plink_dec_estab_count(struct ieee80211_sub_if_data *sdata) { - atomic_dec(&sdata->u.sta.mshstats.estab_plinks); + atomic_dec(&sdata->u.mesh.mshstats.estab_plinks); mesh_accept_plinks_update(sdata); } @@ -79,7 +79,7 @@ void mesh_plink_dec_estab_count(struct ieee80211_sub_if_data *sdata) * * @sta: mes peer link to restart * - * Locking: this function must be called holding sta->plink_lock + * Locking: this function must be called holding sta->lock */ static inline void mesh_plink_fsm_restart(struct sta_info *sta) { @@ -105,8 +105,8 @@ static struct sta_info *mesh_plink_alloc(struct ieee80211_sub_if_data *sdata, if (!sta) return NULL; - sta->flags |= WLAN_STA_AUTHORIZED; - sta->supp_rates[local->hw.conf.channel->band] = rates; + sta->flags = WLAN_STA_AUTHORIZED; + sta->sta.supp_rates[local->hw.conf.channel->band] = rates; return sta; } @@ -118,7 +118,7 @@ static struct sta_info *mesh_plink_alloc(struct ieee80211_sub_if_data *sdata, * * All mesh paths with this peer as next hop will be flushed * - * Locking: the caller must hold sta->plink_lock + * Locking: the caller must hold sta->lock */ static void __mesh_plink_deactivate(struct sta_info *sta) { @@ -139,15 +139,15 @@ static void __mesh_plink_deactivate(struct sta_info *sta) */ void mesh_plink_deactivate(struct sta_info *sta) { - spin_lock_bh(&sta->plink_lock); + spin_lock_bh(&sta->lock); __mesh_plink_deactivate(sta); - spin_unlock_bh(&sta->plink_lock); + spin_unlock_bh(&sta->lock); } -static int mesh_plink_frame_tx(struct net_device *dev, +static int mesh_plink_frame_tx(struct ieee80211_sub_if_data *sdata, enum plink_frame_type action, u8 *da, __le16 llid, __le16 plid, __le16 reason) { - struct ieee80211_local *local = wdev_priv(dev->ieee80211_ptr); + struct ieee80211_local *local = sdata->local; struct sk_buff *skb = dev_alloc_skb(local->hw.extra_tx_headroom + 400); struct ieee80211_mgmt *mgmt; bool include_plid = false; @@ -163,10 +163,10 @@ static int mesh_plink_frame_tx(struct net_device *dev, mgmt = (struct ieee80211_mgmt *) skb_put(skb, 25 + sizeof(mgmt->u.action.u.plink_action)); memset(mgmt, 0, 25 + sizeof(mgmt->u.action.u.plink_action)); - mgmt->frame_control = IEEE80211_FC(IEEE80211_FTYPE_MGMT, - IEEE80211_STYPE_ACTION); + mgmt->frame_control = cpu_to_le16(IEEE80211_FTYPE_MGMT | + IEEE80211_STYPE_ACTION); memcpy(mgmt->da, da, ETH_ALEN); - memcpy(mgmt->sa, dev->dev_addr, ETH_ALEN); + memcpy(mgmt->sa, sdata->dev->dev_addr, ETH_ALEN); /* BSSID is left zeroed, wildcard value */ mgmt->u.action.category = PLINK_CATEGORY; mgmt->u.action.u.plink_action.action_code = action; @@ -180,7 +180,7 @@ static int mesh_plink_frame_tx(struct net_device *dev, /* two-byte status code followed by two-byte AID */ memset(pos, 0, 4); } - mesh_mgmt_ies_add(skb, dev); + mesh_mgmt_ies_add(skb, sdata); } /* Add Peer Link Management element */ @@ -217,15 +217,14 @@ static int mesh_plink_frame_tx(struct net_device *dev, memcpy(pos, &reason, 2); } - ieee80211_sta_tx(dev, skb, 0); + ieee80211_tx_skb(sdata, skb, 0); return 0; } -void mesh_neighbour_update(u8 *hw_addr, u64 rates, struct net_device *dev, +void mesh_neighbour_update(u8 *hw_addr, u64 rates, struct ieee80211_sub_if_data *sdata, bool peer_accepting_plinks) { - struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev); - struct ieee80211_local *local = wdev_priv(dev->ieee80211_ptr); + struct ieee80211_local *local = sdata->local; struct sta_info *sta; rcu_read_lock(); @@ -244,10 +243,10 @@ void mesh_neighbour_update(u8 *hw_addr, u64 rates, struct net_device *dev, } sta->last_rx = jiffies; - sta->supp_rates[local->hw.conf.channel->band] = rates; + sta->sta.supp_rates[local->hw.conf.channel->band] = rates; if (peer_accepting_plinks && sta->plink_state == PLINK_LISTEN && - sdata->u.sta.accepting_plinks && - sdata->u.sta.mshcfg.auto_open_plinks) + sdata->u.mesh.accepting_plinks && + sdata->u.mesh.mshcfg.auto_open_plinks) mesh_plink_open(sta); rcu_read_unlock(); @@ -257,7 +256,6 @@ static void mesh_plink_timer(unsigned long data) { struct sta_info *sta; __le16 llid, plid, reason; - struct net_device *dev = NULL; struct ieee80211_sub_if_data *sdata; #ifdef CONFIG_MAC80211_VERBOSE_MPL_DEBUG DECLARE_MAC_BUF(mac); @@ -270,19 +268,18 @@ static void mesh_plink_timer(unsigned long data) */ sta = (struct sta_info *) data; - spin_lock_bh(&sta->plink_lock); + spin_lock_bh(&sta->lock); if (sta->ignore_plink_timer) { sta->ignore_plink_timer = false; - spin_unlock_bh(&sta->plink_lock); + spin_unlock_bh(&sta->lock); return; } mpl_dbg("Mesh plink timer for %s fired on state %d\n", - print_mac(mac, sta->addr), sta->plink_state); + print_mac(mac, sta->sta.addr), sta->plink_state); reason = 0; llid = sta->llid; plid = sta->plid; sdata = sta->sdata; - dev = sdata->dev; switch (sta->plink_state) { case PLINK_OPN_RCVD: @@ -291,15 +288,15 @@ static void mesh_plink_timer(unsigned long data) if (sta->plink_retries < dot11MeshMaxRetries(sdata)) { u32 rand; mpl_dbg("Mesh plink for %s (retry, timeout): %d %d\n", - print_mac(mac, sta->addr), + print_mac(mac, sta->sta.addr), sta->plink_retries, sta->plink_timeout); get_random_bytes(&rand, sizeof(u32)); sta->plink_timeout = sta->plink_timeout + rand % sta->plink_timeout; ++sta->plink_retries; mod_plink_timer(sta, sta->plink_timeout); - spin_unlock_bh(&sta->plink_lock); - mesh_plink_frame_tx(dev, PLINK_OPEN, sta->addr, llid, + spin_unlock_bh(&sta->lock); + mesh_plink_frame_tx(sdata, PLINK_OPEN, sta->sta.addr, llid, 0, 0); break; } @@ -311,18 +308,18 @@ static void mesh_plink_timer(unsigned long data) reason = cpu_to_le16(MESH_CONFIRM_TIMEOUT); sta->plink_state = PLINK_HOLDING; mod_plink_timer(sta, dot11MeshHoldingTimeout(sdata)); - spin_unlock_bh(&sta->plink_lock); - mesh_plink_frame_tx(dev, PLINK_CLOSE, sta->addr, llid, plid, + spin_unlock_bh(&sta->lock); + mesh_plink_frame_tx(sdata, PLINK_CLOSE, sta->sta.addr, llid, plid, reason); break; case PLINK_HOLDING: /* holding timer */ del_timer(&sta->plink_timer); mesh_plink_fsm_restart(sta); - spin_unlock_bh(&sta->plink_lock); + spin_unlock_bh(&sta->lock); break; default: - spin_unlock_bh(&sta->plink_lock); + spin_unlock_bh(&sta->lock); break; } } @@ -344,21 +341,21 @@ int mesh_plink_open(struct sta_info *sta) DECLARE_MAC_BUF(mac); #endif - spin_lock_bh(&sta->plink_lock); + spin_lock_bh(&sta->lock); get_random_bytes(&llid, 2); sta->llid = llid; if (sta->plink_state != PLINK_LISTEN) { - spin_unlock_bh(&sta->plink_lock); + spin_unlock_bh(&sta->lock); return -EBUSY; } sta->plink_state = PLINK_OPN_SNT; mesh_plink_timer_set(sta, dot11MeshRetryTimeout(sdata)); - spin_unlock_bh(&sta->plink_lock); + spin_unlock_bh(&sta->lock); mpl_dbg("Mesh plink: starting establishment with %s\n", - print_mac(mac, sta->addr)); + print_mac(mac, sta->sta.addr)); - return mesh_plink_frame_tx(sdata->dev, PLINK_OPEN, - sta->addr, llid, 0, 0); + return mesh_plink_frame_tx(sdata, PLINK_OPEN, + sta->sta.addr, llid, 0, 0); } void mesh_plink_block(struct sta_info *sta) @@ -367,10 +364,10 @@ void mesh_plink_block(struct sta_info *sta) DECLARE_MAC_BUF(mac); #endif - spin_lock_bh(&sta->plink_lock); + spin_lock_bh(&sta->lock); __mesh_plink_deactivate(sta); sta->plink_state = PLINK_BLOCKED; - spin_unlock_bh(&sta->plink_lock); + spin_unlock_bh(&sta->lock); } int mesh_plink_close(struct sta_info *sta) @@ -382,15 +379,15 @@ int mesh_plink_close(struct sta_info *sta) #endif mpl_dbg("Mesh plink: closing link with %s\n", - print_mac(mac, sta->addr)); - spin_lock_bh(&sta->plink_lock); + print_mac(mac, sta->sta.addr)); + spin_lock_bh(&sta->lock); sta->reason = cpu_to_le16(MESH_LINK_CANCELLED); reason = sta->reason; if (sta->plink_state == PLINK_LISTEN || sta->plink_state == PLINK_BLOCKED) { mesh_plink_fsm_restart(sta); - spin_unlock_bh(&sta->plink_lock); + spin_unlock_bh(&sta->lock); return 0; } else if (sta->plink_state == PLINK_ESTAB) { __mesh_plink_deactivate(sta); @@ -402,16 +399,15 @@ int mesh_plink_close(struct sta_info *sta) sta->plink_state = PLINK_HOLDING; llid = sta->llid; plid = sta->plid; - spin_unlock_bh(&sta->plink_lock); - mesh_plink_frame_tx(sta->sdata->dev, PLINK_CLOSE, sta->addr, llid, + spin_unlock_bh(&sta->lock); + mesh_plink_frame_tx(sta->sdata, PLINK_CLOSE, sta->sta.addr, llid, plid, reason); return 0; } -void mesh_rx_plink_frame(struct net_device *dev, struct ieee80211_mgmt *mgmt, +void mesh_rx_plink_frame(struct ieee80211_sub_if_data *sdata, struct ieee80211_mgmt *mgmt, size_t len, struct ieee80211_rx_status *rx_status) { - struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev); struct ieee80211_local *local = sdata->local; struct ieee802_11_elems elems; struct sta_info *sta; @@ -425,6 +421,10 @@ void mesh_rx_plink_frame(struct net_device *dev, struct ieee80211_mgmt *mgmt, DECLARE_MAC_BUF(mac); #endif + /* need action_code, aux */ + if (len < IEEE80211_MIN_ACTION_SIZE + 3) + return; + if (is_multicast_ether_addr(mgmt->da)) { mpl_dbg("Mesh plink: ignore frame from multicast address"); return; @@ -478,7 +478,7 @@ void mesh_rx_plink_frame(struct net_device *dev, struct ieee80211_mgmt *mgmt, /* Now we will figure out the appropriate event... */ event = PLINK_UNDEFINED; - if (ftype != PLINK_CLOSE && (!mesh_matches_local(&elems, dev))) { + if (ftype != PLINK_CLOSE && (!mesh_matches_local(&elems, sdata))) { switch (ftype) { case PLINK_OPEN: event = OPN_RJCT; @@ -490,7 +490,7 @@ void mesh_rx_plink_frame(struct net_device *dev, struct ieee80211_mgmt *mgmt, /* avoid warning */ break; } - spin_lock_bh(&sta->plink_lock); + spin_lock_bh(&sta->lock); } else if (!sta) { /* ftype == PLINK_OPEN */ u64 rates; @@ -512,9 +512,9 @@ void mesh_rx_plink_frame(struct net_device *dev, struct ieee80211_mgmt *mgmt, return; } event = OPN_ACPT; - spin_lock_bh(&sta->plink_lock); + spin_lock_bh(&sta->lock); } else { - spin_lock_bh(&sta->plink_lock); + spin_lock_bh(&sta->lock); switch (ftype) { case PLINK_OPEN: if (!mesh_plink_free_count(sdata) || @@ -551,7 +551,7 @@ void mesh_rx_plink_frame(struct net_device *dev, struct ieee80211_mgmt *mgmt, break; default: mpl_dbg("Mesh plink: unknown frame subtype\n"); - spin_unlock_bh(&sta->plink_lock); + spin_unlock_bh(&sta->lock); rcu_read_unlock(); return; } @@ -568,7 +568,7 @@ void mesh_rx_plink_frame(struct net_device *dev, struct ieee80211_mgmt *mgmt, switch (event) { case CLS_ACPT: mesh_plink_fsm_restart(sta); - spin_unlock_bh(&sta->plink_lock); + spin_unlock_bh(&sta->lock); break; case OPN_ACPT: sta->plink_state = PLINK_OPN_RCVD; @@ -576,14 +576,14 @@ void mesh_rx_plink_frame(struct net_device *dev, struct ieee80211_mgmt *mgmt, get_random_bytes(&llid, 2); sta->llid = llid; mesh_plink_timer_set(sta, dot11MeshRetryTimeout(sdata)); - spin_unlock_bh(&sta->plink_lock); - mesh_plink_frame_tx(dev, PLINK_OPEN, sta->addr, llid, + spin_unlock_bh(&sta->lock); + mesh_plink_frame_tx(sdata, PLINK_OPEN, sta->sta.addr, llid, 0, 0); - mesh_plink_frame_tx(dev, PLINK_CONFIRM, sta->addr, + mesh_plink_frame_tx(sdata, PLINK_CONFIRM, sta->sta.addr, llid, plid, 0); break; default: - spin_unlock_bh(&sta->plink_lock); + spin_unlock_bh(&sta->lock); break; } break; @@ -603,8 +603,8 @@ void mesh_rx_plink_frame(struct net_device *dev, struct ieee80211_mgmt *mgmt, sta->ignore_plink_timer = true; llid = sta->llid; - spin_unlock_bh(&sta->plink_lock); - mesh_plink_frame_tx(dev, PLINK_CLOSE, sta->addr, llid, + spin_unlock_bh(&sta->lock); + mesh_plink_frame_tx(sdata, PLINK_CLOSE, sta->sta.addr, llid, plid, reason); break; case OPN_ACPT: @@ -612,8 +612,8 @@ void mesh_rx_plink_frame(struct net_device *dev, struct ieee80211_mgmt *mgmt, sta->plink_state = PLINK_OPN_RCVD; sta->plid = plid; llid = sta->llid; - spin_unlock_bh(&sta->plink_lock); - mesh_plink_frame_tx(dev, PLINK_CONFIRM, sta->addr, llid, + spin_unlock_bh(&sta->lock); + mesh_plink_frame_tx(sdata, PLINK_CONFIRM, sta->sta.addr, llid, plid, 0); break; case CNF_ACPT: @@ -622,10 +622,10 @@ void mesh_rx_plink_frame(struct net_device *dev, struct ieee80211_mgmt *mgmt, dot11MeshConfirmTimeout(sdata))) sta->ignore_plink_timer = true; - spin_unlock_bh(&sta->plink_lock); + spin_unlock_bh(&sta->lock); break; default: - spin_unlock_bh(&sta->plink_lock); + spin_unlock_bh(&sta->lock); break; } break; @@ -645,26 +645,26 @@ void mesh_rx_plink_frame(struct net_device *dev, struct ieee80211_mgmt *mgmt, sta->ignore_plink_timer = true; llid = sta->llid; - spin_unlock_bh(&sta->plink_lock); - mesh_plink_frame_tx(dev, PLINK_CLOSE, sta->addr, llid, + spin_unlock_bh(&sta->lock); + mesh_plink_frame_tx(sdata, PLINK_CLOSE, sta->sta.addr, llid, plid, reason); break; case OPN_ACPT: llid = sta->llid; - spin_unlock_bh(&sta->plink_lock); - mesh_plink_frame_tx(dev, PLINK_CONFIRM, sta->addr, llid, + spin_unlock_bh(&sta->lock); + mesh_plink_frame_tx(sdata, PLINK_CONFIRM, sta->sta.addr, llid, plid, 0); break; case CNF_ACPT: del_timer(&sta->plink_timer); sta->plink_state = PLINK_ESTAB; mesh_plink_inc_estab_count(sdata); - spin_unlock_bh(&sta->plink_lock); + spin_unlock_bh(&sta->lock); mpl_dbg("Mesh plink with %s ESTABLISHED\n", - print_mac(mac, sta->addr)); + print_mac(mac, sta->sta.addr)); break; default: - spin_unlock_bh(&sta->plink_lock); + spin_unlock_bh(&sta->lock); break; } break; @@ -684,22 +684,22 @@ void mesh_rx_plink_frame(struct net_device *dev, struct ieee80211_mgmt *mgmt, sta->ignore_plink_timer = true; llid = sta->llid; - spin_unlock_bh(&sta->plink_lock); - mesh_plink_frame_tx(dev, PLINK_CLOSE, sta->addr, llid, + spin_unlock_bh(&sta->lock); + mesh_plink_frame_tx(sdata, PLINK_CLOSE, sta->sta.addr, llid, plid, reason); break; case OPN_ACPT: del_timer(&sta->plink_timer); sta->plink_state = PLINK_ESTAB; mesh_plink_inc_estab_count(sdata); - spin_unlock_bh(&sta->plink_lock); + spin_unlock_bh(&sta->lock); mpl_dbg("Mesh plink with %s ESTABLISHED\n", - print_mac(mac, sta->addr)); - mesh_plink_frame_tx(dev, PLINK_CONFIRM, sta->addr, llid, + print_mac(mac, sta->sta.addr)); + mesh_plink_frame_tx(sdata, PLINK_CONFIRM, sta->sta.addr, llid, plid, 0); break; default: - spin_unlock_bh(&sta->plink_lock); + spin_unlock_bh(&sta->lock); break; } break; @@ -713,18 +713,18 @@ void mesh_rx_plink_frame(struct net_device *dev, struct ieee80211_mgmt *mgmt, sta->plink_state = PLINK_HOLDING; llid = sta->llid; mod_plink_timer(sta, dot11MeshHoldingTimeout(sdata)); - spin_unlock_bh(&sta->plink_lock); - mesh_plink_frame_tx(dev, PLINK_CLOSE, sta->addr, llid, + spin_unlock_bh(&sta->lock); + mesh_plink_frame_tx(sdata, PLINK_CLOSE, sta->sta.addr, llid, plid, reason); break; case OPN_ACPT: llid = sta->llid; - spin_unlock_bh(&sta->plink_lock); - mesh_plink_frame_tx(dev, PLINK_CONFIRM, sta->addr, llid, + spin_unlock_bh(&sta->lock); + mesh_plink_frame_tx(sdata, PLINK_CONFIRM, sta->sta.addr, llid, plid, 0); break; default: - spin_unlock_bh(&sta->plink_lock); + spin_unlock_bh(&sta->lock); break; } break; @@ -734,7 +734,7 @@ void mesh_rx_plink_frame(struct net_device *dev, struct ieee80211_mgmt *mgmt, if (del_timer(&sta->plink_timer)) sta->ignore_plink_timer = 1; mesh_plink_fsm_restart(sta); - spin_unlock_bh(&sta->plink_lock); + spin_unlock_bh(&sta->lock); break; case OPN_ACPT: case CNF_ACPT: @@ -742,19 +742,19 @@ void mesh_rx_plink_frame(struct net_device *dev, struct ieee80211_mgmt *mgmt, case CNF_RJCT: llid = sta->llid; reason = sta->reason; - spin_unlock_bh(&sta->plink_lock); - mesh_plink_frame_tx(dev, PLINK_CLOSE, sta->addr, llid, - plid, reason); + spin_unlock_bh(&sta->lock); + mesh_plink_frame_tx(sdata, PLINK_CLOSE, sta->sta.addr, + llid, plid, reason); break; default: - spin_unlock_bh(&sta->plink_lock); + spin_unlock_bh(&sta->lock); } break; default: /* should not get here, PLINK_BLOCKED is dealt with at the * beggining of the function */ - spin_unlock_bh(&sta->plink_lock); + spin_unlock_bh(&sta->lock); break; } diff --git a/net/mac80211/michael.c b/net/mac80211/michael.c index 0f844f7895f1..408649bd4702 100644 --- a/net/mac80211/michael.c +++ b/net/mac80211/michael.c @@ -6,85 +6,68 @@ * it under the terms of the GNU General Public License version 2 as * published by the Free Software Foundation. */ - #include <linux/types.h> +#include <linux/bitops.h> +#include <linux/ieee80211.h> +#include <asm/unaligned.h> #include "michael.h" -static inline u32 rotr(u32 val, int bits) -{ - return (val >> bits) | (val << (32 - bits)); -} - - -static inline u32 rotl(u32 val, int bits) -{ - return (val << bits) | (val >> (32 - bits)); -} - - -static inline u32 xswap(u32 val) -{ - return ((val & 0xff00ff00) >> 8) | ((val & 0x00ff00ff) << 8); -} - - -#define michael_block(l, r) \ -do { \ - r ^= rotl(l, 17); \ - l += r; \ - r ^= xswap(l); \ - l += r; \ - r ^= rotl(l, 3); \ - l += r; \ - r ^= rotr(l, 2); \ - l += r; \ -} while (0) - - -static inline u32 michael_get32(u8 *data) +static void michael_block(struct michael_mic_ctx *mctx, u32 val) { - return data[0] | (data[1] << 8) | (data[2] << 16) | (data[3] << 24); + mctx->l ^= val; + mctx->r ^= rol32(mctx->l, 17); + mctx->l += mctx->r; + mctx->r ^= ((mctx->l & 0xff00ff00) >> 8) | + ((mctx->l & 0x00ff00ff) << 8); + mctx->l += mctx->r; + mctx->r ^= rol32(mctx->l, 3); + mctx->l += mctx->r; + mctx->r ^= ror32(mctx->l, 2); + mctx->l += mctx->r; } - -static inline void michael_put32(u32 val, u8 *data) +static void michael_mic_hdr(struct michael_mic_ctx *mctx, const u8 *key, + struct ieee80211_hdr *hdr) { - data[0] = val & 0xff; - data[1] = (val >> 8) & 0xff; - data[2] = (val >> 16) & 0xff; - data[3] = (val >> 24) & 0xff; + u8 *da, *sa, tid; + + da = ieee80211_get_DA(hdr); + sa = ieee80211_get_SA(hdr); + if (ieee80211_is_data_qos(hdr->frame_control)) + tid = *ieee80211_get_qos_ctl(hdr) & IEEE80211_QOS_CTL_TID_MASK; + else + tid = 0; + + mctx->l = get_unaligned_le32(key); + mctx->r = get_unaligned_le32(key + 4); + + /* + * A pseudo header (DA, SA, Priority, 0, 0, 0) is used in Michael MIC + * calculation, but it is _not_ transmitted + */ + michael_block(mctx, get_unaligned_le32(da)); + michael_block(mctx, get_unaligned_le16(&da[4]) | + (get_unaligned_le16(sa) << 16)); + michael_block(mctx, get_unaligned_le32(&sa[2])); + michael_block(mctx, tid); } - -void michael_mic(u8 *key, u8 *da, u8 *sa, u8 priority, - u8 *data, size_t data_len, u8 *mic) +void michael_mic(const u8 *key, struct ieee80211_hdr *hdr, + const u8 *data, size_t data_len, u8 *mic) { - u32 l, r, val; + u32 val; size_t block, blocks, left; + struct michael_mic_ctx mctx; - l = michael_get32(key); - r = michael_get32(key + 4); - - /* A pseudo header (DA, SA, Priority, 0, 0, 0) is used in Michael MIC - * calculation, but it is _not_ transmitted */ - l ^= michael_get32(da); - michael_block(l, r); - l ^= da[4] | (da[5] << 8) | (sa[0] << 16) | (sa[1] << 24); - michael_block(l, r); - l ^= michael_get32(&sa[2]); - michael_block(l, r); - l ^= priority; - michael_block(l, r); + michael_mic_hdr(&mctx, key, hdr); /* Real data */ blocks = data_len / 4; left = data_len % 4; - for (block = 0; block < blocks; block++) { - l ^= michael_get32(&data[block * 4]); - michael_block(l, r); - } + for (block = 0; block < blocks; block++) + michael_block(&mctx, get_unaligned_le32(&data[block * 4])); /* Partial block of 0..3 bytes and padding: 0x5a + 4..7 zeros to make * total length a multiple of 4. */ @@ -94,11 +77,10 @@ void michael_mic(u8 *key, u8 *da, u8 *sa, u8 priority, left--; val |= data[blocks * 4 + left]; } - l ^= val; - michael_block(l, r); - /* last block is zero, so l ^ 0 = l */ - michael_block(l, r); - michael_put32(l, mic); - michael_put32(r, mic + 4); + michael_block(&mctx, val); + michael_block(&mctx, 0); + + put_unaligned_le32(mctx.l, mic); + put_unaligned_le32(mctx.r, mic + 4); } diff --git a/net/mac80211/michael.h b/net/mac80211/michael.h index 2e6aebabeea1..3b848dad9587 100644 --- a/net/mac80211/michael.h +++ b/net/mac80211/michael.h @@ -14,7 +14,11 @@ #define MICHAEL_MIC_LEN 8 -void michael_mic(u8 *key, u8 *da, u8 *sa, u8 priority, - u8 *data, size_t data_len, u8 *mic); +struct michael_mic_ctx { + u32 l, r; +}; + +void michael_mic(const u8 *key, struct ieee80211_hdr *hdr, + const u8 *data, size_t data_len, u8 *mic); #endif /* MICHAEL_H */ diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c index 4d2b582dd055..49f86fa56bff 100644 --- a/net/mac80211/mlme.c +++ b/net/mac80211/mlme.c @@ -11,11 +11,6 @@ * published by the Free Software Foundation. */ -/* TODO: - * order BSS list by RSSI(?) ("quality of AP") - * scan result table filtering (by capability (privacy, IBSS/BSS, WPA/RSN IE, - * SSID) - */ #include <linux/delay.h> #include <linux/if_ether.h> #include <linux/skbuff.h> @@ -26,208 +21,460 @@ #include <linux/etherdevice.h> #include <linux/rtnetlink.h> #include <net/iw_handler.h> -#include <asm/types.h> - #include <net/mac80211.h> +#include <asm/unaligned.h> + #include "ieee80211_i.h" #include "rate.h" #include "led.h" -#include "mesh.h" +#define IEEE80211_ASSOC_SCANS_MAX_TRIES 2 #define IEEE80211_AUTH_TIMEOUT (HZ / 5) #define IEEE80211_AUTH_MAX_TRIES 3 #define IEEE80211_ASSOC_TIMEOUT (HZ / 5) #define IEEE80211_ASSOC_MAX_TRIES 3 #define IEEE80211_MONITORING_INTERVAL (2 * HZ) -#define IEEE80211_MESH_HOUSEKEEPING_INTERVAL (60 * HZ) #define IEEE80211_PROBE_INTERVAL (60 * HZ) #define IEEE80211_RETRY_AUTH_INTERVAL (1 * HZ) #define IEEE80211_SCAN_INTERVAL (2 * HZ) #define IEEE80211_SCAN_INTERVAL_SLOW (15 * HZ) #define IEEE80211_IBSS_JOIN_TIMEOUT (7 * HZ) -#define IEEE80211_PROBE_DELAY (HZ / 33) -#define IEEE80211_CHANNEL_TIME (HZ / 33) -#define IEEE80211_PASSIVE_CHANNEL_TIME (HZ / 5) -#define IEEE80211_SCAN_RESULT_EXPIRE (10 * HZ) #define IEEE80211_IBSS_MERGE_INTERVAL (30 * HZ) #define IEEE80211_IBSS_INACTIVITY_LIMIT (60 * HZ) -#define IEEE80211_MESH_PEER_INACTIVITY_LIMIT (1800 * HZ) #define IEEE80211_IBSS_MAX_STA_ENTRIES 128 -#define ERP_INFO_USE_PROTECTION BIT(1) - -/* mgmt header + 1 byte action code */ -#define IEEE80211_MIN_ACTION_SIZE (24 + 1) - -#define IEEE80211_ADDBA_PARAM_POLICY_MASK 0x0002 -#define IEEE80211_ADDBA_PARAM_TID_MASK 0x003C -#define IEEE80211_ADDBA_PARAM_BUF_SIZE_MASK 0xFFA0 -#define IEEE80211_DELBA_PARAM_TID_MASK 0xF000 -#define IEEE80211_DELBA_PARAM_INITIATOR_MASK 0x0800 - -/* next values represent the buffer size for A-MPDU frame. - * According to IEEE802.11n spec size varies from 8K to 64K (in powers of 2) */ -#define IEEE80211_MIN_AMPDU_BUF 0x8 -#define IEEE80211_MAX_AMPDU_BUF 0x40 - -static void ieee80211_send_probe_req(struct net_device *dev, u8 *dst, - u8 *ssid, size_t ssid_len); -static struct ieee80211_sta_bss * -ieee80211_rx_bss_get(struct net_device *dev, u8 *bssid, int freq, - u8 *ssid, u8 ssid_len); -static void ieee80211_rx_bss_put(struct net_device *dev, - struct ieee80211_sta_bss *bss); -static int ieee80211_sta_find_ibss(struct net_device *dev, - struct ieee80211_if_sta *ifsta); -static int ieee80211_sta_wep_configured(struct net_device *dev); -static int ieee80211_sta_start_scan(struct net_device *dev, - u8 *ssid, size_t ssid_len); -static int ieee80211_sta_config_auth(struct net_device *dev, - struct ieee80211_if_sta *ifsta); - - -void ieee802_11_parse_elems(u8 *start, size_t len, - struct ieee802_11_elems *elems) +/* utils */ +static int ecw2cw(int ecw) { - size_t left = len; - u8 *pos = start; + return (1 << ecw) - 1; +} - memset(elems, 0, sizeof(*elems)); +static u8 *ieee80211_bss_get_ie(struct ieee80211_bss *bss, u8 ie) +{ + u8 *end, *pos; - while (left >= 2) { - u8 id, elen; + pos = bss->ies; + if (pos == NULL) + return NULL; + end = pos + bss->ies_len; - id = *pos++; - elen = *pos++; - left -= 2; + while (pos + 1 < end) { + if (pos + 2 + pos[1] > end) + break; + if (pos[0] == ie) + return pos; + pos += 2 + pos[1]; + } - if (elen > left) - return; + return NULL; +} - switch (id) { - case WLAN_EID_SSID: - elems->ssid = pos; - elems->ssid_len = elen; - break; - case WLAN_EID_SUPP_RATES: - elems->supp_rates = pos; - elems->supp_rates_len = elen; - break; - case WLAN_EID_FH_PARAMS: - elems->fh_params = pos; - elems->fh_params_len = elen; - break; - case WLAN_EID_DS_PARAMS: - elems->ds_params = pos; - elems->ds_params_len = elen; - break; - case WLAN_EID_CF_PARAMS: - elems->cf_params = pos; - elems->cf_params_len = elen; - break; - case WLAN_EID_TIM: - elems->tim = pos; - elems->tim_len = elen; - break; - case WLAN_EID_IBSS_PARAMS: - elems->ibss_params = pos; - elems->ibss_params_len = elen; - break; - case WLAN_EID_CHALLENGE: - elems->challenge = pos; - elems->challenge_len = elen; - break; - case WLAN_EID_WPA: - if (elen >= 4 && pos[0] == 0x00 && pos[1] == 0x50 && - pos[2] == 0xf2) { - /* Microsoft OUI (00:50:F2) */ - if (pos[3] == 1) { - /* OUI Type 1 - WPA IE */ - elems->wpa = pos; - elems->wpa_len = elen; - } else if (elen >= 5 && pos[3] == 2) { - if (pos[4] == 0) { - elems->wmm_info = pos; - elems->wmm_info_len = elen; - } else if (pos[4] == 1) { - elems->wmm_param = pos; - elems->wmm_param_len = elen; - } - } +static int ieee80211_compatible_rates(struct ieee80211_bss *bss, + struct ieee80211_supported_band *sband, + u64 *rates) +{ + int i, j, count; + *rates = 0; + count = 0; + for (i = 0; i < bss->supp_rates_len; i++) { + int rate = (bss->supp_rates[i] & 0x7F) * 5; + + for (j = 0; j < sband->n_bitrates; j++) + if (sband->bitrates[j].bitrate == rate) { + *rates |= BIT(j); + count++; + break; + } + } + + return count; +} + +/* also used by mesh code */ +u64 ieee80211_sta_get_rates(struct ieee80211_local *local, + struct ieee802_11_elems *elems, + enum ieee80211_band band) +{ + struct ieee80211_supported_band *sband; + struct ieee80211_rate *bitrates; + size_t num_rates; + u64 supp_rates; + int i, j; + sband = local->hw.wiphy->bands[band]; + + if (!sband) { + WARN_ON(1); + sband = local->hw.wiphy->bands[local->hw.conf.channel->band]; + } + + bitrates = sband->bitrates; + num_rates = sband->n_bitrates; + supp_rates = 0; + for (i = 0; i < elems->supp_rates_len + + elems->ext_supp_rates_len; i++) { + u8 rate = 0; + int own_rate; + if (i < elems->supp_rates_len) + rate = elems->supp_rates[i]; + else if (elems->ext_supp_rates) + rate = elems->ext_supp_rates + [i - elems->supp_rates_len]; + own_rate = 5 * (rate & 0x7f); + for (j = 0; j < num_rates; j++) + if (bitrates[j].bitrate == own_rate) + supp_rates |= BIT(j); + } + return supp_rates; +} + +/* frame sending functions */ + +/* also used by scanning code */ +void ieee80211_send_probe_req(struct ieee80211_sub_if_data *sdata, u8 *dst, + u8 *ssid, size_t ssid_len) +{ + struct ieee80211_local *local = sdata->local; + struct ieee80211_supported_band *sband; + struct sk_buff *skb; + struct ieee80211_mgmt *mgmt; + u8 *pos, *supp_rates, *esupp_rates = NULL; + int i; + + skb = dev_alloc_skb(local->hw.extra_tx_headroom + sizeof(*mgmt) + 200); + if (!skb) { + printk(KERN_DEBUG "%s: failed to allocate buffer for probe " + "request\n", sdata->dev->name); + return; + } + skb_reserve(skb, local->hw.extra_tx_headroom); + + mgmt = (struct ieee80211_mgmt *) skb_put(skb, 24); + memset(mgmt, 0, 24); + mgmt->frame_control = cpu_to_le16(IEEE80211_FTYPE_MGMT | + IEEE80211_STYPE_PROBE_REQ); + memcpy(mgmt->sa, sdata->dev->dev_addr, ETH_ALEN); + if (dst) { + memcpy(mgmt->da, dst, ETH_ALEN); + memcpy(mgmt->bssid, dst, ETH_ALEN); + } else { + memset(mgmt->da, 0xff, ETH_ALEN); + memset(mgmt->bssid, 0xff, ETH_ALEN); + } + pos = skb_put(skb, 2 + ssid_len); + *pos++ = WLAN_EID_SSID; + *pos++ = ssid_len; + memcpy(pos, ssid, ssid_len); + + supp_rates = skb_put(skb, 2); + supp_rates[0] = WLAN_EID_SUPP_RATES; + supp_rates[1] = 0; + sband = local->hw.wiphy->bands[local->hw.conf.channel->band]; + + for (i = 0; i < sband->n_bitrates; i++) { + struct ieee80211_rate *rate = &sband->bitrates[i]; + if (esupp_rates) { + pos = skb_put(skb, 1); + esupp_rates[1]++; + } else if (supp_rates[1] == 8) { + esupp_rates = skb_put(skb, 3); + esupp_rates[0] = WLAN_EID_EXT_SUPP_RATES; + esupp_rates[1] = 1; + pos = &esupp_rates[2]; + } else { + pos = skb_put(skb, 1); + supp_rates[1]++; + } + *pos = rate->bitrate / 5; + } + + ieee80211_tx_skb(sdata, skb, 0); +} + +static void ieee80211_send_auth(struct ieee80211_sub_if_data *sdata, + struct ieee80211_if_sta *ifsta, + int transaction, u8 *extra, size_t extra_len, + int encrypt) +{ + struct ieee80211_local *local = sdata->local; + struct sk_buff *skb; + struct ieee80211_mgmt *mgmt; + + skb = dev_alloc_skb(local->hw.extra_tx_headroom + + sizeof(*mgmt) + 6 + extra_len); + if (!skb) { + printk(KERN_DEBUG "%s: failed to allocate buffer for auth " + "frame\n", sdata->dev->name); + return; + } + skb_reserve(skb, local->hw.extra_tx_headroom); + + mgmt = (struct ieee80211_mgmt *) skb_put(skb, 24 + 6); + memset(mgmt, 0, 24 + 6); + mgmt->frame_control = cpu_to_le16(IEEE80211_FTYPE_MGMT | + IEEE80211_STYPE_AUTH); + if (encrypt) + mgmt->frame_control |= cpu_to_le16(IEEE80211_FCTL_PROTECTED); + memcpy(mgmt->da, ifsta->bssid, ETH_ALEN); + memcpy(mgmt->sa, sdata->dev->dev_addr, ETH_ALEN); + memcpy(mgmt->bssid, ifsta->bssid, ETH_ALEN); + mgmt->u.auth.auth_alg = cpu_to_le16(ifsta->auth_alg); + mgmt->u.auth.auth_transaction = cpu_to_le16(transaction); + ifsta->auth_transaction = transaction + 1; + mgmt->u.auth.status_code = cpu_to_le16(0); + if (extra) + memcpy(skb_put(skb, extra_len), extra, extra_len); + + ieee80211_tx_skb(sdata, skb, encrypt); +} + +static void ieee80211_send_assoc(struct ieee80211_sub_if_data *sdata, + struct ieee80211_if_sta *ifsta) +{ + struct ieee80211_local *local = sdata->local; + struct sk_buff *skb; + struct ieee80211_mgmt *mgmt; + u8 *pos, *ies, *ht_add_ie; + int i, len, count, rates_len, supp_rates_len; + u16 capab; + struct ieee80211_bss *bss; + int wmm = 0; + struct ieee80211_supported_band *sband; + u64 rates = 0; + + skb = dev_alloc_skb(local->hw.extra_tx_headroom + + sizeof(*mgmt) + 200 + ifsta->extra_ie_len + + ifsta->ssid_len); + if (!skb) { + printk(KERN_DEBUG "%s: failed to allocate buffer for assoc " + "frame\n", sdata->dev->name); + return; + } + skb_reserve(skb, local->hw.extra_tx_headroom); + + sband = local->hw.wiphy->bands[local->hw.conf.channel->band]; + + capab = ifsta->capab; + + if (local->hw.conf.channel->band == IEEE80211_BAND_2GHZ) { + if (!(local->hw.flags & IEEE80211_HW_2GHZ_SHORT_SLOT_INCAPABLE)) + capab |= WLAN_CAPABILITY_SHORT_SLOT_TIME; + if (!(local->hw.flags & IEEE80211_HW_2GHZ_SHORT_PREAMBLE_INCAPABLE)) + capab |= WLAN_CAPABILITY_SHORT_PREAMBLE; + } + + bss = ieee80211_rx_bss_get(local, ifsta->bssid, + local->hw.conf.channel->center_freq, + ifsta->ssid, ifsta->ssid_len); + if (bss) { + if (bss->capability & WLAN_CAPABILITY_PRIVACY) + capab |= WLAN_CAPABILITY_PRIVACY; + if (bss->wmm_used) + wmm = 1; + + /* get all rates supported by the device and the AP as + * some APs don't like getting a superset of their rates + * in the association request (e.g. D-Link DAP 1353 in + * b-only mode) */ + rates_len = ieee80211_compatible_rates(bss, sband, &rates); + + if ((bss->capability & WLAN_CAPABILITY_SPECTRUM_MGMT) && + (local->hw.flags & IEEE80211_HW_SPECTRUM_MGMT)) + capab |= WLAN_CAPABILITY_SPECTRUM_MGMT; + + ieee80211_rx_bss_put(local, bss); + } else { + rates = ~0; + rates_len = sband->n_bitrates; + } + + mgmt = (struct ieee80211_mgmt *) skb_put(skb, 24); + memset(mgmt, 0, 24); + memcpy(mgmt->da, ifsta->bssid, ETH_ALEN); + memcpy(mgmt->sa, sdata->dev->dev_addr, ETH_ALEN); + memcpy(mgmt->bssid, ifsta->bssid, ETH_ALEN); + + if (ifsta->flags & IEEE80211_STA_PREV_BSSID_SET) { + skb_put(skb, 10); + mgmt->frame_control = cpu_to_le16(IEEE80211_FTYPE_MGMT | + IEEE80211_STYPE_REASSOC_REQ); + mgmt->u.reassoc_req.capab_info = cpu_to_le16(capab); + mgmt->u.reassoc_req.listen_interval = + cpu_to_le16(local->hw.conf.listen_interval); + memcpy(mgmt->u.reassoc_req.current_ap, ifsta->prev_bssid, + ETH_ALEN); + } else { + skb_put(skb, 4); + mgmt->frame_control = cpu_to_le16(IEEE80211_FTYPE_MGMT | + IEEE80211_STYPE_ASSOC_REQ); + mgmt->u.assoc_req.capab_info = cpu_to_le16(capab); + mgmt->u.reassoc_req.listen_interval = + cpu_to_le16(local->hw.conf.listen_interval); + } + + /* SSID */ + ies = pos = skb_put(skb, 2 + ifsta->ssid_len); + *pos++ = WLAN_EID_SSID; + *pos++ = ifsta->ssid_len; + memcpy(pos, ifsta->ssid, ifsta->ssid_len); + + /* add all rates which were marked to be used above */ + supp_rates_len = rates_len; + if (supp_rates_len > 8) + supp_rates_len = 8; + + len = sband->n_bitrates; + pos = skb_put(skb, supp_rates_len + 2); + *pos++ = WLAN_EID_SUPP_RATES; + *pos++ = supp_rates_len; + + count = 0; + for (i = 0; i < sband->n_bitrates; i++) { + if (BIT(i) & rates) { + int rate = sband->bitrates[i].bitrate; + *pos++ = (u8) (rate / 5); + if (++count == 8) + break; + } + } + + if (rates_len > count) { + pos = skb_put(skb, rates_len - count + 2); + *pos++ = WLAN_EID_EXT_SUPP_RATES; + *pos++ = rates_len - count; + + for (i++; i < sband->n_bitrates; i++) { + if (BIT(i) & rates) { + int rate = sband->bitrates[i].bitrate; + *pos++ = (u8) (rate / 5); + } + } + } + + if (capab & WLAN_CAPABILITY_SPECTRUM_MGMT) { + /* 1. power capabilities */ + pos = skb_put(skb, 4); + *pos++ = WLAN_EID_PWR_CAPABILITY; + *pos++ = 2; + *pos++ = 0; /* min tx power */ + *pos++ = local->hw.conf.channel->max_power; /* max tx power */ + + /* 2. supported channels */ + /* TODO: get this in reg domain format */ + pos = skb_put(skb, 2 * sband->n_channels + 2); + *pos++ = WLAN_EID_SUPPORTED_CHANNELS; + *pos++ = 2 * sband->n_channels; + for (i = 0; i < sband->n_channels; i++) { + *pos++ = ieee80211_frequency_to_channel( + sband->channels[i].center_freq); + *pos++ = 1; /* one channel in the subband*/ + } + } + + if (ifsta->extra_ie) { + pos = skb_put(skb, ifsta->extra_ie_len); + memcpy(pos, ifsta->extra_ie, ifsta->extra_ie_len); + } + + if (wmm && (ifsta->flags & IEEE80211_STA_WMM_ENABLED)) { + pos = skb_put(skb, 9); + *pos++ = WLAN_EID_VENDOR_SPECIFIC; + *pos++ = 7; /* len */ + *pos++ = 0x00; /* Microsoft OUI 00:50:F2 */ + *pos++ = 0x50; + *pos++ = 0xf2; + *pos++ = 2; /* WME */ + *pos++ = 0; /* WME info */ + *pos++ = 1; /* WME ver */ + *pos++ = 0; + } + + /* wmm support is a must to HT */ + if (wmm && (ifsta->flags & IEEE80211_STA_WMM_ENABLED) && + sband->ht_info.ht_supported && + (ht_add_ie = ieee80211_bss_get_ie(bss, WLAN_EID_HT_EXTRA_INFO))) { + struct ieee80211_ht_addt_info *ht_add_info = + (struct ieee80211_ht_addt_info *)ht_add_ie; + u16 cap = sband->ht_info.cap; + __le16 tmp; + u32 flags = local->hw.conf.channel->flags; + + switch (ht_add_info->ht_param & IEEE80211_HT_IE_CHA_SEC_OFFSET) { + case IEEE80211_HT_IE_CHA_SEC_ABOVE: + if (flags & IEEE80211_CHAN_NO_FAT_ABOVE) { + cap &= ~IEEE80211_HT_CAP_SUP_WIDTH; + cap &= ~IEEE80211_HT_CAP_SGI_40; } break; - case WLAN_EID_RSN: - elems->rsn = pos; - elems->rsn_len = elen; - break; - case WLAN_EID_ERP_INFO: - elems->erp_info = pos; - elems->erp_info_len = elen; - break; - case WLAN_EID_EXT_SUPP_RATES: - elems->ext_supp_rates = pos; - elems->ext_supp_rates_len = elen; - break; - case WLAN_EID_HT_CAPABILITY: - elems->ht_cap_elem = pos; - elems->ht_cap_elem_len = elen; - break; - case WLAN_EID_HT_EXTRA_INFO: - elems->ht_info_elem = pos; - elems->ht_info_elem_len = elen; - break; - case WLAN_EID_MESH_ID: - elems->mesh_id = pos; - elems->mesh_id_len = elen; - break; - case WLAN_EID_MESH_CONFIG: - elems->mesh_config = pos; - elems->mesh_config_len = elen; - break; - case WLAN_EID_PEER_LINK: - elems->peer_link = pos; - elems->peer_link_len = elen; - break; - case WLAN_EID_PREQ: - elems->preq = pos; - elems->preq_len = elen; - break; - case WLAN_EID_PREP: - elems->prep = pos; - elems->prep_len = elen; - break; - case WLAN_EID_PERR: - elems->perr = pos; - elems->perr_len = elen; - break; - default: + case IEEE80211_HT_IE_CHA_SEC_BELOW: + if (flags & IEEE80211_CHAN_NO_FAT_BELOW) { + cap &= ~IEEE80211_HT_CAP_SUP_WIDTH; + cap &= ~IEEE80211_HT_CAP_SGI_40; + } break; } - left -= elen; - pos += elen; + tmp = cpu_to_le16(cap); + pos = skb_put(skb, sizeof(struct ieee80211_ht_cap)+2); + *pos++ = WLAN_EID_HT_CAPABILITY; + *pos++ = sizeof(struct ieee80211_ht_cap); + memset(pos, 0, sizeof(struct ieee80211_ht_cap)); + memcpy(pos, &tmp, sizeof(u16)); + pos += sizeof(u16); + /* TODO: needs a define here for << 2 */ + *pos++ = sband->ht_info.ampdu_factor | + (sband->ht_info.ampdu_density << 2); + memcpy(pos, sband->ht_info.supp_mcs_set, 16); } + + kfree(ifsta->assocreq_ies); + ifsta->assocreq_ies_len = (skb->data + skb->len) - ies; + ifsta->assocreq_ies = kmalloc(ifsta->assocreq_ies_len, GFP_KERNEL); + if (ifsta->assocreq_ies) + memcpy(ifsta->assocreq_ies, ies, ifsta->assocreq_ies_len); + + ieee80211_tx_skb(sdata, skb, 0); } -static int ecw2cw(int ecw) +static void ieee80211_send_deauth_disassoc(struct ieee80211_sub_if_data *sdata, + u16 stype, u16 reason) { - return (1 << ecw) - 1; -} + struct ieee80211_local *local = sdata->local; + struct ieee80211_if_sta *ifsta = &sdata->u.sta; + struct sk_buff *skb; + struct ieee80211_mgmt *mgmt; + skb = dev_alloc_skb(local->hw.extra_tx_headroom + sizeof(*mgmt)); + if (!skb) { + printk(KERN_DEBUG "%s: failed to allocate buffer for " + "deauth/disassoc frame\n", sdata->dev->name); + return; + } + skb_reserve(skb, local->hw.extra_tx_headroom); -static void ieee80211_sta_def_wmm_params(struct net_device *dev, - struct ieee80211_sta_bss *bss, - int ibss) + mgmt = (struct ieee80211_mgmt *) skb_put(skb, 24); + memset(mgmt, 0, 24); + memcpy(mgmt->da, ifsta->bssid, ETH_ALEN); + memcpy(mgmt->sa, sdata->dev->dev_addr, ETH_ALEN); + memcpy(mgmt->bssid, ifsta->bssid, ETH_ALEN); + mgmt->frame_control = cpu_to_le16(IEEE80211_FTYPE_MGMT | stype); + skb_put(skb, 2); + /* u.deauth.reason_code == u.disassoc.reason_code */ + mgmt->u.deauth.reason_code = cpu_to_le16(reason); + + ieee80211_tx_skb(sdata, skb, 0); +} + +/* MLME */ +static void ieee80211_sta_def_wmm_params(struct ieee80211_sub_if_data *sdata, + struct ieee80211_bss *bss) { - struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev); struct ieee80211_local *local = sdata->local; int i, have_higher_than_11mbit = 0; - /* cf. IEEE 802.11 9.2.12 */ for (i = 0; i < bss->supp_rates_len; i++) if ((bss->supp_rates[i] & 0x7f) * 5 > 110) @@ -239,49 +486,24 @@ static void ieee80211_sta_def_wmm_params(struct net_device *dev, else sdata->flags &= ~IEEE80211_SDATA_OPERATING_GMODE; - - if (local->ops->conf_tx) { - struct ieee80211_tx_queue_params qparam; - - memset(&qparam, 0, sizeof(qparam)); - - qparam.aifs = 2; - - if (local->hw.conf.channel->band == IEEE80211_BAND_2GHZ && - !(sdata->flags & IEEE80211_SDATA_OPERATING_GMODE)) - qparam.cw_min = 31; - else - qparam.cw_min = 15; - - qparam.cw_max = 1023; - qparam.txop = 0; - - for (i = IEEE80211_TX_QUEUE_DATA0; i < NUM_TX_DATA_QUEUES; i++) - local->ops->conf_tx(local_to_hw(local), - i + IEEE80211_TX_QUEUE_DATA0, - &qparam); - - if (ibss) { - /* IBSS uses different parameters for Beacon sending */ - qparam.cw_min++; - qparam.cw_min *= 2; - qparam.cw_min--; - local->ops->conf_tx(local_to_hw(local), - IEEE80211_TX_QUEUE_BEACON, &qparam); - } - } + ieee80211_set_wmm_default(sdata); } -static void ieee80211_sta_wmm_params(struct net_device *dev, +static void ieee80211_sta_wmm_params(struct ieee80211_local *local, struct ieee80211_if_sta *ifsta, u8 *wmm_param, size_t wmm_param_len) { - struct ieee80211_local *local = wdev_priv(dev->ieee80211_ptr); struct ieee80211_tx_queue_params params; size_t left; int count; u8 *pos; + if (!(ifsta->flags & IEEE80211_STA_WMM_ENABLED)) + return; + + if (!wmm_param) + return; + if (wmm_param_len < 8 || wmm_param[5] /* version */ != 1) return; count = wmm_param[6] & 0x0f; @@ -305,47 +527,43 @@ static void ieee80211_sta_wmm_params(struct net_device *dev, switch (aci) { case 1: - queue = IEEE80211_TX_QUEUE_DATA3; - if (acm) { + queue = 3; + if (acm) local->wmm_acm |= BIT(0) | BIT(3); - } break; case 2: - queue = IEEE80211_TX_QUEUE_DATA1; - if (acm) { + queue = 1; + if (acm) local->wmm_acm |= BIT(4) | BIT(5); - } break; case 3: - queue = IEEE80211_TX_QUEUE_DATA0; - if (acm) { + queue = 0; + if (acm) local->wmm_acm |= BIT(6) | BIT(7); - } break; case 0: default: - queue = IEEE80211_TX_QUEUE_DATA2; - if (acm) { + queue = 2; + if (acm) local->wmm_acm |= BIT(1) | BIT(2); - } break; } params.aifs = pos[0] & 0x0f; params.cw_max = ecw2cw((pos[1] & 0xf0) >> 4); params.cw_min = ecw2cw(pos[1] & 0x0f); - params.txop = pos[2] | (pos[3] << 8); -#ifdef CONFIG_MAC80211_DEBUG + params.txop = get_unaligned_le16(pos + 2); +#ifdef CONFIG_MAC80211_VERBOSE_DEBUG printk(KERN_DEBUG "%s: WMM queue=%d aci=%d acm=%d aifs=%d " "cWmin=%d cWmax=%d txop=%d\n", - dev->name, queue, aci, acm, params.aifs, params.cw_min, + local->mdev->name, queue, aci, acm, params.aifs, params.cw_min, params.cw_max, params.txop); #endif /* TODO: handle ACM (block TX, fallback to next lowest allowed * AC for now) */ if (local->ops->conf_tx(local_to_hw(local), queue, ¶ms)) { printk(KERN_DEBUG "%s: failed to set TX queue " - "parameters for queue %d\n", dev->name, queue); + "parameters for queue %d\n", local->mdev->name, queue); } } } @@ -355,11 +573,14 @@ static u32 ieee80211_handle_protect_preamb(struct ieee80211_sub_if_data *sdata, bool use_short_preamble) { struct ieee80211_bss_conf *bss_conf = &sdata->bss_conf; +#ifdef CONFIG_MAC80211_VERBOSE_DEBUG struct ieee80211_if_sta *ifsta = &sdata->u.sta; DECLARE_MAC_BUF(mac); +#endif u32 changed = 0; if (use_protection != bss_conf->use_cts_prot) { +#ifdef CONFIG_MAC80211_VERBOSE_DEBUG if (net_ratelimit()) { printk(KERN_DEBUG "%s: CTS protection %s (BSSID=" "%s)\n", @@ -367,11 +588,13 @@ static u32 ieee80211_handle_protect_preamb(struct ieee80211_sub_if_data *sdata, use_protection ? "enabled" : "disabled", print_mac(mac, ifsta->bssid)); } +#endif bss_conf->use_cts_prot = use_protection; changed |= BSS_CHANGED_ERP_CTS_PROT; } if (use_short_preamble != bss_conf->use_short_preamble) { +#ifdef CONFIG_MAC80211_VERBOSE_DEBUG if (net_ratelimit()) { printk(KERN_DEBUG "%s: switched to %s barker preamble" " (BSSID=%s)\n", @@ -379,6 +602,7 @@ static u32 ieee80211_handle_protect_preamb(struct ieee80211_sub_if_data *sdata, use_short_preamble ? "short" : "long", print_mac(mac, ifsta->bssid)); } +#endif bss_conf->use_short_preamble = use_short_preamble; changed |= BSS_CHANGED_ERP_PREAMBLE; } @@ -397,7 +621,7 @@ static u32 ieee80211_handle_erp_ie(struct ieee80211_sub_if_data *sdata, } static u32 ieee80211_handle_bss_capability(struct ieee80211_sub_if_data *sdata, - struct ieee80211_sta_bss *bss) + struct ieee80211_bss *bss) { u32 changed = 0; @@ -412,53 +636,18 @@ static u32 ieee80211_handle_bss_capability(struct ieee80211_sub_if_data *sdata, return changed; } -int ieee80211_ht_cap_ie_to_ht_info(struct ieee80211_ht_cap *ht_cap_ie, - struct ieee80211_ht_info *ht_info) -{ - - if (ht_info == NULL) - return -EINVAL; - - memset(ht_info, 0, sizeof(*ht_info)); - - if (ht_cap_ie) { - u8 ampdu_info = ht_cap_ie->ampdu_params_info; - - ht_info->ht_supported = 1; - ht_info->cap = le16_to_cpu(ht_cap_ie->cap_info); - ht_info->ampdu_factor = - ampdu_info & IEEE80211_HT_CAP_AMPDU_FACTOR; - ht_info->ampdu_density = - (ampdu_info & IEEE80211_HT_CAP_AMPDU_DENSITY) >> 2; - memcpy(ht_info->supp_mcs_set, ht_cap_ie->supp_mcs_set, 16); - } else - ht_info->ht_supported = 0; - - return 0; -} - -int ieee80211_ht_addt_info_ie_to_ht_bss_info( - struct ieee80211_ht_addt_info *ht_add_info_ie, - struct ieee80211_ht_bss_info *bss_info) +static void ieee80211_sta_send_apinfo(struct ieee80211_sub_if_data *sdata, + struct ieee80211_if_sta *ifsta) { - if (bss_info == NULL) - return -EINVAL; - - memset(bss_info, 0, sizeof(*bss_info)); - - if (ht_add_info_ie) { - u16 op_mode; - op_mode = le16_to_cpu(ht_add_info_ie->operation_mode); - - bss_info->primary_channel = ht_add_info_ie->control_chan; - bss_info->bss_cap = ht_add_info_ie->ht_param; - bss_info->bss_op_mode = (u8)(op_mode & 0xff); - } - - return 0; + union iwreq_data wrqu; + memset(&wrqu, 0, sizeof(wrqu)); + if (ifsta->flags & IEEE80211_STA_ASSOCIATED) + memcpy(wrqu.ap_addr.sa_data, sdata->u.sta.bssid, ETH_ALEN); + wrqu.ap_addr.sa_family = ARPHRD_ETHER; + wireless_send_event(sdata->dev, SIOCGIWAP, &wrqu, NULL); } -static void ieee80211_sta_send_associnfo(struct net_device *dev, +static void ieee80211_sta_send_associnfo(struct ieee80211_sub_if_data *sdata, struct ieee80211_if_sta *ifsta) { char *buf; @@ -501,148 +690,105 @@ static void ieee80211_sta_send_associnfo(struct net_device *dev, } } - memset(&wrqu, 0, sizeof(wrqu)); - wrqu.data.length = len; - wireless_send_event(dev, IWEVCUSTOM, &wrqu, buf); + if (len <= IW_CUSTOM_MAX) { + memset(&wrqu, 0, sizeof(wrqu)); + wrqu.data.length = len; + wireless_send_event(sdata->dev, IWEVCUSTOM, &wrqu, buf); + } kfree(buf); } -static void ieee80211_set_associated(struct net_device *dev, - struct ieee80211_if_sta *ifsta, - bool assoc) +static void ieee80211_set_associated(struct ieee80211_sub_if_data *sdata, + struct ieee80211_if_sta *ifsta) { - struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev); struct ieee80211_local *local = sdata->local; struct ieee80211_conf *conf = &local_to_hw(local)->conf; - union iwreq_data wrqu; u32 changed = BSS_CHANGED_ASSOC; - if (assoc) { - struct ieee80211_sta_bss *bss; - - ifsta->flags |= IEEE80211_STA_ASSOCIATED; + struct ieee80211_bss *bss; - if (sdata->vif.type != IEEE80211_IF_TYPE_STA) - return; + ifsta->flags |= IEEE80211_STA_ASSOCIATED; - bss = ieee80211_rx_bss_get(dev, ifsta->bssid, - conf->channel->center_freq, - ifsta->ssid, ifsta->ssid_len); - if (bss) { - /* set timing information */ - sdata->bss_conf.beacon_int = bss->beacon_int; - sdata->bss_conf.timestamp = bss->timestamp; + if (sdata->vif.type != NL80211_IFTYPE_STATION) + return; - changed |= ieee80211_handle_bss_capability(sdata, bss); + bss = ieee80211_rx_bss_get(local, ifsta->bssid, + conf->channel->center_freq, + ifsta->ssid, ifsta->ssid_len); + if (bss) { + /* set timing information */ + sdata->bss_conf.beacon_int = bss->beacon_int; + sdata->bss_conf.timestamp = bss->timestamp; + sdata->bss_conf.dtim_period = bss->dtim_period; - ieee80211_rx_bss_put(dev, bss); - } + changed |= ieee80211_handle_bss_capability(sdata, bss); - if (conf->flags & IEEE80211_CONF_SUPPORT_HT_MODE) { - changed |= BSS_CHANGED_HT; - sdata->bss_conf.assoc_ht = 1; - sdata->bss_conf.ht_conf = &conf->ht_conf; - sdata->bss_conf.ht_bss_conf = &conf->ht_bss_conf; - } + ieee80211_rx_bss_put(local, bss); + } - netif_carrier_on(dev); - ifsta->flags |= IEEE80211_STA_PREV_BSSID_SET; - memcpy(ifsta->prev_bssid, sdata->u.sta.bssid, ETH_ALEN); - memcpy(wrqu.ap_addr.sa_data, sdata->u.sta.bssid, ETH_ALEN); - ieee80211_sta_send_associnfo(dev, ifsta); - } else { - ieee80211_sta_tear_down_BA_sessions(dev, ifsta->bssid); - ifsta->flags &= ~IEEE80211_STA_ASSOCIATED; - netif_carrier_off(dev); - ieee80211_reset_erp_info(dev); + if (conf->flags & IEEE80211_CONF_SUPPORT_HT_MODE) { + changed |= BSS_CHANGED_HT; + sdata->bss_conf.assoc_ht = 1; + sdata->bss_conf.ht_conf = &conf->ht_conf; + sdata->bss_conf.ht_bss_conf = &conf->ht_bss_conf; + } - sdata->bss_conf.assoc_ht = 0; - sdata->bss_conf.ht_conf = NULL; - sdata->bss_conf.ht_bss_conf = NULL; + ifsta->flags |= IEEE80211_STA_PREV_BSSID_SET; + memcpy(ifsta->prev_bssid, sdata->u.sta.bssid, ETH_ALEN); + ieee80211_sta_send_associnfo(sdata, ifsta); - memset(wrqu.ap_addr.sa_data, 0, ETH_ALEN); - } ifsta->last_probe = jiffies; - ieee80211_led_assoc(local, assoc); + ieee80211_led_assoc(local, 1); - sdata->bss_conf.assoc = assoc; + sdata->bss_conf.assoc = 1; + /* + * For now just always ask the driver to update the basic rateset + * when we have associated, we aren't checking whether it actually + * changed or not. + */ + changed |= BSS_CHANGED_BASIC_RATES; ieee80211_bss_info_change_notify(sdata, changed); - wrqu.ap_addr.sa_family = ARPHRD_ETHER; - wireless_send_event(dev, SIOCGIWAP, &wrqu, NULL); -} -static void ieee80211_set_disassoc(struct net_device *dev, - struct ieee80211_if_sta *ifsta, int deauth) -{ - if (deauth) - ifsta->auth_tries = 0; - ifsta->assoc_tries = 0; - ieee80211_set_associated(dev, ifsta, 0); -} + netif_tx_start_all_queues(sdata->dev); + netif_carrier_on(sdata->dev); -void ieee80211_sta_tx(struct net_device *dev, struct sk_buff *skb, - int encrypt) -{ - struct ieee80211_sub_if_data *sdata; - struct ieee80211_tx_packet_data *pkt_data; - - sdata = IEEE80211_DEV_TO_SUB_IF(dev); - skb->dev = sdata->local->mdev; - skb_set_mac_header(skb, 0); - skb_set_network_header(skb, 0); - skb_set_transport_header(skb, 0); - - pkt_data = (struct ieee80211_tx_packet_data *) skb->cb; - memset(pkt_data, 0, sizeof(struct ieee80211_tx_packet_data)); - pkt_data->ifindex = sdata->dev->ifindex; - if (!encrypt) - pkt_data->flags |= IEEE80211_TXPD_DO_NOT_ENCRYPT; - - dev_queue_xmit(skb); + ieee80211_sta_send_apinfo(sdata, ifsta); } - -static void ieee80211_send_auth(struct net_device *dev, - struct ieee80211_if_sta *ifsta, - int transaction, u8 *extra, size_t extra_len, - int encrypt) +static void ieee80211_direct_probe(struct ieee80211_sub_if_data *sdata, + struct ieee80211_if_sta *ifsta) { - struct ieee80211_local *local = wdev_priv(dev->ieee80211_ptr); - struct sk_buff *skb; - struct ieee80211_mgmt *mgmt; + DECLARE_MAC_BUF(mac); - skb = dev_alloc_skb(local->hw.extra_tx_headroom + - sizeof(*mgmt) + 6 + extra_len); - if (!skb) { - printk(KERN_DEBUG "%s: failed to allocate buffer for auth " - "frame\n", dev->name); + ifsta->direct_probe_tries++; + if (ifsta->direct_probe_tries > IEEE80211_AUTH_MAX_TRIES) { + printk(KERN_DEBUG "%s: direct probe to AP %s timed out\n", + sdata->dev->name, print_mac(mac, ifsta->bssid)); + ifsta->state = IEEE80211_STA_MLME_DISABLED; return; } - skb_reserve(skb, local->hw.extra_tx_headroom); - mgmt = (struct ieee80211_mgmt *) skb_put(skb, 24 + 6); - memset(mgmt, 0, 24 + 6); - mgmt->frame_control = IEEE80211_FC(IEEE80211_FTYPE_MGMT, - IEEE80211_STYPE_AUTH); - if (encrypt) - mgmt->frame_control |= cpu_to_le16(IEEE80211_FCTL_PROTECTED); - memcpy(mgmt->da, ifsta->bssid, ETH_ALEN); - memcpy(mgmt->sa, dev->dev_addr, ETH_ALEN); - memcpy(mgmt->bssid, ifsta->bssid, ETH_ALEN); - mgmt->u.auth.auth_alg = cpu_to_le16(ifsta->auth_alg); - mgmt->u.auth.auth_transaction = cpu_to_le16(transaction); - ifsta->auth_transaction = transaction + 1; - mgmt->u.auth.status_code = cpu_to_le16(0); - if (extra) - memcpy(skb_put(skb, extra_len), extra, extra_len); + printk(KERN_DEBUG "%s: direct probe to AP %s try %d\n", + sdata->dev->name, print_mac(mac, ifsta->bssid), + ifsta->direct_probe_tries); + + ifsta->state = IEEE80211_STA_MLME_DIRECT_PROBE; + + set_bit(IEEE80211_STA_REQ_DIRECT_PROBE, &ifsta->request); - ieee80211_sta_tx(dev, skb, encrypt); + /* Direct probe is sent to broadcast address as some APs + * will not answer to direct packet in unassociated state. + */ + ieee80211_send_probe_req(sdata, NULL, + ifsta->ssid, ifsta->ssid_len); + + mod_timer(&ifsta->timer, jiffies + IEEE80211_AUTH_TIMEOUT); } -static void ieee80211_authenticate(struct net_device *dev, +static void ieee80211_authenticate(struct ieee80211_sub_if_data *sdata, struct ieee80211_if_sta *ifsta) { DECLARE_MAC_BUF(mac); @@ -651,264 +797,95 @@ static void ieee80211_authenticate(struct net_device *dev, if (ifsta->auth_tries > IEEE80211_AUTH_MAX_TRIES) { printk(KERN_DEBUG "%s: authentication with AP %s" " timed out\n", - dev->name, print_mac(mac, ifsta->bssid)); - ifsta->state = IEEE80211_DISABLED; + sdata->dev->name, print_mac(mac, ifsta->bssid)); + ifsta->state = IEEE80211_STA_MLME_DISABLED; return; } - ifsta->state = IEEE80211_AUTHENTICATE; + ifsta->state = IEEE80211_STA_MLME_AUTHENTICATE; printk(KERN_DEBUG "%s: authenticate with AP %s\n", - dev->name, print_mac(mac, ifsta->bssid)); + sdata->dev->name, print_mac(mac, ifsta->bssid)); - ieee80211_send_auth(dev, ifsta, 1, NULL, 0, 0); + ieee80211_send_auth(sdata, ifsta, 1, NULL, 0, 0); mod_timer(&ifsta->timer, jiffies + IEEE80211_AUTH_TIMEOUT); } -static int ieee80211_compatible_rates(struct ieee80211_sta_bss *bss, - struct ieee80211_supported_band *sband, - u64 *rates) +static void ieee80211_set_disassoc(struct ieee80211_sub_if_data *sdata, + struct ieee80211_if_sta *ifsta, bool deauth, + bool self_disconnected, u16 reason) { - int i, j, count; - *rates = 0; - count = 0; - for (i = 0; i < bss->supp_rates_len; i++) { - int rate = (bss->supp_rates[i] & 0x7F) * 5; - - for (j = 0; j < sband->n_bitrates; j++) - if (sband->bitrates[j].bitrate == rate) { - *rates |= BIT(j); - count++; - break; - } - } - - return count; -} + struct ieee80211_local *local = sdata->local; + struct sta_info *sta; + u32 changed = BSS_CHANGED_ASSOC; -static void ieee80211_send_assoc(struct net_device *dev, - struct ieee80211_if_sta *ifsta) -{ - struct ieee80211_local *local = wdev_priv(dev->ieee80211_ptr); - struct sk_buff *skb; - struct ieee80211_mgmt *mgmt; - u8 *pos, *ies; - int i, len, count, rates_len, supp_rates_len; - u16 capab; - struct ieee80211_sta_bss *bss; - int wmm = 0; - struct ieee80211_supported_band *sband; - u64 rates = 0; + rcu_read_lock(); - skb = dev_alloc_skb(local->hw.extra_tx_headroom + - sizeof(*mgmt) + 200 + ifsta->extra_ie_len + - ifsta->ssid_len); - if (!skb) { - printk(KERN_DEBUG "%s: failed to allocate buffer for assoc " - "frame\n", dev->name); + sta = sta_info_get(local, ifsta->bssid); + if (!sta) { + rcu_read_unlock(); return; } - skb_reserve(skb, local->hw.extra_tx_headroom); - - sband = local->hw.wiphy->bands[local->hw.conf.channel->band]; - - capab = ifsta->capab; - - if (local->hw.conf.channel->band == IEEE80211_BAND_2GHZ) { - if (!(local->hw.flags & IEEE80211_HW_2GHZ_SHORT_SLOT_INCAPABLE)) - capab |= WLAN_CAPABILITY_SHORT_SLOT_TIME; - if (!(local->hw.flags & IEEE80211_HW_2GHZ_SHORT_PREAMBLE_INCAPABLE)) - capab |= WLAN_CAPABILITY_SHORT_PREAMBLE; - } - - bss = ieee80211_rx_bss_get(dev, ifsta->bssid, - local->hw.conf.channel->center_freq, - ifsta->ssid, ifsta->ssid_len); - if (bss) { - if (bss->capability & WLAN_CAPABILITY_PRIVACY) - capab |= WLAN_CAPABILITY_PRIVACY; - if (bss->wmm_ie) { - wmm = 1; - } - - /* get all rates supported by the device and the AP as - * some APs don't like getting a superset of their rates - * in the association request (e.g. D-Link DAP 1353 in - * b-only mode) */ - rates_len = ieee80211_compatible_rates(bss, sband, &rates); - - ieee80211_rx_bss_put(dev, bss); - } else { - rates = ~0; - rates_len = sband->n_bitrates; - } - mgmt = (struct ieee80211_mgmt *) skb_put(skb, 24); - memset(mgmt, 0, 24); - memcpy(mgmt->da, ifsta->bssid, ETH_ALEN); - memcpy(mgmt->sa, dev->dev_addr, ETH_ALEN); - memcpy(mgmt->bssid, ifsta->bssid, ETH_ALEN); - - if (ifsta->flags & IEEE80211_STA_PREV_BSSID_SET) { - skb_put(skb, 10); - mgmt->frame_control = IEEE80211_FC(IEEE80211_FTYPE_MGMT, - IEEE80211_STYPE_REASSOC_REQ); - mgmt->u.reassoc_req.capab_info = cpu_to_le16(capab); - mgmt->u.reassoc_req.listen_interval = cpu_to_le16(1); - memcpy(mgmt->u.reassoc_req.current_ap, ifsta->prev_bssid, - ETH_ALEN); - } else { - skb_put(skb, 4); - mgmt->frame_control = IEEE80211_FC(IEEE80211_FTYPE_MGMT, - IEEE80211_STYPE_ASSOC_REQ); - mgmt->u.assoc_req.capab_info = cpu_to_le16(capab); - mgmt->u.assoc_req.listen_interval = cpu_to_le16(1); + if (deauth) { + ifsta->direct_probe_tries = 0; + ifsta->auth_tries = 0; } + ifsta->assoc_scan_tries = 0; + ifsta->assoc_tries = 0; - /* SSID */ - ies = pos = skb_put(skb, 2 + ifsta->ssid_len); - *pos++ = WLAN_EID_SSID; - *pos++ = ifsta->ssid_len; - memcpy(pos, ifsta->ssid, ifsta->ssid_len); - - /* add all rates which were marked to be used above */ - supp_rates_len = rates_len; - if (supp_rates_len > 8) - supp_rates_len = 8; - - len = sband->n_bitrates; - pos = skb_put(skb, supp_rates_len + 2); - *pos++ = WLAN_EID_SUPP_RATES; - *pos++ = supp_rates_len; - - count = 0; - for (i = 0; i < sband->n_bitrates; i++) { - if (BIT(i) & rates) { - int rate = sband->bitrates[i].bitrate; - *pos++ = (u8) (rate / 5); - if (++count == 8) - break; - } - } + netif_tx_stop_all_queues(sdata->dev); + netif_carrier_off(sdata->dev); - if (count == 8) { - pos = skb_put(skb, rates_len - count + 2); - *pos++ = WLAN_EID_EXT_SUPP_RATES; - *pos++ = rates_len - count; + ieee80211_sta_tear_down_BA_sessions(sdata, sta->sta.addr); - for (i++; i < sband->n_bitrates; i++) { - if (BIT(i) & rates) { - int rate = sband->bitrates[i].bitrate; - *pos++ = (u8) (rate / 5); - } - } + if (self_disconnected) { + if (deauth) + ieee80211_send_deauth_disassoc(sdata, + IEEE80211_STYPE_DEAUTH, reason); + else + ieee80211_send_deauth_disassoc(sdata, + IEEE80211_STYPE_DISASSOC, reason); } - if (ifsta->extra_ie) { - pos = skb_put(skb, ifsta->extra_ie_len); - memcpy(pos, ifsta->extra_ie, ifsta->extra_ie_len); - } + ifsta->flags &= ~IEEE80211_STA_ASSOCIATED; + changed |= ieee80211_reset_erp_info(sdata); - if (wmm && (ifsta->flags & IEEE80211_STA_WMM_ENABLED)) { - pos = skb_put(skb, 9); - *pos++ = WLAN_EID_VENDOR_SPECIFIC; - *pos++ = 7; /* len */ - *pos++ = 0x00; /* Microsoft OUI 00:50:F2 */ - *pos++ = 0x50; - *pos++ = 0xf2; - *pos++ = 2; /* WME */ - *pos++ = 0; /* WME info */ - *pos++ = 1; /* WME ver */ - *pos++ = 0; - } - /* wmm support is a must to HT */ - if (wmm && sband->ht_info.ht_supported) { - __le16 tmp = cpu_to_le16(sband->ht_info.cap); - pos = skb_put(skb, sizeof(struct ieee80211_ht_cap)+2); - *pos++ = WLAN_EID_HT_CAPABILITY; - *pos++ = sizeof(struct ieee80211_ht_cap); - memset(pos, 0, sizeof(struct ieee80211_ht_cap)); - memcpy(pos, &tmp, sizeof(u16)); - pos += sizeof(u16); - /* TODO: needs a define here for << 2 */ - *pos++ = sband->ht_info.ampdu_factor | - (sband->ht_info.ampdu_density << 2); - memcpy(pos, sband->ht_info.supp_mcs_set, 16); - } + if (sdata->bss_conf.assoc_ht) + changed |= BSS_CHANGED_HT; - kfree(ifsta->assocreq_ies); - ifsta->assocreq_ies_len = (skb->data + skb->len) - ies; - ifsta->assocreq_ies = kmalloc(ifsta->assocreq_ies_len, GFP_KERNEL); - if (ifsta->assocreq_ies) - memcpy(ifsta->assocreq_ies, ies, ifsta->assocreq_ies_len); + sdata->bss_conf.assoc_ht = 0; + sdata->bss_conf.ht_conf = NULL; + sdata->bss_conf.ht_bss_conf = NULL; - ieee80211_sta_tx(dev, skb, 0); -} + ieee80211_led_assoc(local, 0); + sdata->bss_conf.assoc = 0; + ieee80211_sta_send_apinfo(sdata, ifsta); -static void ieee80211_send_deauth(struct net_device *dev, - struct ieee80211_if_sta *ifsta, u16 reason) -{ - struct ieee80211_local *local = wdev_priv(dev->ieee80211_ptr); - struct sk_buff *skb; - struct ieee80211_mgmt *mgmt; + if (self_disconnected) + ifsta->state = IEEE80211_STA_MLME_DISABLED; - skb = dev_alloc_skb(local->hw.extra_tx_headroom + sizeof(*mgmt)); - if (!skb) { - printk(KERN_DEBUG "%s: failed to allocate buffer for deauth " - "frame\n", dev->name); - return; - } - skb_reserve(skb, local->hw.extra_tx_headroom); + sta_info_unlink(&sta); - mgmt = (struct ieee80211_mgmt *) skb_put(skb, 24); - memset(mgmt, 0, 24); - memcpy(mgmt->da, ifsta->bssid, ETH_ALEN); - memcpy(mgmt->sa, dev->dev_addr, ETH_ALEN); - memcpy(mgmt->bssid, ifsta->bssid, ETH_ALEN); - mgmt->frame_control = IEEE80211_FC(IEEE80211_FTYPE_MGMT, - IEEE80211_STYPE_DEAUTH); - skb_put(skb, 2); - mgmt->u.deauth.reason_code = cpu_to_le16(reason); + rcu_read_unlock(); - ieee80211_sta_tx(dev, skb, 0); + sta_info_destroy(sta); } - -static void ieee80211_send_disassoc(struct net_device *dev, - struct ieee80211_if_sta *ifsta, u16 reason) +static int ieee80211_sta_wep_configured(struct ieee80211_sub_if_data *sdata) { - struct ieee80211_local *local = wdev_priv(dev->ieee80211_ptr); - struct sk_buff *skb; - struct ieee80211_mgmt *mgmt; - - skb = dev_alloc_skb(local->hw.extra_tx_headroom + sizeof(*mgmt)); - if (!skb) { - printk(KERN_DEBUG "%s: failed to allocate buffer for disassoc " - "frame\n", dev->name); - return; - } - skb_reserve(skb, local->hw.extra_tx_headroom); - - mgmt = (struct ieee80211_mgmt *) skb_put(skb, 24); - memset(mgmt, 0, 24); - memcpy(mgmt->da, ifsta->bssid, ETH_ALEN); - memcpy(mgmt->sa, dev->dev_addr, ETH_ALEN); - memcpy(mgmt->bssid, ifsta->bssid, ETH_ALEN); - mgmt->frame_control = IEEE80211_FC(IEEE80211_FTYPE_MGMT, - IEEE80211_STYPE_DISASSOC); - skb_put(skb, 2); - mgmt->u.disassoc.reason_code = cpu_to_le16(reason); - - ieee80211_sta_tx(dev, skb, 0); + if (!sdata || !sdata->default_key || + sdata->default_key->conf.alg != ALG_WEP) + return 0; + return 1; } - -static int ieee80211_privacy_mismatch(struct net_device *dev, +static int ieee80211_privacy_mismatch(struct ieee80211_sub_if_data *sdata, struct ieee80211_if_sta *ifsta) { - struct ieee80211_local *local = wdev_priv(dev->ieee80211_ptr); - struct ieee80211_sta_bss *bss; + struct ieee80211_local *local = sdata->local; + struct ieee80211_bss *bss; int bss_privacy; int wep_privacy; int privacy_invoked; @@ -916,17 +893,17 @@ static int ieee80211_privacy_mismatch(struct net_device *dev, if (!ifsta || (ifsta->flags & IEEE80211_STA_MIXED_CELL)) return 0; - bss = ieee80211_rx_bss_get(dev, ifsta->bssid, + bss = ieee80211_rx_bss_get(local, ifsta->bssid, local->hw.conf.channel->center_freq, ifsta->ssid, ifsta->ssid_len); if (!bss) return 0; bss_privacy = !!(bss->capability & WLAN_CAPABILITY_PRIVACY); - wep_privacy = !!ieee80211_sta_wep_configured(dev); + wep_privacy = !!ieee80211_sta_wep_configured(sdata); privacy_invoked = !!(ifsta->flags & IEEE80211_STA_PRIVACY_INVOKED); - ieee80211_rx_bss_put(dev, bss); + ieee80211_rx_bss_put(local, bss); if ((bss_privacy == wep_privacy) || (bss_privacy == privacy_invoked)) return 0; @@ -934,8 +911,7 @@ static int ieee80211_privacy_mismatch(struct net_device *dev, return 1; } - -static void ieee80211_associate(struct net_device *dev, +static void ieee80211_associate(struct ieee80211_sub_if_data *sdata, struct ieee80211_if_sta *ifsta) { DECLARE_MAC_BUF(mac); @@ -944,31 +920,31 @@ static void ieee80211_associate(struct net_device *dev, if (ifsta->assoc_tries > IEEE80211_ASSOC_MAX_TRIES) { printk(KERN_DEBUG "%s: association with AP %s" " timed out\n", - dev->name, print_mac(mac, ifsta->bssid)); - ifsta->state = IEEE80211_DISABLED; + sdata->dev->name, print_mac(mac, ifsta->bssid)); + ifsta->state = IEEE80211_STA_MLME_DISABLED; return; } - ifsta->state = IEEE80211_ASSOCIATE; + ifsta->state = IEEE80211_STA_MLME_ASSOCIATE; printk(KERN_DEBUG "%s: associate with AP %s\n", - dev->name, print_mac(mac, ifsta->bssid)); - if (ieee80211_privacy_mismatch(dev, ifsta)) { + sdata->dev->name, print_mac(mac, ifsta->bssid)); + if (ieee80211_privacy_mismatch(sdata, ifsta)) { printk(KERN_DEBUG "%s: mismatch in privacy configuration and " - "mixed-cell disabled - abort association\n", dev->name); - ifsta->state = IEEE80211_DISABLED; + "mixed-cell disabled - abort association\n", sdata->dev->name); + ifsta->state = IEEE80211_STA_MLME_DISABLED; return; } - ieee80211_send_assoc(dev, ifsta); + ieee80211_send_assoc(sdata, ifsta); mod_timer(&ifsta->timer, jiffies + IEEE80211_ASSOC_TIMEOUT); } -static void ieee80211_associated(struct net_device *dev, +static void ieee80211_associated(struct ieee80211_sub_if_data *sdata, struct ieee80211_if_sta *ifsta) { - struct ieee80211_local *local = wdev_priv(dev->ieee80211_ptr); + struct ieee80211_local *local = sdata->local; struct sta_info *sta; int disassoc; DECLARE_MAC_BUF(mac); @@ -978,14 +954,14 @@ static void ieee80211_associated(struct net_device *dev, * for better APs. */ /* TODO: remove expired BSSes */ - ifsta->state = IEEE80211_ASSOCIATED; + ifsta->state = IEEE80211_STA_MLME_ASSOCIATED; rcu_read_lock(); sta = sta_info_get(local, ifsta->bssid); if (!sta) { printk(KERN_DEBUG "%s: No STA entry for own AP %s\n", - dev->name, print_mac(mac, ifsta->bssid)); + sdata->dev->name, print_mac(mac, ifsta->bssid)); disassoc = 1; } else { disassoc = 0; @@ -995,20 +971,19 @@ static void ieee80211_associated(struct net_device *dev, printk(KERN_DEBUG "%s: No ProbeResp from " "current AP %s - assume out of " "range\n", - dev->name, print_mac(mac, ifsta->bssid)); + sdata->dev->name, print_mac(mac, ifsta->bssid)); disassoc = 1; - sta_info_unlink(&sta); } else - ieee80211_send_probe_req(dev, ifsta->bssid, - local->scan_ssid, - local->scan_ssid_len); + ieee80211_send_probe_req(sdata, ifsta->bssid, + ifsta->ssid, + ifsta->ssid_len); ifsta->flags ^= IEEE80211_STA_PROBEREQ_POLL; } else { ifsta->flags &= ~IEEE80211_STA_PROBEREQ_POLL; if (time_after(jiffies, ifsta->last_probe + IEEE80211_PROBE_INTERVAL)) { ifsta->last_probe = jiffies; - ieee80211_send_probe_req(dev, ifsta->bssid, + ieee80211_send_probe_req(sdata, ifsta->bssid, ifsta->ssid, ifsta->ssid_len); } @@ -1017,100 +992,25 @@ static void ieee80211_associated(struct net_device *dev, rcu_read_unlock(); - if (disassoc && sta) - sta_info_destroy(sta); - - if (disassoc) { - ifsta->state = IEEE80211_DISABLED; - ieee80211_set_associated(dev, ifsta, 0); - } else { + if (disassoc) + ieee80211_set_disassoc(sdata, ifsta, true, true, + WLAN_REASON_PREV_AUTH_NOT_VALID); + else mod_timer(&ifsta->timer, jiffies + IEEE80211_MONITORING_INTERVAL); - } } -static void ieee80211_send_probe_req(struct net_device *dev, u8 *dst, - u8 *ssid, size_t ssid_len) -{ - struct ieee80211_local *local = wdev_priv(dev->ieee80211_ptr); - struct ieee80211_supported_band *sband; - struct sk_buff *skb; - struct ieee80211_mgmt *mgmt; - u8 *pos, *supp_rates, *esupp_rates = NULL; - int i; - - skb = dev_alloc_skb(local->hw.extra_tx_headroom + sizeof(*mgmt) + 200); - if (!skb) { - printk(KERN_DEBUG "%s: failed to allocate buffer for probe " - "request\n", dev->name); - return; - } - skb_reserve(skb, local->hw.extra_tx_headroom); - - mgmt = (struct ieee80211_mgmt *) skb_put(skb, 24); - memset(mgmt, 0, 24); - mgmt->frame_control = IEEE80211_FC(IEEE80211_FTYPE_MGMT, - IEEE80211_STYPE_PROBE_REQ); - memcpy(mgmt->sa, dev->dev_addr, ETH_ALEN); - if (dst) { - memcpy(mgmt->da, dst, ETH_ALEN); - memcpy(mgmt->bssid, dst, ETH_ALEN); - } else { - memset(mgmt->da, 0xff, ETH_ALEN); - memset(mgmt->bssid, 0xff, ETH_ALEN); - } - pos = skb_put(skb, 2 + ssid_len); - *pos++ = WLAN_EID_SSID; - *pos++ = ssid_len; - memcpy(pos, ssid, ssid_len); - - supp_rates = skb_put(skb, 2); - supp_rates[0] = WLAN_EID_SUPP_RATES; - supp_rates[1] = 0; - sband = local->hw.wiphy->bands[local->hw.conf.channel->band]; - - for (i = 0; i < sband->n_bitrates; i++) { - struct ieee80211_rate *rate = &sband->bitrates[i]; - if (esupp_rates) { - pos = skb_put(skb, 1); - esupp_rates[1]++; - } else if (supp_rates[1] == 8) { - esupp_rates = skb_put(skb, 3); - esupp_rates[0] = WLAN_EID_EXT_SUPP_RATES; - esupp_rates[1] = 1; - pos = &esupp_rates[2]; - } else { - pos = skb_put(skb, 1); - supp_rates[1]++; - } - *pos = rate->bitrate / 5; - } - - ieee80211_sta_tx(dev, skb, 0); -} - - -static int ieee80211_sta_wep_configured(struct net_device *dev) -{ - struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev); - if (!sdata || !sdata->default_key || - sdata->default_key->conf.alg != ALG_WEP) - return 0; - return 1; -} - - -static void ieee80211_auth_completed(struct net_device *dev, +static void ieee80211_auth_completed(struct ieee80211_sub_if_data *sdata, struct ieee80211_if_sta *ifsta) { - printk(KERN_DEBUG "%s: authenticated\n", dev->name); + printk(KERN_DEBUG "%s: authenticated\n", sdata->dev->name); ifsta->flags |= IEEE80211_STA_AUTHENTICATED; - ieee80211_associate(dev, ifsta); + ieee80211_associate(sdata, ifsta); } -static void ieee80211_auth_challenge(struct net_device *dev, +static void ieee80211_auth_challenge(struct ieee80211_sub_if_data *sdata, struct ieee80211_if_sta *ifsta, struct ieee80211_mgmt *mgmt, size_t len) @@ -1118,655 +1018,58 @@ static void ieee80211_auth_challenge(struct net_device *dev, u8 *pos; struct ieee802_11_elems elems; - printk(KERN_DEBUG "%s: replying to auth challenge\n", dev->name); pos = mgmt->u.auth.variable; ieee802_11_parse_elems(pos, len - (pos - (u8 *) mgmt), &elems); - if (!elems.challenge) { - printk(KERN_DEBUG "%s: no challenge IE in shared key auth " - "frame\n", dev->name); + if (!elems.challenge) return; - } - ieee80211_send_auth(dev, ifsta, 3, elems.challenge - 2, + ieee80211_send_auth(sdata, ifsta, 3, elems.challenge - 2, elems.challenge_len + 2, 1); } -static void ieee80211_send_addba_resp(struct net_device *dev, u8 *da, u16 tid, - u8 dialog_token, u16 status, u16 policy, - u16 buf_size, u16 timeout) -{ - struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev); - struct ieee80211_if_sta *ifsta = &sdata->u.sta; - struct ieee80211_local *local = wdev_priv(dev->ieee80211_ptr); - struct sk_buff *skb; - struct ieee80211_mgmt *mgmt; - u16 capab; - - skb = dev_alloc_skb(sizeof(*mgmt) + local->hw.extra_tx_headroom + 1 + - sizeof(mgmt->u.action.u.addba_resp)); - if (!skb) { - printk(KERN_DEBUG "%s: failed to allocate buffer " - "for addba resp frame\n", dev->name); - return; - } - - skb_reserve(skb, local->hw.extra_tx_headroom); - mgmt = (struct ieee80211_mgmt *) skb_put(skb, 24); - memset(mgmt, 0, 24); - memcpy(mgmt->da, da, ETH_ALEN); - memcpy(mgmt->sa, dev->dev_addr, ETH_ALEN); - if (sdata->vif.type == IEEE80211_IF_TYPE_AP) - memcpy(mgmt->bssid, dev->dev_addr, ETH_ALEN); - else - memcpy(mgmt->bssid, ifsta->bssid, ETH_ALEN); - mgmt->frame_control = IEEE80211_FC(IEEE80211_FTYPE_MGMT, - IEEE80211_STYPE_ACTION); - - skb_put(skb, 1 + sizeof(mgmt->u.action.u.addba_resp)); - mgmt->u.action.category = WLAN_CATEGORY_BACK; - mgmt->u.action.u.addba_resp.action_code = WLAN_ACTION_ADDBA_RESP; - mgmt->u.action.u.addba_resp.dialog_token = dialog_token; - - capab = (u16)(policy << 1); /* bit 1 aggregation policy */ - capab |= (u16)(tid << 2); /* bit 5:2 TID number */ - capab |= (u16)(buf_size << 6); /* bit 15:6 max size of aggregation */ - - mgmt->u.action.u.addba_resp.capab = cpu_to_le16(capab); - mgmt->u.action.u.addba_resp.timeout = cpu_to_le16(timeout); - mgmt->u.action.u.addba_resp.status = cpu_to_le16(status); - - ieee80211_sta_tx(dev, skb, 0); - - return; -} - -void ieee80211_send_addba_request(struct net_device *dev, const u8 *da, - u16 tid, u8 dialog_token, u16 start_seq_num, - u16 agg_size, u16 timeout) -{ - struct ieee80211_local *local = wdev_priv(dev->ieee80211_ptr); - struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev); - struct ieee80211_if_sta *ifsta = &sdata->u.sta; - struct sk_buff *skb; - struct ieee80211_mgmt *mgmt; - u16 capab; - - skb = dev_alloc_skb(sizeof(*mgmt) + local->hw.extra_tx_headroom + 1 + - sizeof(mgmt->u.action.u.addba_req)); - - - if (!skb) { - printk(KERN_ERR "%s: failed to allocate buffer " - "for addba request frame\n", dev->name); - return; - } - skb_reserve(skb, local->hw.extra_tx_headroom); - mgmt = (struct ieee80211_mgmt *) skb_put(skb, 24); - memset(mgmt, 0, 24); - memcpy(mgmt->da, da, ETH_ALEN); - memcpy(mgmt->sa, dev->dev_addr, ETH_ALEN); - if (sdata->vif.type == IEEE80211_IF_TYPE_AP) - memcpy(mgmt->bssid, dev->dev_addr, ETH_ALEN); - else - memcpy(mgmt->bssid, ifsta->bssid, ETH_ALEN); - - mgmt->frame_control = IEEE80211_FC(IEEE80211_FTYPE_MGMT, - IEEE80211_STYPE_ACTION); - - skb_put(skb, 1 + sizeof(mgmt->u.action.u.addba_req)); - - mgmt->u.action.category = WLAN_CATEGORY_BACK; - mgmt->u.action.u.addba_req.action_code = WLAN_ACTION_ADDBA_REQ; - - mgmt->u.action.u.addba_req.dialog_token = dialog_token; - capab = (u16)(1 << 1); /* bit 1 aggregation policy */ - capab |= (u16)(tid << 2); /* bit 5:2 TID number */ - capab |= (u16)(agg_size << 6); /* bit 15:6 max size of aggergation */ - - mgmt->u.action.u.addba_req.capab = cpu_to_le16(capab); - - mgmt->u.action.u.addba_req.timeout = cpu_to_le16(timeout); - mgmt->u.action.u.addba_req.start_seq_num = - cpu_to_le16(start_seq_num << 4); - - ieee80211_sta_tx(dev, skb, 0); -} - -static void ieee80211_sta_process_addba_request(struct net_device *dev, - struct ieee80211_mgmt *mgmt, - size_t len) -{ - struct ieee80211_local *local = wdev_priv(dev->ieee80211_ptr); - struct ieee80211_hw *hw = &local->hw; - struct ieee80211_conf *conf = &hw->conf; - struct sta_info *sta; - struct tid_ampdu_rx *tid_agg_rx; - u16 capab, tid, timeout, ba_policy, buf_size, start_seq_num, status; - u8 dialog_token; - int ret = -EOPNOTSUPP; - DECLARE_MAC_BUF(mac); - - rcu_read_lock(); - - sta = sta_info_get(local, mgmt->sa); - if (!sta) { - rcu_read_unlock(); - return; - } - - /* extract session parameters from addba request frame */ - dialog_token = mgmt->u.action.u.addba_req.dialog_token; - timeout = le16_to_cpu(mgmt->u.action.u.addba_req.timeout); - start_seq_num = - le16_to_cpu(mgmt->u.action.u.addba_req.start_seq_num) >> 4; - - capab = le16_to_cpu(mgmt->u.action.u.addba_req.capab); - ba_policy = (capab & IEEE80211_ADDBA_PARAM_POLICY_MASK) >> 1; - tid = (capab & IEEE80211_ADDBA_PARAM_TID_MASK) >> 2; - buf_size = (capab & IEEE80211_ADDBA_PARAM_BUF_SIZE_MASK) >> 6; - - status = WLAN_STATUS_REQUEST_DECLINED; - - /* sanity check for incoming parameters: - * check if configuration can support the BA policy - * and if buffer size does not exceeds max value */ - if (((ba_policy != 1) - && (!(conf->ht_conf.cap & IEEE80211_HT_CAP_DELAY_BA))) - || (buf_size > IEEE80211_MAX_AMPDU_BUF)) { - status = WLAN_STATUS_INVALID_QOS_PARAM; -#ifdef CONFIG_MAC80211_HT_DEBUG - if (net_ratelimit()) - printk(KERN_DEBUG "AddBA Req with bad params from " - "%s on tid %u. policy %d, buffer size %d\n", - print_mac(mac, mgmt->sa), tid, ba_policy, - buf_size); -#endif /* CONFIG_MAC80211_HT_DEBUG */ - goto end_no_lock; - } - /* determine default buffer size */ - if (buf_size == 0) { - struct ieee80211_supported_band *sband; - - sband = local->hw.wiphy->bands[conf->channel->band]; - buf_size = IEEE80211_MIN_AMPDU_BUF; - buf_size = buf_size << sband->ht_info.ampdu_factor; - } - - - /* examine state machine */ - spin_lock_bh(&sta->ampdu_mlme.ampdu_rx); - - if (sta->ampdu_mlme.tid_state_rx[tid] != HT_AGG_STATE_IDLE) { -#ifdef CONFIG_MAC80211_HT_DEBUG - if (net_ratelimit()) - printk(KERN_DEBUG "unexpected AddBA Req from " - "%s on tid %u\n", - print_mac(mac, mgmt->sa), tid); -#endif /* CONFIG_MAC80211_HT_DEBUG */ - goto end; - } - - /* prepare A-MPDU MLME for Rx aggregation */ - sta->ampdu_mlme.tid_rx[tid] = - kmalloc(sizeof(struct tid_ampdu_rx), GFP_ATOMIC); - if (!sta->ampdu_mlme.tid_rx[tid]) { - if (net_ratelimit()) - printk(KERN_ERR "allocate rx mlme to tid %d failed\n", - tid); - goto end; - } - /* rx timer */ - sta->ampdu_mlme.tid_rx[tid]->session_timer.function = - sta_rx_agg_session_timer_expired; - sta->ampdu_mlme.tid_rx[tid]->session_timer.data = - (unsigned long)&sta->timer_to_tid[tid]; - init_timer(&sta->ampdu_mlme.tid_rx[tid]->session_timer); - - tid_agg_rx = sta->ampdu_mlme.tid_rx[tid]; - - /* prepare reordering buffer */ - tid_agg_rx->reorder_buf = - kmalloc(buf_size * sizeof(struct sk_buff *), GFP_ATOMIC); - if (!tid_agg_rx->reorder_buf) { - if (net_ratelimit()) - printk(KERN_ERR "can not allocate reordering buffer " - "to tid %d\n", tid); - kfree(sta->ampdu_mlme.tid_rx[tid]); - goto end; - } - memset(tid_agg_rx->reorder_buf, 0, - buf_size * sizeof(struct sk_buff *)); - - if (local->ops->ampdu_action) - ret = local->ops->ampdu_action(hw, IEEE80211_AMPDU_RX_START, - sta->addr, tid, &start_seq_num); -#ifdef CONFIG_MAC80211_HT_DEBUG - printk(KERN_DEBUG "Rx A-MPDU request on tid %d result %d\n", tid, ret); -#endif /* CONFIG_MAC80211_HT_DEBUG */ - - if (ret) { - kfree(tid_agg_rx->reorder_buf); - kfree(tid_agg_rx); - sta->ampdu_mlme.tid_rx[tid] = NULL; - goto end; - } - - /* change state and send addba resp */ - sta->ampdu_mlme.tid_state_rx[tid] = HT_AGG_STATE_OPERATIONAL; - tid_agg_rx->dialog_token = dialog_token; - tid_agg_rx->ssn = start_seq_num; - tid_agg_rx->head_seq_num = start_seq_num; - tid_agg_rx->buf_size = buf_size; - tid_agg_rx->timeout = timeout; - tid_agg_rx->stored_mpdu_num = 0; - status = WLAN_STATUS_SUCCESS; -end: - spin_unlock_bh(&sta->ampdu_mlme.ampdu_rx); - -end_no_lock: - ieee80211_send_addba_resp(sta->sdata->dev, sta->addr, tid, - dialog_token, status, 1, buf_size, timeout); - rcu_read_unlock(); -} - -static void ieee80211_sta_process_addba_resp(struct net_device *dev, - struct ieee80211_mgmt *mgmt, - size_t len) -{ - struct ieee80211_local *local = wdev_priv(dev->ieee80211_ptr); - struct ieee80211_hw *hw = &local->hw; - struct sta_info *sta; - u16 capab; - u16 tid; - u8 *state; - - rcu_read_lock(); - - sta = sta_info_get(local, mgmt->sa); - if (!sta) { - rcu_read_unlock(); - return; - } - - capab = le16_to_cpu(mgmt->u.action.u.addba_resp.capab); - tid = (capab & IEEE80211_ADDBA_PARAM_TID_MASK) >> 2; - - state = &sta->ampdu_mlme.tid_state_tx[tid]; - - spin_lock_bh(&sta->ampdu_mlme.ampdu_tx); - - if (!(*state & HT_ADDBA_REQUESTED_MSK)) { - spin_unlock_bh(&sta->ampdu_mlme.ampdu_tx); - printk(KERN_DEBUG "state not HT_ADDBA_REQUESTED_MSK:" - "%d\n", *state); - goto addba_resp_exit; - } - - if (mgmt->u.action.u.addba_resp.dialog_token != - sta->ampdu_mlme.tid_tx[tid]->dialog_token) { - spin_unlock_bh(&sta->ampdu_mlme.ampdu_tx); -#ifdef CONFIG_MAC80211_HT_DEBUG - printk(KERN_DEBUG "wrong addBA response token, tid %d\n", tid); -#endif /* CONFIG_MAC80211_HT_DEBUG */ - goto addba_resp_exit; - } - - del_timer_sync(&sta->ampdu_mlme.tid_tx[tid]->addba_resp_timer); -#ifdef CONFIG_MAC80211_HT_DEBUG - printk(KERN_DEBUG "switched off addBA timer for tid %d \n", tid); -#endif /* CONFIG_MAC80211_HT_DEBUG */ - if (le16_to_cpu(mgmt->u.action.u.addba_resp.status) - == WLAN_STATUS_SUCCESS) { - if (*state & HT_ADDBA_RECEIVED_MSK) - printk(KERN_DEBUG "double addBA response\n"); - - *state |= HT_ADDBA_RECEIVED_MSK; - sta->ampdu_mlme.addba_req_num[tid] = 0; - - if (*state == HT_AGG_STATE_OPERATIONAL) { - printk(KERN_DEBUG "Aggregation on for tid %d \n", tid); - ieee80211_wake_queue(hw, sta->tid_to_tx_q[tid]); - } - - spin_unlock_bh(&sta->ampdu_mlme.ampdu_tx); - printk(KERN_DEBUG "recipient accepted agg: tid %d \n", tid); - } else { - printk(KERN_DEBUG "recipient rejected agg: tid %d \n", tid); - - sta->ampdu_mlme.addba_req_num[tid]++; - /* this will allow the state check in stop_BA_session */ - *state = HT_AGG_STATE_OPERATIONAL; - spin_unlock_bh(&sta->ampdu_mlme.ampdu_tx); - ieee80211_stop_tx_ba_session(hw, sta->addr, tid, - WLAN_BACK_INITIATOR); - } - -addba_resp_exit: - rcu_read_unlock(); -} - -void ieee80211_send_delba(struct net_device *dev, const u8 *da, u16 tid, - u16 initiator, u16 reason_code) -{ - struct ieee80211_local *local = wdev_priv(dev->ieee80211_ptr); - struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev); - struct ieee80211_if_sta *ifsta = &sdata->u.sta; - struct sk_buff *skb; - struct ieee80211_mgmt *mgmt; - u16 params; - - skb = dev_alloc_skb(sizeof(*mgmt) + local->hw.extra_tx_headroom + 1 + - sizeof(mgmt->u.action.u.delba)); - - if (!skb) { - printk(KERN_ERR "%s: failed to allocate buffer " - "for delba frame\n", dev->name); - return; - } - - skb_reserve(skb, local->hw.extra_tx_headroom); - mgmt = (struct ieee80211_mgmt *) skb_put(skb, 24); - memset(mgmt, 0, 24); - memcpy(mgmt->da, da, ETH_ALEN); - memcpy(mgmt->sa, dev->dev_addr, ETH_ALEN); - if (sdata->vif.type == IEEE80211_IF_TYPE_AP) - memcpy(mgmt->bssid, dev->dev_addr, ETH_ALEN); - else - memcpy(mgmt->bssid, ifsta->bssid, ETH_ALEN); - mgmt->frame_control = IEEE80211_FC(IEEE80211_FTYPE_MGMT, - IEEE80211_STYPE_ACTION); - - skb_put(skb, 1 + sizeof(mgmt->u.action.u.delba)); - - mgmt->u.action.category = WLAN_CATEGORY_BACK; - mgmt->u.action.u.delba.action_code = WLAN_ACTION_DELBA; - params = (u16)(initiator << 11); /* bit 11 initiator */ - params |= (u16)(tid << 12); /* bit 15:12 TID number */ - - mgmt->u.action.u.delba.params = cpu_to_le16(params); - mgmt->u.action.u.delba.reason_code = cpu_to_le16(reason_code); - - ieee80211_sta_tx(dev, skb, 0); -} - -void ieee80211_sta_stop_rx_ba_session(struct net_device *dev, u8 *ra, u16 tid, - u16 initiator, u16 reason) -{ - struct ieee80211_local *local = wdev_priv(dev->ieee80211_ptr); - struct ieee80211_hw *hw = &local->hw; - struct sta_info *sta; - int ret, i; - DECLARE_MAC_BUF(mac); - - rcu_read_lock(); - - sta = sta_info_get(local, ra); - if (!sta) { - rcu_read_unlock(); - return; - } - - /* check if TID is in operational state */ - spin_lock_bh(&sta->ampdu_mlme.ampdu_rx); - if (sta->ampdu_mlme.tid_state_rx[tid] - != HT_AGG_STATE_OPERATIONAL) { - spin_unlock_bh(&sta->ampdu_mlme.ampdu_rx); - rcu_read_unlock(); - return; - } - sta->ampdu_mlme.tid_state_rx[tid] = - HT_AGG_STATE_REQ_STOP_BA_MSK | - (initiator << HT_AGG_STATE_INITIATOR_SHIFT); - spin_unlock_bh(&sta->ampdu_mlme.ampdu_rx); - - /* stop HW Rx aggregation. ampdu_action existence - * already verified in session init so we add the BUG_ON */ - BUG_ON(!local->ops->ampdu_action); - -#ifdef CONFIG_MAC80211_HT_DEBUG - printk(KERN_DEBUG "Rx BA session stop requested for %s tid %u\n", - print_mac(mac, ra), tid); -#endif /* CONFIG_MAC80211_HT_DEBUG */ - - ret = local->ops->ampdu_action(hw, IEEE80211_AMPDU_RX_STOP, - ra, tid, NULL); - if (ret) - printk(KERN_DEBUG "HW problem - can not stop rx " - "aggergation for tid %d\n", tid); - - /* shutdown timer has not expired */ - if (initiator != WLAN_BACK_TIMER) - del_timer_sync(&sta->ampdu_mlme.tid_rx[tid]->session_timer); - - /* check if this is a self generated aggregation halt */ - if (initiator == WLAN_BACK_RECIPIENT || initiator == WLAN_BACK_TIMER) - ieee80211_send_delba(dev, ra, tid, 0, reason); - - /* free the reordering buffer */ - for (i = 0; i < sta->ampdu_mlme.tid_rx[tid]->buf_size; i++) { - if (sta->ampdu_mlme.tid_rx[tid]->reorder_buf[i]) { - /* release the reordered frames */ - dev_kfree_skb(sta->ampdu_mlme.tid_rx[tid]->reorder_buf[i]); - sta->ampdu_mlme.tid_rx[tid]->stored_mpdu_num--; - sta->ampdu_mlme.tid_rx[tid]->reorder_buf[i] = NULL; - } - } - /* free resources */ - kfree(sta->ampdu_mlme.tid_rx[tid]->reorder_buf); - kfree(sta->ampdu_mlme.tid_rx[tid]); - sta->ampdu_mlme.tid_rx[tid] = NULL; - sta->ampdu_mlme.tid_state_rx[tid] = HT_AGG_STATE_IDLE; - - rcu_read_unlock(); -} - - -static void ieee80211_sta_process_delba(struct net_device *dev, - struct ieee80211_mgmt *mgmt, size_t len) -{ - struct ieee80211_local *local = wdev_priv(dev->ieee80211_ptr); - struct sta_info *sta; - u16 tid, params; - u16 initiator; - DECLARE_MAC_BUF(mac); - - rcu_read_lock(); - - sta = sta_info_get(local, mgmt->sa); - if (!sta) { - rcu_read_unlock(); - return; - } - - params = le16_to_cpu(mgmt->u.action.u.delba.params); - tid = (params & IEEE80211_DELBA_PARAM_TID_MASK) >> 12; - initiator = (params & IEEE80211_DELBA_PARAM_INITIATOR_MASK) >> 11; - -#ifdef CONFIG_MAC80211_HT_DEBUG - if (net_ratelimit()) - printk(KERN_DEBUG "delba from %s (%s) tid %d reason code %d\n", - print_mac(mac, mgmt->sa), - initiator ? "initiator" : "recipient", tid, - mgmt->u.action.u.delba.reason_code); -#endif /* CONFIG_MAC80211_HT_DEBUG */ - - if (initiator == WLAN_BACK_INITIATOR) - ieee80211_sta_stop_rx_ba_session(dev, sta->addr, tid, - WLAN_BACK_INITIATOR, 0); - else { /* WLAN_BACK_RECIPIENT */ - spin_lock_bh(&sta->ampdu_mlme.ampdu_tx); - sta->ampdu_mlme.tid_state_tx[tid] = - HT_AGG_STATE_OPERATIONAL; - spin_unlock_bh(&sta->ampdu_mlme.ampdu_tx); - ieee80211_stop_tx_ba_session(&local->hw, sta->addr, tid, - WLAN_BACK_RECIPIENT); - } - rcu_read_unlock(); -} - -/* - * After sending add Block Ack request we activated a timer until - * add Block Ack response will arrive from the recipient. - * If this timer expires sta_addba_resp_timer_expired will be executed. - */ -void sta_addba_resp_timer_expired(unsigned long data) -{ - /* not an elegant detour, but there is no choice as the timer passes - * only one argument, and both sta_info and TID are needed, so init - * flow in sta_info_create gives the TID as data, while the timer_to_id - * array gives the sta through container_of */ - u16 tid = *(u8 *)data; - struct sta_info *temp_sta = container_of((void *)data, - struct sta_info, timer_to_tid[tid]); - - struct ieee80211_local *local = temp_sta->local; - struct ieee80211_hw *hw = &local->hw; - struct sta_info *sta; - u8 *state; - - rcu_read_lock(); - - sta = sta_info_get(local, temp_sta->addr); - if (!sta) { - rcu_read_unlock(); - return; - } - - state = &sta->ampdu_mlme.tid_state_tx[tid]; - /* check if the TID waits for addBA response */ - spin_lock_bh(&sta->ampdu_mlme.ampdu_tx); - if (!(*state & HT_ADDBA_REQUESTED_MSK)) { - spin_unlock_bh(&sta->ampdu_mlme.ampdu_tx); - *state = HT_AGG_STATE_IDLE; - printk(KERN_DEBUG "timer expired on tid %d but we are not " - "expecting addBA response there", tid); - goto timer_expired_exit; - } - - printk(KERN_DEBUG "addBA response timer expired on tid %d\n", tid); - - /* go through the state check in stop_BA_session */ - *state = HT_AGG_STATE_OPERATIONAL; - spin_unlock_bh(&sta->ampdu_mlme.ampdu_tx); - ieee80211_stop_tx_ba_session(hw, temp_sta->addr, tid, - WLAN_BACK_INITIATOR); - -timer_expired_exit: - rcu_read_unlock(); -} - -/* - * After accepting the AddBA Request we activated a timer, - * resetting it after each frame that arrives from the originator. - * if this timer expires ieee80211_sta_stop_rx_ba_session will be executed. - */ -void sta_rx_agg_session_timer_expired(unsigned long data) -{ - /* not an elegant detour, but there is no choice as the timer passes - * only one argument, and various sta_info are needed here, so init - * flow in sta_info_create gives the TID as data, while the timer_to_id - * array gives the sta through container_of */ - u8 *ptid = (u8 *)data; - u8 *timer_to_id = ptid - *ptid; - struct sta_info *sta = container_of(timer_to_id, struct sta_info, - timer_to_tid[0]); - - printk(KERN_DEBUG "rx session timer expired on tid %d\n", (u16)*ptid); - ieee80211_sta_stop_rx_ba_session(sta->sdata->dev, sta->addr, - (u16)*ptid, WLAN_BACK_TIMER, - WLAN_REASON_QSTA_TIMEOUT); -} - -void ieee80211_sta_tear_down_BA_sessions(struct net_device *dev, u8 *addr) -{ - struct ieee80211_local *local = wdev_priv(dev->ieee80211_ptr); - int i; - - for (i = 0; i < STA_TID_NUM; i++) { - ieee80211_stop_tx_ba_session(&local->hw, addr, i, - WLAN_BACK_INITIATOR); - ieee80211_sta_stop_rx_ba_session(dev, addr, i, - WLAN_BACK_RECIPIENT, - WLAN_REASON_QSTA_LEAVE_QBSS); - } -} - -static void ieee80211_rx_mgmt_auth(struct net_device *dev, +static void ieee80211_rx_mgmt_auth(struct ieee80211_sub_if_data *sdata, struct ieee80211_if_sta *ifsta, struct ieee80211_mgmt *mgmt, size_t len) { - struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev); u16 auth_alg, auth_transaction, status_code; DECLARE_MAC_BUF(mac); - if (ifsta->state != IEEE80211_AUTHENTICATE && - sdata->vif.type != IEEE80211_IF_TYPE_IBSS) { - printk(KERN_DEBUG "%s: authentication frame received from " - "%s, but not in authenticate state - ignored\n", - dev->name, print_mac(mac, mgmt->sa)); + if (ifsta->state != IEEE80211_STA_MLME_AUTHENTICATE && + sdata->vif.type != NL80211_IFTYPE_ADHOC) return; - } - if (len < 24 + 6) { - printk(KERN_DEBUG "%s: too short (%zd) authentication frame " - "received from %s - ignored\n", - dev->name, len, print_mac(mac, mgmt->sa)); + if (len < 24 + 6) return; - } - if (sdata->vif.type != IEEE80211_IF_TYPE_IBSS && - memcmp(ifsta->bssid, mgmt->sa, ETH_ALEN) != 0) { - printk(KERN_DEBUG "%s: authentication frame received from " - "unknown AP (SA=%s BSSID=%s) - " - "ignored\n", dev->name, print_mac(mac, mgmt->sa), - print_mac(mac, mgmt->bssid)); + if (sdata->vif.type != NL80211_IFTYPE_ADHOC && + memcmp(ifsta->bssid, mgmt->sa, ETH_ALEN) != 0) return; - } - if (sdata->vif.type != IEEE80211_IF_TYPE_IBSS && - memcmp(ifsta->bssid, mgmt->bssid, ETH_ALEN) != 0) { - printk(KERN_DEBUG "%s: authentication frame received from " - "unknown BSSID (SA=%s BSSID=%s) - " - "ignored\n", dev->name, print_mac(mac, mgmt->sa), - print_mac(mac, mgmt->bssid)); + if (sdata->vif.type != NL80211_IFTYPE_ADHOC && + memcmp(ifsta->bssid, mgmt->bssid, ETH_ALEN) != 0) return; - } auth_alg = le16_to_cpu(mgmt->u.auth.auth_alg); auth_transaction = le16_to_cpu(mgmt->u.auth.auth_transaction); status_code = le16_to_cpu(mgmt->u.auth.status_code); - printk(KERN_DEBUG "%s: RX authentication from %s (alg=%d " - "transaction=%d status=%d)\n", - dev->name, print_mac(mac, mgmt->sa), auth_alg, - auth_transaction, status_code); - - if (sdata->vif.type == IEEE80211_IF_TYPE_IBSS) { - /* IEEE 802.11 standard does not require authentication in IBSS + if (sdata->vif.type == NL80211_IFTYPE_ADHOC) { + /* + * IEEE 802.11 standard does not require authentication in IBSS * networks and most implementations do not seem to use it. * However, try to reply to authentication attempts if someone * has actually implemented this. - * TODO: Could implement shared key authentication. */ - if (auth_alg != WLAN_AUTH_OPEN || auth_transaction != 1) { - printk(KERN_DEBUG "%s: unexpected IBSS authentication " - "frame (alg=%d transaction=%d)\n", - dev->name, auth_alg, auth_transaction); + */ + if (auth_alg != WLAN_AUTH_OPEN || auth_transaction != 1) return; - } - ieee80211_send_auth(dev, ifsta, 2, NULL, 0, 0); + ieee80211_send_auth(sdata, ifsta, 2, NULL, 0, 0); } if (auth_alg != ifsta->auth_alg || - auth_transaction != ifsta->auth_transaction) { - printk(KERN_DEBUG "%s: unexpected authentication frame " - "(alg=%d transaction=%d)\n", - dev->name, auth_alg, auth_transaction); + auth_transaction != ifsta->auth_transaction) return; - } if (status_code != WLAN_STATUS_SUCCESS) { - printk(KERN_DEBUG "%s: AP denied authentication (auth_alg=%d " - "code=%d)\n", dev->name, ifsta->auth_alg, status_code); if (status_code == WLAN_STATUS_NOT_SUPPORTED_AUTH_ALG) { u8 algs[3]; const int num_algs = ARRAY_SIZE(algs); @@ -1792,12 +1095,9 @@ static void ieee80211_rx_mgmt_auth(struct net_device *dev, algs[pos] == 0xff) continue; if (algs[pos] == WLAN_AUTH_SHARED_KEY && - !ieee80211_sta_wep_configured(dev)) + !ieee80211_sta_wep_configured(sdata)) continue; ifsta->auth_alg = algs[pos]; - printk(KERN_DEBUG "%s: set auth_alg=%d for " - "next try\n", - dev->name, ifsta->auth_alg); break; } } @@ -1807,19 +1107,19 @@ static void ieee80211_rx_mgmt_auth(struct net_device *dev, switch (ifsta->auth_alg) { case WLAN_AUTH_OPEN: case WLAN_AUTH_LEAP: - ieee80211_auth_completed(dev, ifsta); + ieee80211_auth_completed(sdata, ifsta); break; case WLAN_AUTH_SHARED_KEY: if (ifsta->auth_transaction == 4) - ieee80211_auth_completed(dev, ifsta); + ieee80211_auth_completed(sdata, ifsta); else - ieee80211_auth_challenge(dev, ifsta, mgmt, len); + ieee80211_auth_challenge(sdata, ifsta, mgmt, len); break; } } -static void ieee80211_rx_mgmt_deauth(struct net_device *dev, +static void ieee80211_rx_mgmt_deauth(struct ieee80211_sub_if_data *sdata, struct ieee80211_if_sta *ifsta, struct ieee80211_mgmt *mgmt, size_t len) @@ -1827,45 +1127,31 @@ static void ieee80211_rx_mgmt_deauth(struct net_device *dev, u16 reason_code; DECLARE_MAC_BUF(mac); - if (len < 24 + 2) { - printk(KERN_DEBUG "%s: too short (%zd) deauthentication frame " - "received from %s - ignored\n", - dev->name, len, print_mac(mac, mgmt->sa)); + if (len < 24 + 2) return; - } - if (memcmp(ifsta->bssid, mgmt->sa, ETH_ALEN) != 0) { - printk(KERN_DEBUG "%s: deauthentication frame received from " - "unknown AP (SA=%s BSSID=%s) - " - "ignored\n", dev->name, print_mac(mac, mgmt->sa), - print_mac(mac, mgmt->bssid)); + if (memcmp(ifsta->bssid, mgmt->sa, ETH_ALEN)) return; - } reason_code = le16_to_cpu(mgmt->u.deauth.reason_code); - printk(KERN_DEBUG "%s: RX deauthentication from %s" - " (reason=%d)\n", - dev->name, print_mac(mac, mgmt->sa), reason_code); - - if (ifsta->flags & IEEE80211_STA_AUTHENTICATED) { - printk(KERN_DEBUG "%s: deauthenticated\n", dev->name); - } + if (ifsta->flags & IEEE80211_STA_AUTHENTICATED) + printk(KERN_DEBUG "%s: deauthenticated\n", sdata->dev->name); - if (ifsta->state == IEEE80211_AUTHENTICATE || - ifsta->state == IEEE80211_ASSOCIATE || - ifsta->state == IEEE80211_ASSOCIATED) { - ifsta->state = IEEE80211_AUTHENTICATE; + if (ifsta->state == IEEE80211_STA_MLME_AUTHENTICATE || + ifsta->state == IEEE80211_STA_MLME_ASSOCIATE || + ifsta->state == IEEE80211_STA_MLME_ASSOCIATED) { + ifsta->state = IEEE80211_STA_MLME_DIRECT_PROBE; mod_timer(&ifsta->timer, jiffies + IEEE80211_RETRY_AUTH_INTERVAL); } - ieee80211_set_disassoc(dev, ifsta, 1); + ieee80211_set_disassoc(sdata, ifsta, true, false, 0); ifsta->flags &= ~IEEE80211_STA_AUTHENTICATED; } -static void ieee80211_rx_mgmt_disassoc(struct net_device *dev, +static void ieee80211_rx_mgmt_disassoc(struct ieee80211_sub_if_data *sdata, struct ieee80211_if_sta *ifsta, struct ieee80211_mgmt *mgmt, size_t len) @@ -1873,37 +1159,24 @@ static void ieee80211_rx_mgmt_disassoc(struct net_device *dev, u16 reason_code; DECLARE_MAC_BUF(mac); - if (len < 24 + 2) { - printk(KERN_DEBUG "%s: too short (%zd) disassociation frame " - "received from %s - ignored\n", - dev->name, len, print_mac(mac, mgmt->sa)); + if (len < 24 + 2) return; - } - if (memcmp(ifsta->bssid, mgmt->sa, ETH_ALEN) != 0) { - printk(KERN_DEBUG "%s: disassociation frame received from " - "unknown AP (SA=%s BSSID=%s) - " - "ignored\n", dev->name, print_mac(mac, mgmt->sa), - print_mac(mac, mgmt->bssid)); + if (memcmp(ifsta->bssid, mgmt->sa, ETH_ALEN)) return; - } reason_code = le16_to_cpu(mgmt->u.disassoc.reason_code); - printk(KERN_DEBUG "%s: RX disassociation from %s" - " (reason=%d)\n", - dev->name, print_mac(mac, mgmt->sa), reason_code); - if (ifsta->flags & IEEE80211_STA_ASSOCIATED) - printk(KERN_DEBUG "%s: disassociated\n", dev->name); + printk(KERN_DEBUG "%s: disassociated\n", sdata->dev->name); - if (ifsta->state == IEEE80211_ASSOCIATED) { - ifsta->state = IEEE80211_ASSOCIATE; + if (ifsta->state == IEEE80211_STA_MLME_ASSOCIATED) { + ifsta->state = IEEE80211_STA_MLME_ASSOCIATE; mod_timer(&ifsta->timer, jiffies + IEEE80211_RETRY_AUTH_INTERVAL); } - ieee80211_set_disassoc(dev, ifsta, 0); + ieee80211_set_disassoc(sdata, ifsta, false, false, 0); } @@ -1914,7 +1187,6 @@ static void ieee80211_rx_mgmt_assoc_resp(struct ieee80211_sub_if_data *sdata, int reassoc) { struct ieee80211_local *local = sdata->local; - struct net_device *dev = sdata->dev; struct ieee80211_supported_band *sband; struct sta_info *sta; u64 rates, basic_rates; @@ -1929,27 +1201,14 @@ static void ieee80211_rx_mgmt_assoc_resp(struct ieee80211_sub_if_data *sdata, /* AssocResp and ReassocResp have identical structure, so process both * of them in this function. */ - if (ifsta->state != IEEE80211_ASSOCIATE) { - printk(KERN_DEBUG "%s: association frame received from " - "%s, but not in associate state - ignored\n", - dev->name, print_mac(mac, mgmt->sa)); + if (ifsta->state != IEEE80211_STA_MLME_ASSOCIATE) return; - } - if (len < 24 + 6) { - printk(KERN_DEBUG "%s: too short (%zd) association frame " - "received from %s - ignored\n", - dev->name, len, print_mac(mac, mgmt->sa)); + if (len < 24 + 6) return; - } - if (memcmp(ifsta->bssid, mgmt->sa, ETH_ALEN) != 0) { - printk(KERN_DEBUG "%s: association frame received from " - "unknown AP (SA=%s BSSID=%s) - " - "ignored\n", dev->name, print_mac(mac, mgmt->sa), - print_mac(mac, mgmt->bssid)); + if (memcmp(ifsta->bssid, mgmt->sa, ETH_ALEN) != 0) return; - } capab_info = le16_to_cpu(mgmt->u.assoc_resp.capab_info); status_code = le16_to_cpu(mgmt->u.assoc_resp.status_code); @@ -1957,12 +1216,12 @@ static void ieee80211_rx_mgmt_assoc_resp(struct ieee80211_sub_if_data *sdata, printk(KERN_DEBUG "%s: RX %sssocResp from %s (capab=0x%x " "status=%d aid=%d)\n", - dev->name, reassoc ? "Rea" : "A", print_mac(mac, mgmt->sa), + sdata->dev->name, reassoc ? "Rea" : "A", print_mac(mac, mgmt->sa), capab_info, status_code, (u16)(aid & ~(BIT(15) | BIT(14)))); if (status_code != WLAN_STATUS_SUCCESS) { printk(KERN_DEBUG "%s: AP denied association (code=%d)\n", - dev->name, status_code); + sdata->dev->name, status_code); /* if this was a reassociation, ensure we try a "full" * association next time. This works around some broken APs * which do not correctly reject reassociation requests. */ @@ -1972,7 +1231,7 @@ static void ieee80211_rx_mgmt_assoc_resp(struct ieee80211_sub_if_data *sdata, if ((aid & (BIT(15) | BIT(14))) != (BIT(15) | BIT(14))) printk(KERN_DEBUG "%s: invalid aid value %d; bits 15:14 not " - "set\n", dev->name, aid); + "set\n", sdata->dev->name, aid); aid &= ~(BIT(15) | BIT(14)); pos = mgmt->u.assoc_resp.variable; @@ -1980,11 +1239,11 @@ static void ieee80211_rx_mgmt_assoc_resp(struct ieee80211_sub_if_data *sdata, if (!elems.supp_rates) { printk(KERN_DEBUG "%s: no SuppRates element in AssocResp\n", - dev->name); + sdata->dev->name); return; } - printk(KERN_DEBUG "%s: associated\n", dev->name); + printk(KERN_DEBUG "%s: associated\n", sdata->dev->name); ifsta->aid = aid; ifsta->ap_capab = capab_info; @@ -1999,33 +1258,35 @@ static void ieee80211_rx_mgmt_assoc_resp(struct ieee80211_sub_if_data *sdata, /* Add STA entry for the AP */ sta = sta_info_get(local, ifsta->bssid); if (!sta) { - struct ieee80211_sta_bss *bss; + struct ieee80211_bss *bss; int err; sta = sta_info_alloc(sdata, ifsta->bssid, GFP_ATOMIC); if (!sta) { printk(KERN_DEBUG "%s: failed to alloc STA entry for" - " the AP\n", dev->name); + " the AP\n", sdata->dev->name); rcu_read_unlock(); return; } - bss = ieee80211_rx_bss_get(dev, ifsta->bssid, + bss = ieee80211_rx_bss_get(local, ifsta->bssid, local->hw.conf.channel->center_freq, ifsta->ssid, ifsta->ssid_len); if (bss) { - sta->last_rssi = bss->rssi; sta->last_signal = bss->signal; + sta->last_qual = bss->qual; sta->last_noise = bss->noise; - ieee80211_rx_bss_put(dev, bss); + ieee80211_rx_bss_put(local, bss); } err = sta_info_insert(sta); if (err) { printk(KERN_DEBUG "%s: failed to insert STA entry for" - " the AP (error %d)\n", dev->name, err); + " the AP (error %d)\n", sdata->dev->name, err); rcu_read_unlock(); return; } + /* update new sta with its last rx activity */ + sta->last_rx = jiffies; } /* @@ -2038,8 +1299,8 @@ static void ieee80211_rx_mgmt_assoc_resp(struct ieee80211_sub_if_data *sdata, * to between the sta_info_alloc() and sta_info_insert() above. */ - sta->flags |= WLAN_STA_AUTH | WLAN_STA_ASSOC | WLAN_STA_ASSOC_AP | - WLAN_STA_AUTHORIZED; + set_sta_flags(sta, WLAN_STA_AUTH | WLAN_STA_ASSOC | WLAN_STA_ASSOC_AP | + WLAN_STA_AUTHORIZED); rates = 0; basic_rates = 0; @@ -2073,8 +1334,8 @@ static void ieee80211_rx_mgmt_assoc_resp(struct ieee80211_sub_if_data *sdata, } } - sta->supp_rates[local->hw.conf.channel->band] = rates; - sdata->basic_rates = basic_rates; + sta->sta.supp_rates[local->hw.conf.channel->band] = rates; + sdata->bss_conf.basic_rates = basic_rates; /* cf. IEEE 802.11 9.2.12 */ if (local->hw.conf.channel->band == IEEE80211_BAND_2GHZ && @@ -2083,23 +1344,24 @@ static void ieee80211_rx_mgmt_assoc_resp(struct ieee80211_sub_if_data *sdata, else sdata->flags &= ~IEEE80211_SDATA_OPERATING_GMODE; - if (elems.ht_cap_elem && elems.ht_info_elem && elems.wmm_param) { + if (elems.ht_cap_elem && elems.ht_info_elem && elems.wmm_param && + (ifsta->flags & IEEE80211_STA_WMM_ENABLED)) { struct ieee80211_ht_bss_info bss_info; ieee80211_ht_cap_ie_to_ht_info( (struct ieee80211_ht_cap *) - elems.ht_cap_elem, &sta->ht_info); + elems.ht_cap_elem, &sta->sta.ht_info); ieee80211_ht_addt_info_ie_to_ht_bss_info( (struct ieee80211_ht_addt_info *) elems.ht_info_elem, &bss_info); - ieee80211_handle_ht(local, 1, &sta->ht_info, &bss_info); + ieee80211_handle_ht(local, 1, &sta->sta.ht_info, &bss_info); } - rate_control_rate_init(sta, local); + rate_control_rate_init(sta); - if (elems.wmm_param && (ifsta->flags & IEEE80211_STA_WMM_ENABLED)) { - sta->flags |= WLAN_STA_WME; + if (elems.wmm_param) { + set_sta_flags(sta, WLAN_STA_WME); rcu_read_unlock(); - ieee80211_sta_wmm_params(dev, ifsta, elems.wmm_param, + ieee80211_sta_wmm_params(local, ifsta, elems.wmm_param, elems.wmm_param_len); } else rcu_read_unlock(); @@ -2108,240 +1370,26 @@ static void ieee80211_rx_mgmt_assoc_resp(struct ieee80211_sub_if_data *sdata, * ieee80211_set_associated() will tell the driver */ bss_conf->aid = aid; bss_conf->assoc_capability = capab_info; - ieee80211_set_associated(dev, ifsta, 1); + ieee80211_set_associated(sdata, ifsta); - ieee80211_associated(dev, ifsta); + ieee80211_associated(sdata, ifsta); } -/* Caller must hold local->sta_bss_lock */ -static void __ieee80211_rx_bss_hash_add(struct net_device *dev, - struct ieee80211_sta_bss *bss) -{ - struct ieee80211_local *local = wdev_priv(dev->ieee80211_ptr); - u8 hash_idx; - - if (bss_mesh_cfg(bss)) - hash_idx = mesh_id_hash(bss_mesh_id(bss), - bss_mesh_id_len(bss)); - else - hash_idx = STA_HASH(bss->bssid); - - bss->hnext = local->sta_bss_hash[hash_idx]; - local->sta_bss_hash[hash_idx] = bss; -} - - -/* Caller must hold local->sta_bss_lock */ -static void __ieee80211_rx_bss_hash_del(struct net_device *dev, - struct ieee80211_sta_bss *bss) -{ - struct ieee80211_local *local = wdev_priv(dev->ieee80211_ptr); - struct ieee80211_sta_bss *b, *prev = NULL; - b = local->sta_bss_hash[STA_HASH(bss->bssid)]; - while (b) { - if (b == bss) { - if (!prev) - local->sta_bss_hash[STA_HASH(bss->bssid)] = - bss->hnext; - else - prev->hnext = bss->hnext; - break; - } - prev = b; - b = b->hnext; - } -} - - -static struct ieee80211_sta_bss * -ieee80211_rx_bss_add(struct net_device *dev, u8 *bssid, int freq, - u8 *ssid, u8 ssid_len) -{ - struct ieee80211_local *local = wdev_priv(dev->ieee80211_ptr); - struct ieee80211_sta_bss *bss; - - bss = kzalloc(sizeof(*bss), GFP_ATOMIC); - if (!bss) - return NULL; - atomic_inc(&bss->users); - atomic_inc(&bss->users); - memcpy(bss->bssid, bssid, ETH_ALEN); - bss->freq = freq; - if (ssid && ssid_len <= IEEE80211_MAX_SSID_LEN) { - memcpy(bss->ssid, ssid, ssid_len); - bss->ssid_len = ssid_len; - } - - spin_lock_bh(&local->sta_bss_lock); - /* TODO: order by RSSI? */ - list_add_tail(&bss->list, &local->sta_bss_list); - __ieee80211_rx_bss_hash_add(dev, bss); - spin_unlock_bh(&local->sta_bss_lock); - return bss; -} - -static struct ieee80211_sta_bss * -ieee80211_rx_bss_get(struct net_device *dev, u8 *bssid, int freq, - u8 *ssid, u8 ssid_len) -{ - struct ieee80211_local *local = wdev_priv(dev->ieee80211_ptr); - struct ieee80211_sta_bss *bss; - - spin_lock_bh(&local->sta_bss_lock); - bss = local->sta_bss_hash[STA_HASH(bssid)]; - while (bss) { - if (!bss_mesh_cfg(bss) && - !memcmp(bss->bssid, bssid, ETH_ALEN) && - bss->freq == freq && - bss->ssid_len == ssid_len && - (ssid_len == 0 || !memcmp(bss->ssid, ssid, ssid_len))) { - atomic_inc(&bss->users); - break; - } - bss = bss->hnext; - } - spin_unlock_bh(&local->sta_bss_lock); - return bss; -} - -#ifdef CONFIG_MAC80211_MESH -static struct ieee80211_sta_bss * -ieee80211_rx_mesh_bss_get(struct net_device *dev, u8 *mesh_id, int mesh_id_len, - u8 *mesh_cfg, int freq) -{ - struct ieee80211_local *local = wdev_priv(dev->ieee80211_ptr); - struct ieee80211_sta_bss *bss; - - spin_lock_bh(&local->sta_bss_lock); - bss = local->sta_bss_hash[mesh_id_hash(mesh_id, mesh_id_len)]; - while (bss) { - if (bss_mesh_cfg(bss) && - !memcmp(bss_mesh_cfg(bss), mesh_cfg, MESH_CFG_CMP_LEN) && - bss->freq == freq && - mesh_id_len == bss->mesh_id_len && - (mesh_id_len == 0 || !memcmp(bss->mesh_id, mesh_id, - mesh_id_len))) { - atomic_inc(&bss->users); - break; - } - bss = bss->hnext; - } - spin_unlock_bh(&local->sta_bss_lock); - return bss; -} - -static struct ieee80211_sta_bss * -ieee80211_rx_mesh_bss_add(struct net_device *dev, u8 *mesh_id, int mesh_id_len, - u8 *mesh_cfg, int mesh_config_len, int freq) -{ - struct ieee80211_local *local = wdev_priv(dev->ieee80211_ptr); - struct ieee80211_sta_bss *bss; - - if (mesh_config_len != MESH_CFG_LEN) - return NULL; - - bss = kzalloc(sizeof(*bss), GFP_ATOMIC); - if (!bss) - return NULL; - - bss->mesh_cfg = kmalloc(MESH_CFG_CMP_LEN, GFP_ATOMIC); - if (!bss->mesh_cfg) { - kfree(bss); - return NULL; - } - - if (mesh_id_len && mesh_id_len <= IEEE80211_MAX_MESH_ID_LEN) { - bss->mesh_id = kmalloc(mesh_id_len, GFP_ATOMIC); - if (!bss->mesh_id) { - kfree(bss->mesh_cfg); - kfree(bss); - return NULL; - } - memcpy(bss->mesh_id, mesh_id, mesh_id_len); - } - - atomic_inc(&bss->users); - atomic_inc(&bss->users); - memcpy(bss->mesh_cfg, mesh_cfg, MESH_CFG_CMP_LEN); - bss->mesh_id_len = mesh_id_len; - bss->freq = freq; - spin_lock_bh(&local->sta_bss_lock); - /* TODO: order by RSSI? */ - list_add_tail(&bss->list, &local->sta_bss_list); - __ieee80211_rx_bss_hash_add(dev, bss); - spin_unlock_bh(&local->sta_bss_lock); - return bss; -} -#endif - -static void ieee80211_rx_bss_free(struct ieee80211_sta_bss *bss) -{ - kfree(bss->wpa_ie); - kfree(bss->rsn_ie); - kfree(bss->wmm_ie); - kfree(bss->ht_ie); - kfree(bss_mesh_id(bss)); - kfree(bss_mesh_cfg(bss)); - kfree(bss); -} - - -static void ieee80211_rx_bss_put(struct net_device *dev, - struct ieee80211_sta_bss *bss) -{ - struct ieee80211_local *local = wdev_priv(dev->ieee80211_ptr); - - local_bh_disable(); - if (!atomic_dec_and_lock(&bss->users, &local->sta_bss_lock)) { - local_bh_enable(); - return; - } - - __ieee80211_rx_bss_hash_del(dev, bss); - list_del(&bss->list); - spin_unlock_bh(&local->sta_bss_lock); - ieee80211_rx_bss_free(bss); -} - - -void ieee80211_rx_bss_list_init(struct net_device *dev) -{ - struct ieee80211_local *local = wdev_priv(dev->ieee80211_ptr); - spin_lock_init(&local->sta_bss_lock); - INIT_LIST_HEAD(&local->sta_bss_list); -} - - -void ieee80211_rx_bss_list_deinit(struct net_device *dev) -{ - struct ieee80211_local *local = wdev_priv(dev->ieee80211_ptr); - struct ieee80211_sta_bss *bss, *tmp; - - list_for_each_entry_safe(bss, tmp, &local->sta_bss_list, list) - ieee80211_rx_bss_put(dev, bss); -} - - -static int ieee80211_sta_join_ibss(struct net_device *dev, +static int ieee80211_sta_join_ibss(struct ieee80211_sub_if_data *sdata, struct ieee80211_if_sta *ifsta, - struct ieee80211_sta_bss *bss) + struct ieee80211_bss *bss) { - struct ieee80211_local *local = wdev_priv(dev->ieee80211_ptr); + struct ieee80211_local *local = sdata->local; int res, rates, i, j; struct sk_buff *skb; struct ieee80211_mgmt *mgmt; - struct ieee80211_tx_control control; - struct rate_selection ratesel; u8 *pos; - struct ieee80211_sub_if_data *sdata; struct ieee80211_supported_band *sband; union iwreq_data wrqu; sband = local->hw.wiphy->bands[local->hw.conf.channel->band]; - sdata = IEEE80211_DEV_TO_SUB_IF(dev); - /* Remove possible STA entries from other IBSS networks. */ sta_info_flush_delayed(sdata); @@ -2350,7 +1398,7 @@ static int ieee80211_sta_join_ibss(struct net_device *dev, local->ops->reset_tsf(local_to_hw(local)); } memcpy(ifsta->bssid, bss->bssid, ETH_ALEN); - res = ieee80211_if_config(dev); + res = ieee80211_if_config(sdata, IEEE80211_IFCC_BSSID); if (res) return res; @@ -2359,29 +1407,27 @@ static int ieee80211_sta_join_ibss(struct net_device *dev, sdata->drop_unencrypted = bss->capability & WLAN_CAPABILITY_PRIVACY ? 1 : 0; - res = ieee80211_set_freq(dev, bss->freq); + res = ieee80211_set_freq(sdata, bss->freq); if (res) return res; - /* Set beacon template */ + /* Build IBSS probe response */ skb = dev_alloc_skb(local->hw.extra_tx_headroom + 400); - do { - if (!skb) - break; - + if (skb) { skb_reserve(skb, local->hw.extra_tx_headroom); mgmt = (struct ieee80211_mgmt *) skb_put(skb, 24 + sizeof(mgmt->u.beacon)); memset(mgmt, 0, 24 + sizeof(mgmt->u.beacon)); - mgmt->frame_control = IEEE80211_FC(IEEE80211_FTYPE_MGMT, - IEEE80211_STYPE_BEACON); + mgmt->frame_control = cpu_to_le16(IEEE80211_FTYPE_MGMT | + IEEE80211_STYPE_PROBE_RESP); memset(mgmt->da, 0xff, ETH_ALEN); - memcpy(mgmt->sa, dev->dev_addr, ETH_ALEN); + memcpy(mgmt->sa, sdata->dev->dev_addr, ETH_ALEN); memcpy(mgmt->bssid, ifsta->bssid, ETH_ALEN); mgmt->u.beacon.beacon_int = cpu_to_le16(local->hw.conf.beacon_int); + mgmt->u.beacon.timestamp = cpu_to_le64(bss->timestamp); mgmt->u.beacon.capab_info = cpu_to_le16(bss->capability); pos = skb_put(skb, 2 + ifsta->ssid_len); @@ -2419,184 +1465,55 @@ static int ieee80211_sta_join_ibss(struct net_device *dev, memcpy(pos, &bss->supp_rates[8], rates); } - memset(&control, 0, sizeof(control)); - rate_control_get_rate(dev, sband, skb, &ratesel); - if (!ratesel.rate) { - printk(KERN_DEBUG "%s: Failed to determine TX rate " - "for IBSS beacon\n", dev->name); - break; - } - control.vif = &sdata->vif; - control.tx_rate = ratesel.rate; - if (sdata->bss_conf.use_short_preamble && - ratesel.rate->flags & IEEE80211_RATE_SHORT_PREAMBLE) - control.flags |= IEEE80211_TXCTL_SHORT_PREAMBLE; - control.antenna_sel_tx = local->hw.conf.antenna_sel_tx; - control.flags |= IEEE80211_TXCTL_NO_ACK; - control.retry_limit = 1; - - ifsta->probe_resp = skb_copy(skb, GFP_ATOMIC); - if (ifsta->probe_resp) { - mgmt = (struct ieee80211_mgmt *) - ifsta->probe_resp->data; - mgmt->frame_control = - IEEE80211_FC(IEEE80211_FTYPE_MGMT, - IEEE80211_STYPE_PROBE_RESP); - } else { - printk(KERN_DEBUG "%s: Could not allocate ProbeResp " - "template for IBSS\n", dev->name); - } + ifsta->probe_resp = skb; - if (local->ops->beacon_update && - local->ops->beacon_update(local_to_hw(local), - skb, &control) == 0) { - printk(KERN_DEBUG "%s: Configured IBSS beacon " - "template\n", dev->name); - skb = NULL; - } - - rates = 0; - sband = local->hw.wiphy->bands[local->hw.conf.channel->band]; - for (i = 0; i < bss->supp_rates_len; i++) { - int bitrate = (bss->supp_rates[i] & 0x7f) * 5; - for (j = 0; j < sband->n_bitrates; j++) - if (sband->bitrates[j].bitrate == bitrate) - rates |= BIT(j); - } - ifsta->supp_rates_bits[local->hw.conf.channel->band] = rates; - - ieee80211_sta_def_wmm_params(dev, bss, 1); - } while (0); + ieee80211_if_config(sdata, IEEE80211_IFCC_BEACON); + } - if (skb) { - printk(KERN_DEBUG "%s: Failed to configure IBSS beacon " - "template\n", dev->name); - dev_kfree_skb(skb); + rates = 0; + sband = local->hw.wiphy->bands[local->hw.conf.channel->band]; + for (i = 0; i < bss->supp_rates_len; i++) { + int bitrate = (bss->supp_rates[i] & 0x7f) * 5; + for (j = 0; j < sband->n_bitrates; j++) + if (sband->bitrates[j].bitrate == bitrate) + rates |= BIT(j); } + ifsta->supp_rates_bits[local->hw.conf.channel->band] = rates; + + ieee80211_sta_def_wmm_params(sdata, bss); - ifsta->state = IEEE80211_IBSS_JOINED; + ifsta->state = IEEE80211_STA_MLME_IBSS_JOINED; mod_timer(&ifsta->timer, jiffies + IEEE80211_IBSS_MERGE_INTERVAL); + ieee80211_led_assoc(local, true); + memset(&wrqu, 0, sizeof(wrqu)); memcpy(wrqu.ap_addr.sa_data, bss->bssid, ETH_ALEN); - wireless_send_event(dev, SIOCGIWAP, &wrqu, NULL); + wireless_send_event(sdata->dev, SIOCGIWAP, &wrqu, NULL); return res; } -u64 ieee80211_sta_get_rates(struct ieee80211_local *local, - struct ieee802_11_elems *elems, - enum ieee80211_band band) -{ - struct ieee80211_supported_band *sband; - struct ieee80211_rate *bitrates; - size_t num_rates; - u64 supp_rates; - int i, j; - sband = local->hw.wiphy->bands[band]; - - if (!sband) { - WARN_ON(1); - sband = local->hw.wiphy->bands[local->hw.conf.channel->band]; - } - - bitrates = sband->bitrates; - num_rates = sband->n_bitrates; - supp_rates = 0; - for (i = 0; i < elems->supp_rates_len + - elems->ext_supp_rates_len; i++) { - u8 rate = 0; - int own_rate; - if (i < elems->supp_rates_len) - rate = elems->supp_rates[i]; - else if (elems->ext_supp_rates) - rate = elems->ext_supp_rates - [i - elems->supp_rates_len]; - own_rate = 5 * (rate & 0x7f); - for (j = 0; j < num_rates; j++) - if (bitrates[j].bitrate == own_rate) - supp_rates |= BIT(j); - } - return supp_rates; -} - - -static void ieee80211_rx_bss_info(struct net_device *dev, +static void ieee80211_rx_bss_info(struct ieee80211_sub_if_data *sdata, struct ieee80211_mgmt *mgmt, size_t len, struct ieee80211_rx_status *rx_status, - int beacon) + struct ieee802_11_elems *elems, + bool beacon) { - struct ieee80211_local *local = wdev_priv(dev->ieee80211_ptr); - struct ieee802_11_elems elems; - size_t baselen; - int freq, clen; - struct ieee80211_sta_bss *bss; + struct ieee80211_local *local = sdata->local; + int freq; + struct ieee80211_bss *bss; struct sta_info *sta; - struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev); - u64 beacon_timestamp, rx_timestamp; struct ieee80211_channel *channel; + u64 beacon_timestamp, rx_timestamp; + u64 supp_rates = 0; + enum ieee80211_band band = rx_status->band; DECLARE_MAC_BUF(mac); DECLARE_MAC_BUF(mac2); - if (!beacon && memcmp(mgmt->da, dev->dev_addr, ETH_ALEN)) - return; /* ignore ProbeResp to foreign address */ - -#if 0 - printk(KERN_DEBUG "%s: RX %s from %s to %s\n", - dev->name, beacon ? "Beacon" : "Probe Response", - print_mac(mac, mgmt->sa), print_mac(mac2, mgmt->da)); -#endif - - baselen = (u8 *) mgmt->u.beacon.variable - (u8 *) mgmt; - if (baselen > len) - return; - - beacon_timestamp = le64_to_cpu(mgmt->u.beacon.timestamp); - ieee802_11_parse_elems(mgmt->u.beacon.variable, len - baselen, &elems); - - if (ieee80211_vif_is_mesh(&sdata->vif) && elems.mesh_id && - elems.mesh_config && mesh_matches_local(&elems, dev)) { - u64 rates = ieee80211_sta_get_rates(local, &elems, - rx_status->band); - - mesh_neighbour_update(mgmt->sa, rates, dev, - mesh_peer_accepts_plinks(&elems, dev)); - } - - rcu_read_lock(); - - if (sdata->vif.type == IEEE80211_IF_TYPE_IBSS && elems.supp_rates && - memcmp(mgmt->bssid, sdata->u.sta.bssid, ETH_ALEN) == 0 && - (sta = sta_info_get(local, mgmt->sa))) { - u64 prev_rates; - u64 supp_rates = ieee80211_sta_get_rates(local, &elems, - rx_status->band); - - prev_rates = sta->supp_rates[rx_status->band]; - sta->supp_rates[rx_status->band] &= supp_rates; - if (sta->supp_rates[rx_status->band] == 0) { - /* No matching rates - this should not really happen. - * Make sure that at least one rate is marked - * supported to avoid issues with TX rate ctrl. */ - sta->supp_rates[rx_status->band] = - sdata->u.sta.supp_rates_bits[rx_status->band]; - } - if (sta->supp_rates[rx_status->band] != prev_rates) { - printk(KERN_DEBUG "%s: updated supp_rates set for " - "%s based on beacon info (0x%llx & 0x%llx -> " - "0x%llx)\n", - dev->name, print_mac(mac, sta->addr), - (unsigned long long) prev_rates, - (unsigned long long) supp_rates, - (unsigned long long) sta->supp_rates[rx_status->band]); - } - } - - rcu_read_unlock(); - - if (elems.ds_params && elems.ds_params_len == 1) - freq = ieee80211_channel_to_frequency(elems.ds_params[0]); + if (elems->ds_params && elems->ds_params_len == 1) + freq = ieee80211_channel_to_frequency(elems->ds_params[0]); else freq = rx_status->freq; @@ -2605,188 +1522,65 @@ static void ieee80211_rx_bss_info(struct net_device *dev, if (!channel || channel->flags & IEEE80211_CHAN_DISABLED) return; -#ifdef CONFIG_MAC80211_MESH - if (elems.mesh_config) - bss = ieee80211_rx_mesh_bss_get(dev, elems.mesh_id, - elems.mesh_id_len, elems.mesh_config, freq); - else -#endif - bss = ieee80211_rx_bss_get(dev, mgmt->bssid, freq, - elems.ssid, elems.ssid_len); - if (!bss) { -#ifdef CONFIG_MAC80211_MESH - if (elems.mesh_config) - bss = ieee80211_rx_mesh_bss_add(dev, elems.mesh_id, - elems.mesh_id_len, elems.mesh_config, - elems.mesh_config_len, freq); - else -#endif - bss = ieee80211_rx_bss_add(dev, mgmt->bssid, freq, - elems.ssid, elems.ssid_len); - if (!bss) - return; - } else { -#if 0 - /* TODO: order by RSSI? */ - spin_lock_bh(&local->sta_bss_lock); - list_move_tail(&bss->list, &local->sta_bss_list); - spin_unlock_bh(&local->sta_bss_lock); -#endif - } + if (sdata->vif.type == NL80211_IFTYPE_ADHOC && elems->supp_rates && + memcmp(mgmt->bssid, sdata->u.sta.bssid, ETH_ALEN) == 0) { + supp_rates = ieee80211_sta_get_rates(local, elems, band); - /* save the ERP value so that it is available at association time */ - if (elems.erp_info && elems.erp_info_len >= 1) { - bss->erp_value = elems.erp_info[0]; - bss->has_erp_value = 1; - } + rcu_read_lock(); - if (elems.ht_cap_elem && - (!bss->ht_ie || bss->ht_ie_len != elems.ht_cap_elem_len || - memcmp(bss->ht_ie, elems.ht_cap_elem, elems.ht_cap_elem_len))) { - kfree(bss->ht_ie); - bss->ht_ie = kmalloc(elems.ht_cap_elem_len + 2, GFP_ATOMIC); - if (bss->ht_ie) { - memcpy(bss->ht_ie, elems.ht_cap_elem - 2, - elems.ht_cap_elem_len + 2); - bss->ht_ie_len = elems.ht_cap_elem_len + 2; - } else - bss->ht_ie_len = 0; - } else if (!elems.ht_cap_elem && bss->ht_ie) { - kfree(bss->ht_ie); - bss->ht_ie = NULL; - bss->ht_ie_len = 0; - } + sta = sta_info_get(local, mgmt->sa); + if (sta) { + u64 prev_rates; - bss->beacon_int = le16_to_cpu(mgmt->u.beacon.beacon_int); - bss->capability = le16_to_cpu(mgmt->u.beacon.capab_info); - - bss->supp_rates_len = 0; - if (elems.supp_rates) { - clen = IEEE80211_MAX_SUPP_RATES - bss->supp_rates_len; - if (clen > elems.supp_rates_len) - clen = elems.supp_rates_len; - memcpy(&bss->supp_rates[bss->supp_rates_len], elems.supp_rates, - clen); - bss->supp_rates_len += clen; - } - if (elems.ext_supp_rates) { - clen = IEEE80211_MAX_SUPP_RATES - bss->supp_rates_len; - if (clen > elems.ext_supp_rates_len) - clen = elems.ext_supp_rates_len; - memcpy(&bss->supp_rates[bss->supp_rates_len], - elems.ext_supp_rates, clen); - bss->supp_rates_len += clen; + prev_rates = sta->sta.supp_rates[band]; + /* make sure mandatory rates are always added */ + sta->sta.supp_rates[band] = supp_rates | + ieee80211_mandatory_rates(local, band); + +#ifdef CONFIG_MAC80211_IBSS_DEBUG + if (sta->sta.supp_rates[band] != prev_rates) + printk(KERN_DEBUG "%s: updated supp_rates set " + "for %s based on beacon info (0x%llx | " + "0x%llx -> 0x%llx)\n", + sdata->dev->name, + print_mac(mac, sta->sta.addr), + (unsigned long long) prev_rates, + (unsigned long long) supp_rates, + (unsigned long long) sta->sta.supp_rates[band]); +#endif + } else { + ieee80211_ibss_add_sta(sdata, NULL, mgmt->bssid, + mgmt->sa, supp_rates); + } + + rcu_read_unlock(); } - bss->band = rx_status->band; + bss = ieee80211_bss_info_update(local, rx_status, mgmt, len, elems, + freq, beacon); + if (!bss) + return; - bss->timestamp = beacon_timestamp; - bss->last_update = jiffies; - bss->rssi = rx_status->ssi; - bss->signal = rx_status->signal; - bss->noise = rx_status->noise; - if (!beacon && !bss->probe_resp) - bss->probe_resp = true; + /* was just updated in ieee80211_bss_info_update */ + beacon_timestamp = bss->timestamp; /* * In STA mode, the remaining parameters should not be overridden * by beacons because they're not necessarily accurate there. */ - if (sdata->vif.type != IEEE80211_IF_TYPE_IBSS && - bss->probe_resp && beacon) { - ieee80211_rx_bss_put(dev, bss); + if (sdata->vif.type != NL80211_IFTYPE_ADHOC && + bss->last_probe_resp && beacon) { + ieee80211_rx_bss_put(local, bss); return; } - if (elems.wpa && - (!bss->wpa_ie || bss->wpa_ie_len != elems.wpa_len || - memcmp(bss->wpa_ie, elems.wpa, elems.wpa_len))) { - kfree(bss->wpa_ie); - bss->wpa_ie = kmalloc(elems.wpa_len + 2, GFP_ATOMIC); - if (bss->wpa_ie) { - memcpy(bss->wpa_ie, elems.wpa - 2, elems.wpa_len + 2); - bss->wpa_ie_len = elems.wpa_len + 2; - } else - bss->wpa_ie_len = 0; - } else if (!elems.wpa && bss->wpa_ie) { - kfree(bss->wpa_ie); - bss->wpa_ie = NULL; - bss->wpa_ie_len = 0; - } - - if (elems.rsn && - (!bss->rsn_ie || bss->rsn_ie_len != elems.rsn_len || - memcmp(bss->rsn_ie, elems.rsn, elems.rsn_len))) { - kfree(bss->rsn_ie); - bss->rsn_ie = kmalloc(elems.rsn_len + 2, GFP_ATOMIC); - if (bss->rsn_ie) { - memcpy(bss->rsn_ie, elems.rsn - 2, elems.rsn_len + 2); - bss->rsn_ie_len = elems.rsn_len + 2; - } else - bss->rsn_ie_len = 0; - } else if (!elems.rsn && bss->rsn_ie) { - kfree(bss->rsn_ie); - bss->rsn_ie = NULL; - bss->rsn_ie_len = 0; - } - - /* - * Cf. - * http://www.wipo.int/pctdb/en/wo.jsp?wo=2007047181&IA=WO2007047181&DISPLAY=DESC - * - * quoting: - * - * In particular, "Wi-Fi CERTIFIED for WMM - Support for Multimedia - * Applications with Quality of Service in Wi-Fi Networks," Wi- Fi - * Alliance (September 1, 2004) is incorporated by reference herein. - * The inclusion of the WMM Parameters in probe responses and - * association responses is mandatory for WMM enabled networks. The - * inclusion of the WMM Parameters in beacons, however, is optional. - */ - - if (elems.wmm_param && - (!bss->wmm_ie || bss->wmm_ie_len != elems.wmm_param_len || - memcmp(bss->wmm_ie, elems.wmm_param, elems.wmm_param_len))) { - kfree(bss->wmm_ie); - bss->wmm_ie = kmalloc(elems.wmm_param_len + 2, GFP_ATOMIC); - if (bss->wmm_ie) { - memcpy(bss->wmm_ie, elems.wmm_param - 2, - elems.wmm_param_len + 2); - bss->wmm_ie_len = elems.wmm_param_len + 2; - } else - bss->wmm_ie_len = 0; - } else if (elems.wmm_info && - (!bss->wmm_ie || bss->wmm_ie_len != elems.wmm_info_len || - memcmp(bss->wmm_ie, elems.wmm_info, elems.wmm_info_len))) { - /* As for certain AP's Fifth bit is not set in WMM IE in - * beacon frames.So while parsing the beacon frame the - * wmm_info structure is used instead of wmm_param. - * wmm_info structure was never used to set bss->wmm_ie. - * This code fixes this problem by copying the WME - * information from wmm_info to bss->wmm_ie and enabling - * n-band association. - */ - kfree(bss->wmm_ie); - bss->wmm_ie = kmalloc(elems.wmm_info_len + 2, GFP_ATOMIC); - if (bss->wmm_ie) { - memcpy(bss->wmm_ie, elems.wmm_info - 2, - elems.wmm_info_len + 2); - bss->wmm_ie_len = elems.wmm_info_len + 2; - } else - bss->wmm_ie_len = 0; - } else if (!elems.wmm_param && !elems.wmm_info && bss->wmm_ie) { - kfree(bss->wmm_ie); - bss->wmm_ie = NULL; - bss->wmm_ie_len = 0; - } - /* check if we need to merge IBSS */ - if (sdata->vif.type == IEEE80211_IF_TYPE_IBSS && beacon && - !local->sta_sw_scanning && !local->sta_hw_scanning && + if (sdata->vif.type == NL80211_IFTYPE_ADHOC && beacon && bss->capability & WLAN_CAPABILITY_IBSS && bss->freq == local->oper_channel->center_freq && - elems.ssid_len == sdata->u.sta.ssid_len && - memcmp(elems.ssid, sdata->u.sta.ssid, sdata->u.sta.ssid_len) == 0) { + elems->ssid_len == sdata->u.sta.ssid_len && + memcmp(elems->ssid, sdata->u.sta.ssid, + sdata->u.sta.ssid_len) == 0) { if (rx_status->flag & RX_FLAG_TSFT) { /* in order for correct IBSS merging we need mactime * @@ -2803,7 +1597,7 @@ static void ieee80211_rx_bss_info(struct net_device *dev, * e.g: at 1 MBit that means mactime is 192 usec earlier * (=24 bytes * 8 usecs/byte) than the beacon timestamp. */ - int rate = local->hw.wiphy->bands[rx_status->band]-> + int rate = local->hw.wiphy->bands[band]-> bitrates[rx_status->rate_idx].bitrate; rx_timestamp = rx_status->mactime + (24 * 8 * 10 / rate); } else if (local && local->ops && local->ops->get_tsf) @@ -2823,55 +1617,65 @@ static void ieee80211_rx_bss_info(struct net_device *dev, jiffies); #endif /* CONFIG_MAC80211_IBSS_DEBUG */ if (beacon_timestamp > rx_timestamp) { -#ifndef CONFIG_MAC80211_IBSS_DEBUG - if (net_ratelimit()) +#ifdef CONFIG_MAC80211_IBSS_DEBUG + printk(KERN_DEBUG "%s: beacon TSF higher than " + "local TSF - IBSS merge with BSSID %s\n", + sdata->dev->name, print_mac(mac, mgmt->bssid)); #endif - printk(KERN_DEBUG "%s: beacon TSF higher than " - "local TSF - IBSS merge with BSSID %s\n", - dev->name, print_mac(mac, mgmt->bssid)); - ieee80211_sta_join_ibss(dev, &sdata->u.sta, bss); - ieee80211_ibss_add_sta(dev, NULL, - mgmt->bssid, mgmt->sa); + ieee80211_sta_join_ibss(sdata, &sdata->u.sta, bss); + ieee80211_ibss_add_sta(sdata, NULL, + mgmt->bssid, mgmt->sa, + supp_rates); } } - ieee80211_rx_bss_put(dev, bss); + ieee80211_rx_bss_put(local, bss); } -static void ieee80211_rx_mgmt_probe_resp(struct net_device *dev, +static void ieee80211_rx_mgmt_probe_resp(struct ieee80211_sub_if_data *sdata, struct ieee80211_mgmt *mgmt, size_t len, struct ieee80211_rx_status *rx_status) { - ieee80211_rx_bss_info(dev, mgmt, len, rx_status, 0); + size_t baselen; + struct ieee802_11_elems elems; + struct ieee80211_if_sta *ifsta = &sdata->u.sta; + + if (memcmp(mgmt->da, sdata->dev->dev_addr, ETH_ALEN)) + return; /* ignore ProbeResp to foreign address */ + + baselen = (u8 *) mgmt->u.probe_resp.variable - (u8 *) mgmt; + if (baselen > len) + return; + + ieee802_11_parse_elems(mgmt->u.probe_resp.variable, len - baselen, + &elems); + + ieee80211_rx_bss_info(sdata, mgmt, len, rx_status, &elems, false); + + /* direct probe may be part of the association flow */ + if (test_and_clear_bit(IEEE80211_STA_REQ_DIRECT_PROBE, + &ifsta->request)) { + printk(KERN_DEBUG "%s direct probe responded\n", + sdata->dev->name); + ieee80211_authenticate(sdata, ifsta); + } } -static void ieee80211_rx_mgmt_beacon(struct net_device *dev, +static void ieee80211_rx_mgmt_beacon(struct ieee80211_sub_if_data *sdata, struct ieee80211_mgmt *mgmt, size_t len, struct ieee80211_rx_status *rx_status) { - struct ieee80211_sub_if_data *sdata; struct ieee80211_if_sta *ifsta; size_t baselen; struct ieee802_11_elems elems; - struct ieee80211_local *local = wdev_priv(dev->ieee80211_ptr); + struct ieee80211_local *local = sdata->local; struct ieee80211_conf *conf = &local->hw.conf; u32 changed = 0; - ieee80211_rx_bss_info(dev, mgmt, len, rx_status, 1); - - sdata = IEEE80211_DEV_TO_SUB_IF(dev); - if (sdata->vif.type != IEEE80211_IF_TYPE_STA) - return; - ifsta = &sdata->u.sta; - - if (!(ifsta->flags & IEEE80211_STA_ASSOCIATED) || - memcmp(ifsta->bssid, mgmt->bssid, ETH_ALEN) != 0) - return; - /* Process beacon from the current BSS */ baselen = (u8 *) mgmt->u.beacon.variable - (u8 *) mgmt; if (baselen > len) @@ -2879,16 +1683,18 @@ static void ieee80211_rx_mgmt_beacon(struct net_device *dev, ieee802_11_parse_elems(mgmt->u.beacon.variable, len - baselen, &elems); - if (elems.wmm_param && (ifsta->flags & IEEE80211_STA_WMM_ENABLED)) { - ieee80211_sta_wmm_params(dev, ifsta, elems.wmm_param, - elems.wmm_param_len); - } + ieee80211_rx_bss_info(sdata, mgmt, len, rx_status, &elems, true); - /* Do not send changes to driver if we are scanning. This removes - * requirement that driver's bss_info_changed function needs to be - * atomic. */ - if (local->sta_sw_scanning || local->sta_hw_scanning) + if (sdata->vif.type != NL80211_IFTYPE_STATION) return; + ifsta = &sdata->u.sta; + + if (!(ifsta->flags & IEEE80211_STA_ASSOCIATED) || + memcmp(ifsta->bssid, mgmt->bssid, ETH_ALEN) != 0) + return; + + ieee80211_sta_wmm_params(local, ifsta, elems.wmm_param, + elems.wmm_param_len); if (elems.erp_info && elems.erp_info_len >= 1) changed |= ieee80211_handle_erp_ie(sdata, elems.erp_info[0]); @@ -2913,14 +1719,13 @@ static void ieee80211_rx_mgmt_beacon(struct net_device *dev, } -static void ieee80211_rx_mgmt_probe_req(struct net_device *dev, +static void ieee80211_rx_mgmt_probe_req(struct ieee80211_sub_if_data *sdata, struct ieee80211_if_sta *ifsta, struct ieee80211_mgmt *mgmt, size_t len, struct ieee80211_rx_status *rx_status) { - struct ieee80211_local *local = wdev_priv(dev->ieee80211_ptr); - struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev); + struct ieee80211_local *local = sdata->local; int tx_last_beacon; struct sk_buff *skb; struct ieee80211_mgmt *resp; @@ -2931,8 +1736,8 @@ static void ieee80211_rx_mgmt_probe_req(struct net_device *dev, DECLARE_MAC_BUF(mac3); #endif - if (sdata->vif.type != IEEE80211_IF_TYPE_IBSS || - ifsta->state != IEEE80211_IBSS_JOINED || + if (sdata->vif.type != NL80211_IFTYPE_ADHOC || + ifsta->state != IEEE80211_STA_MLME_IBSS_JOINED || len < 24 + 2 || !ifsta->probe_resp) return; @@ -2944,7 +1749,7 @@ static void ieee80211_rx_mgmt_probe_req(struct net_device *dev, #ifdef CONFIG_MAC80211_IBSS_DEBUG printk(KERN_DEBUG "%s: RX ProbeReq SA=%s DA=%s BSSID=" "%s (tx_last_beacon=%d)\n", - dev->name, print_mac(mac, mgmt->sa), print_mac(mac2, mgmt->da), + sdata->dev->name, print_mac(mac, mgmt->sa), print_mac(mac2, mgmt->da), print_mac(mac3, mgmt->bssid), tx_last_beacon); #endif /* CONFIG_MAC80211_IBSS_DEBUG */ @@ -2959,11 +1764,11 @@ static void ieee80211_rx_mgmt_probe_req(struct net_device *dev, pos = mgmt->u.probe_req.variable; if (pos[0] != WLAN_EID_SSID || pos + 2 + pos[1] > end) { - if (net_ratelimit()) { - printk(KERN_DEBUG "%s: Invalid SSID IE in ProbeReq " - "from %s\n", - dev->name, print_mac(mac, mgmt->sa)); - } +#ifdef CONFIG_MAC80211_IBSS_DEBUG + printk(KERN_DEBUG "%s: Invalid SSID IE in ProbeReq " + "from %s\n", + sdata->dev->name, print_mac(mac, mgmt->sa)); +#endif return; } if (pos[1] != 0 && @@ -2982,71 +1787,15 @@ static void ieee80211_rx_mgmt_probe_req(struct net_device *dev, memcpy(resp->da, mgmt->sa, ETH_ALEN); #ifdef CONFIG_MAC80211_IBSS_DEBUG printk(KERN_DEBUG "%s: Sending ProbeResp to %s\n", - dev->name, print_mac(mac, resp->da)); + sdata->dev->name, print_mac(mac, resp->da)); #endif /* CONFIG_MAC80211_IBSS_DEBUG */ - ieee80211_sta_tx(dev, skb, 0); + ieee80211_tx_skb(sdata, skb, 0); } -static void ieee80211_rx_mgmt_action(struct net_device *dev, - struct ieee80211_if_sta *ifsta, - struct ieee80211_mgmt *mgmt, - size_t len, - struct ieee80211_rx_status *rx_status) -{ - struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev); - - if (len < IEEE80211_MIN_ACTION_SIZE) - return; - - switch (mgmt->u.action.category) { - case WLAN_CATEGORY_BACK: - switch (mgmt->u.action.u.addba_req.action_code) { - case WLAN_ACTION_ADDBA_REQ: - if (len < (IEEE80211_MIN_ACTION_SIZE + - sizeof(mgmt->u.action.u.addba_req))) - break; - ieee80211_sta_process_addba_request(dev, mgmt, len); - break; - case WLAN_ACTION_ADDBA_RESP: - if (len < (IEEE80211_MIN_ACTION_SIZE + - sizeof(mgmt->u.action.u.addba_resp))) - break; - ieee80211_sta_process_addba_resp(dev, mgmt, len); - break; - case WLAN_ACTION_DELBA: - if (len < (IEEE80211_MIN_ACTION_SIZE + - sizeof(mgmt->u.action.u.delba))) - break; - ieee80211_sta_process_delba(dev, mgmt, len); - break; - default: - if (net_ratelimit()) - printk(KERN_DEBUG "%s: Rx unknown A-MPDU action\n", - dev->name); - break; - } - break; - case PLINK_CATEGORY: - if (ieee80211_vif_is_mesh(&sdata->vif)) - mesh_rx_plink_frame(dev, mgmt, len, rx_status); - break; - case MESH_PATH_SEL_CATEGORY: - if (ieee80211_vif_is_mesh(&sdata->vif)) - mesh_rx_path_sel_frame(dev, mgmt, len); - break; - default: - if (net_ratelimit()) - printk(KERN_DEBUG "%s: Rx unknown action frame - " - "category=%d\n", dev->name, mgmt->u.action.category); - break; - } -} - -void ieee80211_sta_rx_mgmt(struct net_device *dev, struct sk_buff *skb, +void ieee80211_sta_rx_mgmt(struct ieee80211_sub_if_data *sdata, struct sk_buff *skb, struct ieee80211_rx_status *rx_status) { - struct ieee80211_local *local = wdev_priv(dev->ieee80211_ptr); - struct ieee80211_sub_if_data *sdata; + struct ieee80211_local *local = sdata->local; struct ieee80211_if_sta *ifsta; struct ieee80211_mgmt *mgmt; u16 fc; @@ -3054,7 +1803,6 @@ void ieee80211_sta_rx_mgmt(struct net_device *dev, struct sk_buff *skb, if (skb->len < 24) goto fail; - sdata = IEEE80211_DEV_TO_SUB_IF(dev); ifsta = &sdata->u.sta; mgmt = (struct ieee80211_mgmt *) skb->data; @@ -3064,7 +1812,6 @@ void ieee80211_sta_rx_mgmt(struct net_device *dev, struct sk_buff *skb, case IEEE80211_STYPE_PROBE_REQ: case IEEE80211_STYPE_PROBE_RESP: case IEEE80211_STYPE_BEACON: - case IEEE80211_STYPE_ACTION: memcpy(skb->cb, rx_status, sizeof(*rx_status)); case IEEE80211_STYPE_AUTH: case IEEE80211_STYPE_ASSOC_RESP: @@ -3074,28 +1821,20 @@ void ieee80211_sta_rx_mgmt(struct net_device *dev, struct sk_buff *skb, skb_queue_tail(&ifsta->skb_queue, skb); queue_work(local->hw.workqueue, &ifsta->work); return; - default: - printk(KERN_DEBUG "%s: received unknown management frame - " - "stype=%d\n", dev->name, - (fc & IEEE80211_FCTL_STYPE) >> 4); - break; } fail: kfree_skb(skb); } - -static void ieee80211_sta_rx_queued_mgmt(struct net_device *dev, +static void ieee80211_sta_rx_queued_mgmt(struct ieee80211_sub_if_data *sdata, struct sk_buff *skb) { struct ieee80211_rx_status *rx_status; - struct ieee80211_sub_if_data *sdata; struct ieee80211_if_sta *ifsta; struct ieee80211_mgmt *mgmt; u16 fc; - sdata = IEEE80211_DEV_TO_SUB_IF(dev); ifsta = &sdata->u.sta; rx_status = (struct ieee80211_rx_status *) skb->cb; @@ -3104,17 +1843,17 @@ static void ieee80211_sta_rx_queued_mgmt(struct net_device *dev, switch (fc & IEEE80211_FCTL_STYPE) { case IEEE80211_STYPE_PROBE_REQ: - ieee80211_rx_mgmt_probe_req(dev, ifsta, mgmt, skb->len, + ieee80211_rx_mgmt_probe_req(sdata, ifsta, mgmt, skb->len, rx_status); break; case IEEE80211_STYPE_PROBE_RESP: - ieee80211_rx_mgmt_probe_resp(dev, mgmt, skb->len, rx_status); + ieee80211_rx_mgmt_probe_resp(sdata, mgmt, skb->len, rx_status); break; case IEEE80211_STYPE_BEACON: - ieee80211_rx_mgmt_beacon(dev, mgmt, skb->len, rx_status); + ieee80211_rx_mgmt_beacon(sdata, mgmt, skb->len, rx_status); break; case IEEE80211_STYPE_AUTH: - ieee80211_rx_mgmt_auth(dev, ifsta, mgmt, skb->len); + ieee80211_rx_mgmt_auth(sdata, ifsta, mgmt, skb->len); break; case IEEE80211_STYPE_ASSOC_RESP: ieee80211_rx_mgmt_assoc_resp(sdata, ifsta, mgmt, skb->len, 0); @@ -3123,13 +1862,10 @@ static void ieee80211_sta_rx_queued_mgmt(struct net_device *dev, ieee80211_rx_mgmt_assoc_resp(sdata, ifsta, mgmt, skb->len, 1); break; case IEEE80211_STYPE_DEAUTH: - ieee80211_rx_mgmt_deauth(dev, ifsta, mgmt, skb->len); + ieee80211_rx_mgmt_deauth(sdata, ifsta, mgmt, skb->len); break; case IEEE80211_STYPE_DISASSOC: - ieee80211_rx_mgmt_disassoc(dev, ifsta, mgmt, skb->len); - break; - case IEEE80211_STYPE_ACTION: - ieee80211_rx_mgmt_action(dev, ifsta, mgmt, skb->len, rx_status); + ieee80211_rx_mgmt_disassoc(sdata, ifsta, mgmt, skb->len); break; } @@ -3137,48 +1873,11 @@ static void ieee80211_sta_rx_queued_mgmt(struct net_device *dev, } -ieee80211_rx_result -ieee80211_sta_rx_scan(struct net_device *dev, struct sk_buff *skb, - struct ieee80211_rx_status *rx_status) -{ - struct ieee80211_mgmt *mgmt; - u16 fc; - - if (skb->len < 2) - return RX_DROP_UNUSABLE; - - mgmt = (struct ieee80211_mgmt *) skb->data; - fc = le16_to_cpu(mgmt->frame_control); - - if ((fc & IEEE80211_FCTL_FTYPE) == IEEE80211_FTYPE_CTL) - return RX_CONTINUE; - - if (skb->len < 24) - return RX_DROP_MONITOR; - - if ((fc & IEEE80211_FCTL_FTYPE) == IEEE80211_FTYPE_MGMT) { - if ((fc & IEEE80211_FCTL_STYPE) == IEEE80211_STYPE_PROBE_RESP) { - ieee80211_rx_mgmt_probe_resp(dev, mgmt, - skb->len, rx_status); - dev_kfree_skb(skb); - return RX_QUEUED; - } else if ((fc & IEEE80211_FCTL_STYPE) == IEEE80211_STYPE_BEACON) { - ieee80211_rx_mgmt_beacon(dev, mgmt, skb->len, - rx_status); - dev_kfree_skb(skb); - return RX_QUEUED; - } - } - return RX_CONTINUE; -} - - -static int ieee80211_sta_active_ibss(struct net_device *dev) +static int ieee80211_sta_active_ibss(struct ieee80211_sub_if_data *sdata) { - struct ieee80211_local *local = wdev_priv(dev->ieee80211_ptr); + struct ieee80211_local *local = sdata->local; int active = 0; struct sta_info *sta; - struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev); rcu_read_lock(); @@ -3197,180 +1896,36 @@ static int ieee80211_sta_active_ibss(struct net_device *dev) } -static void ieee80211_sta_expire(struct net_device *dev, unsigned long exp_time) -{ - struct ieee80211_local *local = wdev_priv(dev->ieee80211_ptr); - struct sta_info *sta, *tmp; - LIST_HEAD(tmp_list); - DECLARE_MAC_BUF(mac); - unsigned long flags; - - spin_lock_irqsave(&local->sta_lock, flags); - list_for_each_entry_safe(sta, tmp, &local->sta_list, list) - if (time_after(jiffies, sta->last_rx + exp_time)) { - printk(KERN_DEBUG "%s: expiring inactive STA %s\n", - dev->name, print_mac(mac, sta->addr)); - __sta_info_unlink(&sta); - if (sta) - list_add(&sta->list, &tmp_list); - } - spin_unlock_irqrestore(&local->sta_lock, flags); - - list_for_each_entry_safe(sta, tmp, &tmp_list, list) - sta_info_destroy(sta); -} - - -static void ieee80211_sta_merge_ibss(struct net_device *dev, +static void ieee80211_sta_merge_ibss(struct ieee80211_sub_if_data *sdata, struct ieee80211_if_sta *ifsta) { mod_timer(&ifsta->timer, jiffies + IEEE80211_IBSS_MERGE_INTERVAL); - ieee80211_sta_expire(dev, IEEE80211_IBSS_INACTIVITY_LIMIT); - if (ieee80211_sta_active_ibss(dev)) + ieee80211_sta_expire(sdata, IEEE80211_IBSS_INACTIVITY_LIMIT); + if (ieee80211_sta_active_ibss(sdata)) return; printk(KERN_DEBUG "%s: No active IBSS STAs - trying to scan for other " - "IBSS networks with same SSID (merge)\n", dev->name); - ieee80211_sta_req_scan(dev, ifsta->ssid, ifsta->ssid_len); + "IBSS networks with same SSID (merge)\n", sdata->dev->name); + ieee80211_request_scan(sdata, ifsta->ssid, ifsta->ssid_len); } -#ifdef CONFIG_MAC80211_MESH -static void ieee80211_mesh_housekeeping(struct net_device *dev, - struct ieee80211_if_sta *ifsta) -{ - struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev); - bool free_plinks; - - ieee80211_sta_expire(dev, IEEE80211_MESH_PEER_INACTIVITY_LIMIT); - mesh_path_expire(dev); - - free_plinks = mesh_plink_availables(sdata); - if (free_plinks != sdata->u.sta.accepting_plinks) - ieee80211_if_config_beacon(dev); - - mod_timer(&ifsta->timer, jiffies + - IEEE80211_MESH_HOUSEKEEPING_INTERVAL); -} - - -void ieee80211_start_mesh(struct net_device *dev) -{ - struct ieee80211_if_sta *ifsta; - struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev); - ifsta = &sdata->u.sta; - ifsta->state = IEEE80211_MESH_UP; - ieee80211_sta_timer((unsigned long)sdata); -} -#endif - - -void ieee80211_sta_timer(unsigned long data) +static void ieee80211_sta_timer(unsigned long data) { struct ieee80211_sub_if_data *sdata = (struct ieee80211_sub_if_data *) data; struct ieee80211_if_sta *ifsta = &sdata->u.sta; - struct ieee80211_local *local = wdev_priv(&sdata->wdev); + struct ieee80211_local *local = sdata->local; set_bit(IEEE80211_STA_REQ_RUN, &ifsta->request); queue_work(local->hw.workqueue, &ifsta->work); } -void ieee80211_sta_work(struct work_struct *work) -{ - struct ieee80211_sub_if_data *sdata = - container_of(work, struct ieee80211_sub_if_data, u.sta.work); - struct net_device *dev = sdata->dev; - struct ieee80211_local *local = wdev_priv(dev->ieee80211_ptr); - struct ieee80211_if_sta *ifsta; - struct sk_buff *skb; - - if (!netif_running(dev)) - return; - - if (local->sta_sw_scanning || local->sta_hw_scanning) - return; - - if (sdata->vif.type != IEEE80211_IF_TYPE_STA && - sdata->vif.type != IEEE80211_IF_TYPE_IBSS && - sdata->vif.type != IEEE80211_IF_TYPE_MESH_POINT) { - printk(KERN_DEBUG "%s: ieee80211_sta_work: non-STA interface " - "(type=%d)\n", dev->name, sdata->vif.type); - return; - } - ifsta = &sdata->u.sta; - - while ((skb = skb_dequeue(&ifsta->skb_queue))) - ieee80211_sta_rx_queued_mgmt(dev, skb); - -#ifdef CONFIG_MAC80211_MESH - if (ifsta->preq_queue_len && - time_after(jiffies, - ifsta->last_preq + msecs_to_jiffies(ifsta->mshcfg.dot11MeshHWMPpreqMinInterval))) - mesh_path_start_discovery(dev); -#endif - - if (ifsta->state != IEEE80211_AUTHENTICATE && - ifsta->state != IEEE80211_ASSOCIATE && - test_and_clear_bit(IEEE80211_STA_REQ_SCAN, &ifsta->request)) { - if (ifsta->scan_ssid_len) - ieee80211_sta_start_scan(dev, ifsta->scan_ssid, ifsta->scan_ssid_len); - else - ieee80211_sta_start_scan(dev, NULL, 0); - return; - } - - if (test_and_clear_bit(IEEE80211_STA_REQ_AUTH, &ifsta->request)) { - if (ieee80211_sta_config_auth(dev, ifsta)) - return; - clear_bit(IEEE80211_STA_REQ_RUN, &ifsta->request); - } else if (!test_and_clear_bit(IEEE80211_STA_REQ_RUN, &ifsta->request)) - return; - - switch (ifsta->state) { - case IEEE80211_DISABLED: - break; - case IEEE80211_AUTHENTICATE: - ieee80211_authenticate(dev, ifsta); - break; - case IEEE80211_ASSOCIATE: - ieee80211_associate(dev, ifsta); - break; - case IEEE80211_ASSOCIATED: - ieee80211_associated(dev, ifsta); - break; - case IEEE80211_IBSS_SEARCH: - ieee80211_sta_find_ibss(dev, ifsta); - break; - case IEEE80211_IBSS_JOINED: - ieee80211_sta_merge_ibss(dev, ifsta); - break; -#ifdef CONFIG_MAC80211_MESH - case IEEE80211_MESH_UP: - ieee80211_mesh_housekeeping(dev, ifsta); - break; -#endif - default: - printk(KERN_DEBUG "ieee80211_sta_work: Unknown state %d\n", - ifsta->state); - break; - } - - if (ieee80211_privacy_mismatch(dev, ifsta)) { - printk(KERN_DEBUG "%s: privacy configuration mismatch and " - "mixed-cell disabled - disassociate\n", dev->name); - - ieee80211_send_disassoc(dev, ifsta, WLAN_REASON_UNSPECIFIED); - ieee80211_set_disassoc(dev, ifsta, 0); - } -} - - -static void ieee80211_sta_reset_auth(struct net_device *dev, +static void ieee80211_sta_reset_auth(struct ieee80211_sub_if_data *sdata, struct ieee80211_if_sta *ifsta) { - struct ieee80211_local *local = wdev_priv(dev->ieee80211_ptr); + struct ieee80211_local *local = sdata->local; if (local->ops->reset_tsf) { /* Reset own TSF to allow time synchronization work. */ @@ -3388,33 +1943,17 @@ static void ieee80211_sta_reset_auth(struct net_device *dev, ifsta->auth_alg = WLAN_AUTH_LEAP; else ifsta->auth_alg = WLAN_AUTH_OPEN; - printk(KERN_DEBUG "%s: Initial auth_alg=%d\n", dev->name, - ifsta->auth_alg); ifsta->auth_transaction = -1; ifsta->flags &= ~IEEE80211_STA_ASSOCIATED; - ifsta->auth_tries = ifsta->assoc_tries = 0; - netif_carrier_off(dev); + ifsta->assoc_scan_tries = 0; + ifsta->direct_probe_tries = 0; + ifsta->auth_tries = 0; + ifsta->assoc_tries = 0; + netif_tx_stop_all_queues(sdata->dev); + netif_carrier_off(sdata->dev); } -void ieee80211_sta_req_auth(struct net_device *dev, - struct ieee80211_if_sta *ifsta) -{ - struct ieee80211_local *local = wdev_priv(dev->ieee80211_ptr); - struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev); - - if (sdata->vif.type != IEEE80211_IF_TYPE_STA) - return; - - if ((ifsta->flags & (IEEE80211_STA_BSSID_SET | - IEEE80211_STA_AUTO_BSSID_SEL)) && - (ifsta->flags & (IEEE80211_STA_SSID_SET | - IEEE80211_STA_AUTO_SSID_SEL))) { - set_bit(IEEE80211_STA_REQ_AUTH, &ifsta->request); - queue_work(local->hw.workqueue, &ifsta->work); - } -} - static int ieee80211_sta_match_ssid(struct ieee80211_if_sta *ifsta, const char *ssid, int ssid_len) { @@ -3445,81 +1984,11 @@ static int ieee80211_sta_match_ssid(struct ieee80211_if_sta *ifsta, return 0; } -static int ieee80211_sta_config_auth(struct net_device *dev, +static int ieee80211_sta_create_ibss(struct ieee80211_sub_if_data *sdata, struct ieee80211_if_sta *ifsta) { - struct ieee80211_local *local = wdev_priv(dev->ieee80211_ptr); - struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev); - struct ieee80211_sta_bss *bss, *selected = NULL; - int top_rssi = 0, freq; - - spin_lock_bh(&local->sta_bss_lock); - freq = local->oper_channel->center_freq; - list_for_each_entry(bss, &local->sta_bss_list, list) { - if (!(bss->capability & WLAN_CAPABILITY_ESS)) - continue; - - if ((ifsta->flags & (IEEE80211_STA_AUTO_SSID_SEL | - IEEE80211_STA_AUTO_BSSID_SEL | - IEEE80211_STA_AUTO_CHANNEL_SEL)) && - (!!(bss->capability & WLAN_CAPABILITY_PRIVACY) ^ - !!sdata->default_key)) - continue; - - if (!(ifsta->flags & IEEE80211_STA_AUTO_CHANNEL_SEL) && - bss->freq != freq) - continue; - - if (!(ifsta->flags & IEEE80211_STA_AUTO_BSSID_SEL) && - memcmp(bss->bssid, ifsta->bssid, ETH_ALEN)) - continue; - - if (!(ifsta->flags & IEEE80211_STA_AUTO_SSID_SEL) && - !ieee80211_sta_match_ssid(ifsta, bss->ssid, bss->ssid_len)) - continue; - - if (!selected || top_rssi < bss->rssi) { - selected = bss; - top_rssi = bss->rssi; - } - } - if (selected) - atomic_inc(&selected->users); - spin_unlock_bh(&local->sta_bss_lock); - - if (selected) { - ieee80211_set_freq(dev, selected->freq); - if (!(ifsta->flags & IEEE80211_STA_SSID_SET)) - ieee80211_sta_set_ssid(dev, selected->ssid, - selected->ssid_len); - ieee80211_sta_set_bssid(dev, selected->bssid); - ieee80211_sta_def_wmm_params(dev, selected, 0); - ieee80211_rx_bss_put(dev, selected); - ifsta->state = IEEE80211_AUTHENTICATE; - ieee80211_sta_reset_auth(dev, ifsta); - return 0; - } else { - if (ifsta->state != IEEE80211_AUTHENTICATE) { - if (ifsta->flags & IEEE80211_STA_AUTO_SSID_SEL) - ieee80211_sta_start_scan(dev, NULL, 0); - else - ieee80211_sta_start_scan(dev, ifsta->ssid, - ifsta->ssid_len); - ifsta->state = IEEE80211_AUTHENTICATE; - set_bit(IEEE80211_STA_REQ_AUTH, &ifsta->request); - } else - ifsta->state = IEEE80211_DISABLED; - } - return -1; -} - - -static int ieee80211_sta_create_ibss(struct net_device *dev, - struct ieee80211_if_sta *ifsta) -{ - struct ieee80211_local *local = wdev_priv(dev->ieee80211_ptr); - struct ieee80211_sta_bss *bss; - struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev); + struct ieee80211_local *local = sdata->local; + struct ieee80211_bss *bss; struct ieee80211_supported_band *sband; u8 bssid[ETH_ALEN], *pos; int i; @@ -3535,15 +2004,15 @@ static int ieee80211_sta_create_ibss(struct net_device *dev, * random number generator get different BSSID. */ get_random_bytes(bssid, ETH_ALEN); for (i = 0; i < ETH_ALEN; i++) - bssid[i] ^= dev->dev_addr[i]; + bssid[i] ^= sdata->dev->dev_addr[i]; bssid[0] &= ~0x01; bssid[0] |= 0x02; #endif printk(KERN_DEBUG "%s: Creating new IBSS network, BSSID %s\n", - dev->name, print_mac(mac, bssid)); + sdata->dev->name, print_mac(mac, bssid)); - bss = ieee80211_rx_bss_add(dev, bssid, + bss = ieee80211_rx_bss_add(local, bssid, local->hw.conf.channel->center_freq, sdata->u.sta.ssid, sdata->u.sta.ssid_len); if (!bss) @@ -3553,14 +2022,16 @@ static int ieee80211_sta_create_ibss(struct net_device *dev, sband = local->hw.wiphy->bands[bss->band]; if (local->hw.conf.beacon_int == 0) - local->hw.conf.beacon_int = 10000; + local->hw.conf.beacon_int = 100; bss->beacon_int = local->hw.conf.beacon_int; bss->last_update = jiffies; bss->capability = WLAN_CAPABILITY_IBSS; - if (sdata->default_key) { + + if (sdata->default_key) bss->capability |= WLAN_CAPABILITY_PRIVACY; - } else + else sdata->drop_unencrypted = 0; + bss->supp_rates_len = sband->n_bitrates; pos = bss->supp_rates; for (i = 0; i < sband->n_bitrates; i++) { @@ -3568,17 +2039,17 @@ static int ieee80211_sta_create_ibss(struct net_device *dev, *pos++ = (u8) (rate / 5); } - ret = ieee80211_sta_join_ibss(dev, ifsta, bss); - ieee80211_rx_bss_put(dev, bss); + ret = ieee80211_sta_join_ibss(sdata, ifsta, bss); + ieee80211_rx_bss_put(local, bss); return ret; } -static int ieee80211_sta_find_ibss(struct net_device *dev, +static int ieee80211_sta_find_ibss(struct ieee80211_sub_if_data *sdata, struct ieee80211_if_sta *ifsta) { - struct ieee80211_local *local = wdev_priv(dev->ieee80211_ptr); - struct ieee80211_sta_bss *bss; + struct ieee80211_local *local = sdata->local; + struct ieee80211_bss *bss; int found = 0; u8 bssid[ETH_ALEN]; int active_ibss; @@ -3588,13 +2059,13 @@ static int ieee80211_sta_find_ibss(struct net_device *dev, if (ifsta->ssid_len == 0) return -EINVAL; - active_ibss = ieee80211_sta_active_ibss(dev); + active_ibss = ieee80211_sta_active_ibss(sdata); #ifdef CONFIG_MAC80211_IBSS_DEBUG printk(KERN_DEBUG "%s: sta_find_ibss (active_ibss=%d)\n", - dev->name, active_ibss); + sdata->dev->name, active_ibss); #endif /* CONFIG_MAC80211_IBSS_DEBUG */ - spin_lock_bh(&local->sta_bss_lock); - list_for_each_entry(bss, &local->sta_bss_list, list) { + spin_lock_bh(&local->bss_lock); + list_for_each_entry(bss, &local->bss_list, list) { if (ifsta->ssid_len != bss->ssid_len || memcmp(ifsta->ssid, bss->ssid, bss->ssid_len) != 0 || !(bss->capability & WLAN_CAPABILITY_IBSS)) @@ -3608,40 +2079,54 @@ static int ieee80211_sta_find_ibss(struct net_device *dev, if (active_ibss || memcmp(bssid, ifsta->bssid, ETH_ALEN) != 0) break; } - spin_unlock_bh(&local->sta_bss_lock); + spin_unlock_bh(&local->bss_lock); #ifdef CONFIG_MAC80211_IBSS_DEBUG - printk(KERN_DEBUG " sta_find_ibss: selected %s current " - "%s\n", print_mac(mac, bssid), print_mac(mac2, ifsta->bssid)); + if (found) + printk(KERN_DEBUG " sta_find_ibss: selected %s current " + "%s\n", print_mac(mac, bssid), + print_mac(mac2, ifsta->bssid)); #endif /* CONFIG_MAC80211_IBSS_DEBUG */ - if (found && memcmp(ifsta->bssid, bssid, ETH_ALEN) != 0 && - (bss = ieee80211_rx_bss_get(dev, bssid, - local->hw.conf.channel->center_freq, - ifsta->ssid, ifsta->ssid_len))) { + + if (found && memcmp(ifsta->bssid, bssid, ETH_ALEN) != 0) { int ret; + int search_freq; + + if (ifsta->flags & IEEE80211_STA_AUTO_CHANNEL_SEL) + search_freq = bss->freq; + else + search_freq = local->hw.conf.channel->center_freq; + + bss = ieee80211_rx_bss_get(local, bssid, search_freq, + ifsta->ssid, ifsta->ssid_len); + if (!bss) + goto dont_join; + printk(KERN_DEBUG "%s: Selected IBSS BSSID %s" " based on configured SSID\n", - dev->name, print_mac(mac, bssid)); - ret = ieee80211_sta_join_ibss(dev, ifsta, bss); - ieee80211_rx_bss_put(dev, bss); + sdata->dev->name, print_mac(mac, bssid)); + ret = ieee80211_sta_join_ibss(sdata, ifsta, bss); + ieee80211_rx_bss_put(local, bss); return ret; } + +dont_join: #ifdef CONFIG_MAC80211_IBSS_DEBUG printk(KERN_DEBUG " did not try to join ibss\n"); #endif /* CONFIG_MAC80211_IBSS_DEBUG */ /* Selected IBSS not found in current scan results - try to scan */ - if (ifsta->state == IEEE80211_IBSS_JOINED && - !ieee80211_sta_active_ibss(dev)) { + if (ifsta->state == IEEE80211_STA_MLME_IBSS_JOINED && + !ieee80211_sta_active_ibss(sdata)) { mod_timer(&ifsta->timer, jiffies + IEEE80211_IBSS_MERGE_INTERVAL); } else if (time_after(jiffies, local->last_scan_completed + IEEE80211_SCAN_INTERVAL)) { printk(KERN_DEBUG "%s: Trigger new scan to find an IBSS to " - "join\n", dev->name); - return ieee80211_sta_req_scan(dev, ifsta->ssid, + "join\n", sdata->dev->name); + return ieee80211_request_scan(sdata, ifsta->ssid, ifsta->ssid_len); - } else if (ifsta->state != IEEE80211_IBSS_JOINED) { + } else if (ifsta->state != IEEE80211_STA_MLME_IBSS_JOINED) { int interval = IEEE80211_SCAN_INTERVAL; if (time_after(jiffies, ifsta->ibss_join_req + @@ -3649,10 +2134,10 @@ static int ieee80211_sta_find_ibss(struct net_device *dev, if ((ifsta->flags & IEEE80211_STA_CREATE_IBSS) && (!(local->oper_channel->flags & IEEE80211_CHAN_NO_IBSS))) - return ieee80211_sta_create_ibss(dev, ifsta); + return ieee80211_sta_create_ibss(sdata, ifsta); if (ifsta->flags & IEEE80211_STA_CREATE_IBSS) { printk(KERN_DEBUG "%s: IBSS not allowed on" - " %d MHz\n", dev->name, + " %d MHz\n", sdata->dev->name, local->hw.conf.channel->center_freq); } @@ -3661,7 +2146,7 @@ static int ieee80211_sta_find_ibss(struct net_device *dev, interval = IEEE80211_SCAN_INTERVAL_SLOW; } - ifsta->state = IEEE80211_IBSS_SEARCH; + ifsta->state = IEEE80211_STA_MLME_IBSS_SEARCH; mod_timer(&ifsta->timer, jiffies + interval); return 0; } @@ -3670,583 +2155,346 @@ static int ieee80211_sta_find_ibss(struct net_device *dev, } -int ieee80211_sta_set_ssid(struct net_device *dev, char *ssid, size_t len) +static int ieee80211_sta_config_auth(struct ieee80211_sub_if_data *sdata, + struct ieee80211_if_sta *ifsta) { - struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev); - struct ieee80211_if_sta *ifsta; + struct ieee80211_local *local = sdata->local; + struct ieee80211_bss *bss, *selected = NULL; + int top_rssi = 0, freq; - if (len > IEEE80211_MAX_SSID_LEN) - return -EINVAL; + spin_lock_bh(&local->bss_lock); + freq = local->oper_channel->center_freq; + list_for_each_entry(bss, &local->bss_list, list) { + if (!(bss->capability & WLAN_CAPABILITY_ESS)) + continue; - ifsta = &sdata->u.sta; + if ((ifsta->flags & (IEEE80211_STA_AUTO_SSID_SEL | + IEEE80211_STA_AUTO_BSSID_SEL | + IEEE80211_STA_AUTO_CHANNEL_SEL)) && + (!!(bss->capability & WLAN_CAPABILITY_PRIVACY) ^ + !!sdata->default_key)) + continue; - if (ifsta->ssid_len != len || memcmp(ifsta->ssid, ssid, len) != 0) - ifsta->flags &= ~IEEE80211_STA_PREV_BSSID_SET; - memcpy(ifsta->ssid, ssid, len); - memset(ifsta->ssid + len, 0, IEEE80211_MAX_SSID_LEN - len); - ifsta->ssid_len = len; + if (!(ifsta->flags & IEEE80211_STA_AUTO_CHANNEL_SEL) && + bss->freq != freq) + continue; - if (len) - ifsta->flags |= IEEE80211_STA_SSID_SET; - else - ifsta->flags &= ~IEEE80211_STA_SSID_SET; - if (sdata->vif.type == IEEE80211_IF_TYPE_IBSS && - !(ifsta->flags & IEEE80211_STA_BSSID_SET)) { - ifsta->ibss_join_req = jiffies; - ifsta->state = IEEE80211_IBSS_SEARCH; - return ieee80211_sta_find_ibss(dev, ifsta); + if (!(ifsta->flags & IEEE80211_STA_AUTO_BSSID_SEL) && + memcmp(bss->bssid, ifsta->bssid, ETH_ALEN)) + continue; + + if (!(ifsta->flags & IEEE80211_STA_AUTO_SSID_SEL) && + !ieee80211_sta_match_ssid(ifsta, bss->ssid, bss->ssid_len)) + continue; + + if (!selected || top_rssi < bss->signal) { + selected = bss; + top_rssi = bss->signal; + } } - return 0; -} + if (selected) + atomic_inc(&selected->users); + spin_unlock_bh(&local->bss_lock); + if (selected) { + ieee80211_set_freq(sdata, selected->freq); + if (!(ifsta->flags & IEEE80211_STA_SSID_SET)) + ieee80211_sta_set_ssid(sdata, selected->ssid, + selected->ssid_len); + ieee80211_sta_set_bssid(sdata, selected->bssid); + ieee80211_sta_def_wmm_params(sdata, selected); -int ieee80211_sta_get_ssid(struct net_device *dev, char *ssid, size_t *len) -{ - struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev); - struct ieee80211_if_sta *ifsta = &sdata->u.sta; - memcpy(ssid, ifsta->ssid, ifsta->ssid_len); - *len = ifsta->ssid_len; - return 0; + /* Send out direct probe if no probe resp was received or + * the one we have is outdated + */ + if (!selected->last_probe_resp || + time_after(jiffies, selected->last_probe_resp + + IEEE80211_SCAN_RESULT_EXPIRE)) + ifsta->state = IEEE80211_STA_MLME_DIRECT_PROBE; + else + ifsta->state = IEEE80211_STA_MLME_AUTHENTICATE; + + ieee80211_rx_bss_put(local, selected); + ieee80211_sta_reset_auth(sdata, ifsta); + return 0; + } else { + if (ifsta->assoc_scan_tries < IEEE80211_ASSOC_SCANS_MAX_TRIES) { + ifsta->assoc_scan_tries++; + if (ifsta->flags & IEEE80211_STA_AUTO_SSID_SEL) + ieee80211_start_scan(sdata, NULL, 0); + else + ieee80211_start_scan(sdata, ifsta->ssid, + ifsta->ssid_len); + ifsta->state = IEEE80211_STA_MLME_AUTHENTICATE; + set_bit(IEEE80211_STA_REQ_AUTH, &ifsta->request); + } else + ifsta->state = IEEE80211_STA_MLME_DISABLED; + } + return -1; } -int ieee80211_sta_set_bssid(struct net_device *dev, u8 *bssid) +static void ieee80211_sta_work(struct work_struct *work) { - struct ieee80211_sub_if_data *sdata; + struct ieee80211_sub_if_data *sdata = + container_of(work, struct ieee80211_sub_if_data, u.sta.work); + struct ieee80211_local *local = sdata->local; struct ieee80211_if_sta *ifsta; - int res; - - sdata = IEEE80211_DEV_TO_SUB_IF(dev); - ifsta = &sdata->u.sta; + struct sk_buff *skb; - if (memcmp(ifsta->bssid, bssid, ETH_ALEN) != 0) { - memcpy(ifsta->bssid, bssid, ETH_ALEN); - res = ieee80211_if_config(dev); - if (res) { - printk(KERN_DEBUG "%s: Failed to config new BSSID to " - "the low-level driver\n", dev->name); - return res; - } - } + if (!netif_running(sdata->dev)) + return; - if (is_valid_ether_addr(bssid)) - ifsta->flags |= IEEE80211_STA_BSSID_SET; - else - ifsta->flags &= ~IEEE80211_STA_BSSID_SET; + if (local->sw_scanning || local->hw_scanning) + return; - return 0; -} + if (WARN_ON(sdata->vif.type != NL80211_IFTYPE_STATION && + sdata->vif.type != NL80211_IFTYPE_ADHOC)) + return; + ifsta = &sdata->u.sta; + while ((skb = skb_dequeue(&ifsta->skb_queue))) + ieee80211_sta_rx_queued_mgmt(sdata, skb); -static void ieee80211_send_nullfunc(struct ieee80211_local *local, - struct ieee80211_sub_if_data *sdata, - int powersave) -{ - struct sk_buff *skb; - struct ieee80211_hdr *nullfunc; - u16 fc; + if (ifsta->state != IEEE80211_STA_MLME_DIRECT_PROBE && + ifsta->state != IEEE80211_STA_MLME_AUTHENTICATE && + ifsta->state != IEEE80211_STA_MLME_ASSOCIATE && + test_and_clear_bit(IEEE80211_STA_REQ_SCAN, &ifsta->request)) { + ieee80211_start_scan(sdata, ifsta->scan_ssid, + ifsta->scan_ssid_len); + return; + } - skb = dev_alloc_skb(local->hw.extra_tx_headroom + 24); - if (!skb) { - printk(KERN_DEBUG "%s: failed to allocate buffer for nullfunc " - "frame\n", sdata->dev->name); + if (test_and_clear_bit(IEEE80211_STA_REQ_AUTH, &ifsta->request)) { + if (ieee80211_sta_config_auth(sdata, ifsta)) + return; + clear_bit(IEEE80211_STA_REQ_RUN, &ifsta->request); + } else if (!test_and_clear_bit(IEEE80211_STA_REQ_RUN, &ifsta->request)) return; + + switch (ifsta->state) { + case IEEE80211_STA_MLME_DISABLED: + break; + case IEEE80211_STA_MLME_DIRECT_PROBE: + ieee80211_direct_probe(sdata, ifsta); + break; + case IEEE80211_STA_MLME_AUTHENTICATE: + ieee80211_authenticate(sdata, ifsta); + break; + case IEEE80211_STA_MLME_ASSOCIATE: + ieee80211_associate(sdata, ifsta); + break; + case IEEE80211_STA_MLME_ASSOCIATED: + ieee80211_associated(sdata, ifsta); + break; + case IEEE80211_STA_MLME_IBSS_SEARCH: + ieee80211_sta_find_ibss(sdata, ifsta); + break; + case IEEE80211_STA_MLME_IBSS_JOINED: + ieee80211_sta_merge_ibss(sdata, ifsta); + break; + default: + WARN_ON(1); + break; } - skb_reserve(skb, local->hw.extra_tx_headroom); - nullfunc = (struct ieee80211_hdr *) skb_put(skb, 24); - memset(nullfunc, 0, 24); - fc = IEEE80211_FTYPE_DATA | IEEE80211_STYPE_NULLFUNC | - IEEE80211_FCTL_TODS; - if (powersave) - fc |= IEEE80211_FCTL_PM; - nullfunc->frame_control = cpu_to_le16(fc); - memcpy(nullfunc->addr1, sdata->u.sta.bssid, ETH_ALEN); - memcpy(nullfunc->addr2, sdata->dev->dev_addr, ETH_ALEN); - memcpy(nullfunc->addr3, sdata->u.sta.bssid, ETH_ALEN); - - ieee80211_sta_tx(sdata->dev, skb, 0); -} + if (ieee80211_privacy_mismatch(sdata, ifsta)) { + printk(KERN_DEBUG "%s: privacy configuration mismatch and " + "mixed-cell disabled - disassociate\n", sdata->dev->name); + ieee80211_set_disassoc(sdata, ifsta, false, true, + WLAN_REASON_UNSPECIFIED); + } +} static void ieee80211_restart_sta_timer(struct ieee80211_sub_if_data *sdata) { - if (sdata->vif.type == IEEE80211_IF_TYPE_STA || - ieee80211_vif_is_mesh(&sdata->vif)) - ieee80211_sta_timer((unsigned long)sdata); + if (sdata->vif.type == NL80211_IFTYPE_STATION) + queue_work(sdata->local->hw.workqueue, + &sdata->u.sta.work); } -void ieee80211_scan_completed(struct ieee80211_hw *hw) +/* interface setup */ +void ieee80211_sta_setup_sdata(struct ieee80211_sub_if_data *sdata) { - struct ieee80211_local *local = hw_to_local(hw); - struct net_device *dev = local->scan_dev; - struct ieee80211_sub_if_data *sdata; - union iwreq_data wrqu; + struct ieee80211_if_sta *ifsta; - local->last_scan_completed = jiffies; - memset(&wrqu, 0, sizeof(wrqu)); - wireless_send_event(dev, SIOCGIWSCAN, &wrqu, NULL); - - if (local->sta_hw_scanning) { - local->sta_hw_scanning = 0; - if (ieee80211_hw_config(local)) - printk(KERN_DEBUG "%s: failed to restore operational " - "channel after scan\n", dev->name); - /* Restart STA timer for HW scan case */ - rcu_read_lock(); - list_for_each_entry_rcu(sdata, &local->interfaces, list) - ieee80211_restart_sta_timer(sdata); - rcu_read_unlock(); + ifsta = &sdata->u.sta; + INIT_WORK(&ifsta->work, ieee80211_sta_work); + setup_timer(&ifsta->timer, ieee80211_sta_timer, + (unsigned long) sdata); + skb_queue_head_init(&ifsta->skb_queue); - goto done; - } + ifsta->capab = WLAN_CAPABILITY_ESS; + ifsta->auth_algs = IEEE80211_AUTH_ALG_OPEN | + IEEE80211_AUTH_ALG_SHARED_KEY; + ifsta->flags |= IEEE80211_STA_CREATE_IBSS | + IEEE80211_STA_AUTO_BSSID_SEL | + IEEE80211_STA_AUTO_CHANNEL_SEL; + if (ieee80211_num_regular_queues(&sdata->local->hw) >= 4) + ifsta->flags |= IEEE80211_STA_WMM_ENABLED; +} - local->sta_sw_scanning = 0; - if (ieee80211_hw_config(local)) - printk(KERN_DEBUG "%s: failed to restore operational " - "channel after scan\n", dev->name); +/* + * Add a new IBSS station, will also be called by the RX code when, + * in IBSS mode, receiving a frame from a yet-unknown station, hence + * must be callable in atomic context. + */ +struct sta_info *ieee80211_ibss_add_sta(struct ieee80211_sub_if_data *sdata, + struct sk_buff *skb, u8 *bssid, + u8 *addr, u64 supp_rates) +{ + struct ieee80211_local *local = sdata->local; + struct sta_info *sta; + DECLARE_MAC_BUF(mac); + int band = local->hw.conf.channel->band; + /* TODO: Could consider removing the least recently used entry and + * allow new one to be added. */ + if (local->num_sta >= IEEE80211_IBSS_MAX_STA_ENTRIES) { + if (net_ratelimit()) { + printk(KERN_DEBUG "%s: No room for a new IBSS STA " + "entry %s\n", sdata->dev->name, print_mac(mac, addr)); + } + return NULL; + } - netif_tx_lock_bh(local->mdev); - local->filter_flags &= ~FIF_BCN_PRBRESP_PROMISC; - local->ops->configure_filter(local_to_hw(local), - FIF_BCN_PRBRESP_PROMISC, - &local->filter_flags, - local->mdev->mc_count, - local->mdev->mc_list); + if (compare_ether_addr(bssid, sdata->u.sta.bssid)) + return NULL; - netif_tx_unlock_bh(local->mdev); +#ifdef CONFIG_MAC80211_VERBOSE_DEBUG + printk(KERN_DEBUG "%s: Adding new IBSS station %s (dev=%s)\n", + wiphy_name(local->hw.wiphy), print_mac(mac, addr), sdata->dev->name); +#endif - rcu_read_lock(); - list_for_each_entry_rcu(sdata, &local->interfaces, list) { + sta = sta_info_alloc(sdata, addr, GFP_ATOMIC); + if (!sta) + return NULL; - /* No need to wake the master device. */ - if (sdata->dev == local->mdev) - continue; + set_sta_flags(sta, WLAN_STA_AUTHORIZED); - /* Tell AP we're back */ - if (sdata->vif.type == IEEE80211_IF_TYPE_STA && - sdata->u.sta.flags & IEEE80211_STA_ASSOCIATED) - ieee80211_send_nullfunc(local, sdata, 0); + /* make sure mandatory rates are always added */ + sta->sta.supp_rates[band] = supp_rates | + ieee80211_mandatory_rates(local, band); - ieee80211_restart_sta_timer(sdata); + rate_control_rate_init(sta); - netif_wake_queue(sdata->dev); - } - rcu_read_unlock(); + if (sta_info_insert(sta)) + return NULL; -done: - sdata = IEEE80211_DEV_TO_SUB_IF(dev); - if (sdata->vif.type == IEEE80211_IF_TYPE_IBSS) { - struct ieee80211_if_sta *ifsta = &sdata->u.sta; - if (!(ifsta->flags & IEEE80211_STA_BSSID_SET) || - (!ifsta->state == IEEE80211_IBSS_JOINED && - !ieee80211_sta_active_ibss(dev))) - ieee80211_sta_find_ibss(dev, ifsta); - } + return sta; } -EXPORT_SYMBOL(ieee80211_scan_completed); -void ieee80211_sta_scan_work(struct work_struct *work) +/* configuration hooks */ +void ieee80211_sta_req_auth(struct ieee80211_sub_if_data *sdata, + struct ieee80211_if_sta *ifsta) { - struct ieee80211_local *local = - container_of(work, struct ieee80211_local, scan_work.work); - struct net_device *dev = local->scan_dev; - struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev); - struct ieee80211_supported_band *sband; - struct ieee80211_channel *chan; - int skip; - unsigned long next_delay = 0; + struct ieee80211_local *local = sdata->local; - if (!local->sta_sw_scanning) + if (sdata->vif.type != NL80211_IFTYPE_STATION) return; - switch (local->scan_state) { - case SCAN_SET_CHANNEL: - /* - * Get current scan band. scan_band may be IEEE80211_NUM_BANDS - * after we successfully scanned the last channel of the last - * band (and the last band is supported by the hw) - */ - if (local->scan_band < IEEE80211_NUM_BANDS) - sband = local->hw.wiphy->bands[local->scan_band]; - else - sband = NULL; - - /* - * If we are at an unsupported band and have more bands - * left to scan, advance to the next supported one. - */ - while (!sband && local->scan_band < IEEE80211_NUM_BANDS - 1) { - local->scan_band++; - sband = local->hw.wiphy->bands[local->scan_band]; - local->scan_channel_idx = 0; - } - - /* if no more bands/channels left, complete scan */ - if (!sband || local->scan_channel_idx >= sband->n_channels) { - ieee80211_scan_completed(local_to_hw(local)); - return; - } - skip = 0; - chan = &sband->channels[local->scan_channel_idx]; - - if (chan->flags & IEEE80211_CHAN_DISABLED || - (sdata->vif.type == IEEE80211_IF_TYPE_IBSS && - chan->flags & IEEE80211_CHAN_NO_IBSS)) - skip = 1; - - if (!skip) { - local->scan_channel = chan; - if (ieee80211_hw_config(local)) { - printk(KERN_DEBUG "%s: failed to set freq to " - "%d MHz for scan\n", dev->name, - chan->center_freq); - skip = 1; - } - } - - /* advance state machine to next channel/band */ - local->scan_channel_idx++; - if (local->scan_channel_idx >= sband->n_channels) { - /* - * scan_band may end up == IEEE80211_NUM_BANDS, but - * we'll catch that case above and complete the scan - * if that is the case. - */ - local->scan_band++; - local->scan_channel_idx = 0; - } - - if (skip) - break; + if ((ifsta->flags & (IEEE80211_STA_BSSID_SET | + IEEE80211_STA_AUTO_BSSID_SEL)) && + (ifsta->flags & (IEEE80211_STA_SSID_SET | + IEEE80211_STA_AUTO_SSID_SEL))) { - next_delay = IEEE80211_PROBE_DELAY + - usecs_to_jiffies(local->hw.channel_change_time); - local->scan_state = SCAN_SEND_PROBE; - break; - case SCAN_SEND_PROBE: - next_delay = IEEE80211_PASSIVE_CHANNEL_TIME; - local->scan_state = SCAN_SET_CHANNEL; + if (ifsta->state == IEEE80211_STA_MLME_ASSOCIATED) + ieee80211_set_disassoc(sdata, ifsta, true, true, + WLAN_REASON_DEAUTH_LEAVING); - if (local->scan_channel->flags & IEEE80211_CHAN_PASSIVE_SCAN) - break; - ieee80211_send_probe_req(dev, NULL, local->scan_ssid, - local->scan_ssid_len); - next_delay = IEEE80211_CHANNEL_TIME; - break; + set_bit(IEEE80211_STA_REQ_AUTH, &ifsta->request); + queue_work(local->hw.workqueue, &ifsta->work); } - - if (local->sta_sw_scanning) - queue_delayed_work(local->hw.workqueue, &local->scan_work, - next_delay); } - -static int ieee80211_sta_start_scan(struct net_device *dev, - u8 *ssid, size_t ssid_len) +int ieee80211_sta_set_ssid(struct ieee80211_sub_if_data *sdata, char *ssid, size_t len) { - struct ieee80211_local *local = wdev_priv(dev->ieee80211_ptr); - struct ieee80211_sub_if_data *sdata; + struct ieee80211_if_sta *ifsta; + int res; - if (ssid_len > IEEE80211_MAX_SSID_LEN) + if (len > IEEE80211_MAX_SSID_LEN) return -EINVAL; - /* MLME-SCAN.request (page 118) page 144 (11.1.3.1) - * BSSType: INFRASTRUCTURE, INDEPENDENT, ANY_BSS - * BSSID: MACAddress - * SSID - * ScanType: ACTIVE, PASSIVE - * ProbeDelay: delay (in microseconds) to be used prior to transmitting - * a Probe frame during active scanning - * ChannelList - * MinChannelTime (>= ProbeDelay), in TU - * MaxChannelTime: (>= MinChannelTime), in TU - */ - - /* MLME-SCAN.confirm - * BSSDescriptionSet - * ResultCode: SUCCESS, INVALID_PARAMETERS - */ + ifsta = &sdata->u.sta; - if (local->sta_sw_scanning || local->sta_hw_scanning) { - if (local->scan_dev == dev) - return 0; - return -EBUSY; - } + if (ifsta->ssid_len != len || memcmp(ifsta->ssid, ssid, len) != 0) { + memset(ifsta->ssid, 0, sizeof(ifsta->ssid)); + memcpy(ifsta->ssid, ssid, len); + ifsta->ssid_len = len; + ifsta->flags &= ~IEEE80211_STA_PREV_BSSID_SET; - if (local->ops->hw_scan) { - int rc = local->ops->hw_scan(local_to_hw(local), - ssid, ssid_len); - if (!rc) { - local->sta_hw_scanning = 1; - local->scan_dev = dev; + res = 0; + /* + * Hack! MLME code needs to be cleaned up to have different + * entry points for configuration and internal selection change + */ + if (netif_running(sdata->dev)) + res = ieee80211_if_config(sdata, IEEE80211_IFCC_SSID); + if (res) { + printk(KERN_DEBUG "%s: Failed to config new SSID to " + "the low-level driver\n", sdata->dev->name); + return res; } - return rc; } - local->sta_sw_scanning = 1; - - rcu_read_lock(); - list_for_each_entry_rcu(sdata, &local->interfaces, list) { - - /* Don't stop the master interface, otherwise we can't transmit - * probes! */ - if (sdata->dev == local->mdev) - continue; + if (len) + ifsta->flags |= IEEE80211_STA_SSID_SET; + else + ifsta->flags &= ~IEEE80211_STA_SSID_SET; - netif_stop_queue(sdata->dev); - if (sdata->vif.type == IEEE80211_IF_TYPE_STA && - (sdata->u.sta.flags & IEEE80211_STA_ASSOCIATED)) - ieee80211_send_nullfunc(local, sdata, 1); + if (sdata->vif.type == NL80211_IFTYPE_ADHOC && + !(ifsta->flags & IEEE80211_STA_BSSID_SET)) { + ifsta->ibss_join_req = jiffies; + ifsta->state = IEEE80211_STA_MLME_IBSS_SEARCH; + return ieee80211_sta_find_ibss(sdata, ifsta); } - rcu_read_unlock(); - - if (ssid) { - local->scan_ssid_len = ssid_len; - memcpy(local->scan_ssid, ssid, ssid_len); - } else - local->scan_ssid_len = 0; - local->scan_state = SCAN_SET_CHANNEL; - local->scan_channel_idx = 0; - local->scan_band = IEEE80211_BAND_2GHZ; - local->scan_dev = dev; - - netif_tx_lock_bh(local->mdev); - local->filter_flags |= FIF_BCN_PRBRESP_PROMISC; - local->ops->configure_filter(local_to_hw(local), - FIF_BCN_PRBRESP_PROMISC, - &local->filter_flags, - local->mdev->mc_count, - local->mdev->mc_list); - netif_tx_unlock_bh(local->mdev); - - /* TODO: start scan as soon as all nullfunc frames are ACKed */ - queue_delayed_work(local->hw.workqueue, &local->scan_work, - IEEE80211_CHANNEL_TIME); return 0; } - -int ieee80211_sta_req_scan(struct net_device *dev, u8 *ssid, size_t ssid_len) +int ieee80211_sta_get_ssid(struct ieee80211_sub_if_data *sdata, char *ssid, size_t *len) { - struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev); struct ieee80211_if_sta *ifsta = &sdata->u.sta; - struct ieee80211_local *local = wdev_priv(dev->ieee80211_ptr); - - if (sdata->vif.type != IEEE80211_IF_TYPE_STA) - return ieee80211_sta_start_scan(dev, ssid, ssid_len); - - if (local->sta_sw_scanning || local->sta_hw_scanning) { - if (local->scan_dev == dev) - return 0; - return -EBUSY; - } - - ifsta->scan_ssid_len = ssid_len; - if (ssid_len) - memcpy(ifsta->scan_ssid, ssid, ssid_len); - set_bit(IEEE80211_STA_REQ_SCAN, &ifsta->request); - queue_work(local->hw.workqueue, &ifsta->work); + memcpy(ssid, ifsta->ssid, ifsta->ssid_len); + *len = ifsta->ssid_len; return 0; } -static char * -ieee80211_sta_scan_result(struct net_device *dev, - struct ieee80211_sta_bss *bss, - char *current_ev, char *end_buf) +int ieee80211_sta_set_bssid(struct ieee80211_sub_if_data *sdata, u8 *bssid) { - struct ieee80211_local *local = wdev_priv(dev->ieee80211_ptr); - struct iw_event iwe; - - if (time_after(jiffies, - bss->last_update + IEEE80211_SCAN_RESULT_EXPIRE)) - return current_ev; - - memset(&iwe, 0, sizeof(iwe)); - iwe.cmd = SIOCGIWAP; - iwe.u.ap_addr.sa_family = ARPHRD_ETHER; - memcpy(iwe.u.ap_addr.sa_data, bss->bssid, ETH_ALEN); - current_ev = iwe_stream_add_event(current_ev, end_buf, &iwe, - IW_EV_ADDR_LEN); - - memset(&iwe, 0, sizeof(iwe)); - iwe.cmd = SIOCGIWESSID; - if (bss_mesh_cfg(bss)) { - iwe.u.data.length = bss_mesh_id_len(bss); - iwe.u.data.flags = 1; - current_ev = iwe_stream_add_point(current_ev, end_buf, &iwe, - bss_mesh_id(bss)); - } else { - iwe.u.data.length = bss->ssid_len; - iwe.u.data.flags = 1; - current_ev = iwe_stream_add_point(current_ev, end_buf, &iwe, - bss->ssid); - } - - if (bss->capability & (WLAN_CAPABILITY_ESS | WLAN_CAPABILITY_IBSS) - || bss_mesh_cfg(bss)) { - memset(&iwe, 0, sizeof(iwe)); - iwe.cmd = SIOCGIWMODE; - if (bss_mesh_cfg(bss)) - iwe.u.mode = IW_MODE_MESH; - else if (bss->capability & WLAN_CAPABILITY_ESS) - iwe.u.mode = IW_MODE_MASTER; - else - iwe.u.mode = IW_MODE_ADHOC; - current_ev = iwe_stream_add_event(current_ev, end_buf, &iwe, - IW_EV_UINT_LEN); - } - - memset(&iwe, 0, sizeof(iwe)); - iwe.cmd = SIOCGIWFREQ; - iwe.u.freq.m = ieee80211_frequency_to_channel(bss->freq); - iwe.u.freq.e = 0; - current_ev = iwe_stream_add_event(current_ev, end_buf, &iwe, - IW_EV_FREQ_LEN); - - memset(&iwe, 0, sizeof(iwe)); - iwe.cmd = SIOCGIWFREQ; - iwe.u.freq.m = bss->freq; - iwe.u.freq.e = 6; - current_ev = iwe_stream_add_event(current_ev, end_buf, &iwe, - IW_EV_FREQ_LEN); - memset(&iwe, 0, sizeof(iwe)); - iwe.cmd = IWEVQUAL; - iwe.u.qual.qual = bss->signal; - iwe.u.qual.level = bss->rssi; - iwe.u.qual.noise = bss->noise; - iwe.u.qual.updated = local->wstats_flags; - current_ev = iwe_stream_add_event(current_ev, end_buf, &iwe, - IW_EV_QUAL_LEN); - - memset(&iwe, 0, sizeof(iwe)); - iwe.cmd = SIOCGIWENCODE; - if (bss->capability & WLAN_CAPABILITY_PRIVACY) - iwe.u.data.flags = IW_ENCODE_ENABLED | IW_ENCODE_NOKEY; - else - iwe.u.data.flags = IW_ENCODE_DISABLED; - iwe.u.data.length = 0; - current_ev = iwe_stream_add_point(current_ev, end_buf, &iwe, ""); - - if (bss && bss->wpa_ie) { - memset(&iwe, 0, sizeof(iwe)); - iwe.cmd = IWEVGENIE; - iwe.u.data.length = bss->wpa_ie_len; - current_ev = iwe_stream_add_point(current_ev, end_buf, &iwe, - bss->wpa_ie); - } - - if (bss && bss->rsn_ie) { - memset(&iwe, 0, sizeof(iwe)); - iwe.cmd = IWEVGENIE; - iwe.u.data.length = bss->rsn_ie_len; - current_ev = iwe_stream_add_point(current_ev, end_buf, &iwe, - bss->rsn_ie); - } - - if (bss && bss->supp_rates_len > 0) { - /* display all supported rates in readable format */ - char *p = current_ev + IW_EV_LCP_LEN; - int i; - - memset(&iwe, 0, sizeof(iwe)); - iwe.cmd = SIOCGIWRATE; - /* Those two flags are ignored... */ - iwe.u.bitrate.fixed = iwe.u.bitrate.disabled = 0; - - for (i = 0; i < bss->supp_rates_len; i++) { - iwe.u.bitrate.value = ((bss->supp_rates[i] & - 0x7f) * 500000); - p = iwe_stream_add_value(current_ev, p, - end_buf, &iwe, IW_EV_PARAM_LEN); - } - current_ev = p; - } + struct ieee80211_if_sta *ifsta; + int res; - if (bss) { - char *buf; - buf = kmalloc(30, GFP_ATOMIC); - if (buf) { - memset(&iwe, 0, sizeof(iwe)); - iwe.cmd = IWEVCUSTOM; - sprintf(buf, "tsf=%016llx", (unsigned long long)(bss->timestamp)); - iwe.u.data.length = strlen(buf); - current_ev = iwe_stream_add_point(current_ev, end_buf, - &iwe, buf); - kfree(buf); - } - } + ifsta = &sdata->u.sta; - if (bss_mesh_cfg(bss)) { - char *buf; - u8 *cfg = bss_mesh_cfg(bss); - buf = kmalloc(50, GFP_ATOMIC); - if (buf) { - memset(&iwe, 0, sizeof(iwe)); - iwe.cmd = IWEVCUSTOM; - sprintf(buf, "Mesh network (version %d)", cfg[0]); - iwe.u.data.length = strlen(buf); - current_ev = iwe_stream_add_point(current_ev, end_buf, - &iwe, buf); - sprintf(buf, "Path Selection Protocol ID: " - "0x%02X%02X%02X%02X", cfg[1], cfg[2], cfg[3], - cfg[4]); - iwe.u.data.length = strlen(buf); - current_ev = iwe_stream_add_point(current_ev, end_buf, - &iwe, buf); - sprintf(buf, "Path Selection Metric ID: " - "0x%02X%02X%02X%02X", cfg[5], cfg[6], cfg[7], - cfg[8]); - iwe.u.data.length = strlen(buf); - current_ev = iwe_stream_add_point(current_ev, end_buf, - &iwe, buf); - sprintf(buf, "Congestion Control Mode ID: " - "0x%02X%02X%02X%02X", cfg[9], cfg[10], - cfg[11], cfg[12]); - iwe.u.data.length = strlen(buf); - current_ev = iwe_stream_add_point(current_ev, end_buf, - &iwe, buf); - sprintf(buf, "Channel Precedence: " - "0x%02X%02X%02X%02X", cfg[13], cfg[14], - cfg[15], cfg[16]); - iwe.u.data.length = strlen(buf); - current_ev = iwe_stream_add_point(current_ev, end_buf, - &iwe, buf); - kfree(buf); + if (memcmp(ifsta->bssid, bssid, ETH_ALEN) != 0) { + memcpy(ifsta->bssid, bssid, ETH_ALEN); + res = 0; + /* + * Hack! See also ieee80211_sta_set_ssid. + */ + if (netif_running(sdata->dev)) + res = ieee80211_if_config(sdata, IEEE80211_IFCC_BSSID); + if (res) { + printk(KERN_DEBUG "%s: Failed to config new BSSID to " + "the low-level driver\n", sdata->dev->name); + return res; } } - return current_ev; -} - + if (is_valid_ether_addr(bssid)) + ifsta->flags |= IEEE80211_STA_BSSID_SET; + else + ifsta->flags &= ~IEEE80211_STA_BSSID_SET; -int ieee80211_sta_scan_results(struct net_device *dev, char *buf, size_t len) -{ - struct ieee80211_local *local = wdev_priv(dev->ieee80211_ptr); - char *current_ev = buf; - char *end_buf = buf + len; - struct ieee80211_sta_bss *bss; - - spin_lock_bh(&local->sta_bss_lock); - list_for_each_entry(bss, &local->sta_bss_list, list) { - if (buf + len - current_ev <= IW_EV_ADDR_LEN) { - spin_unlock_bh(&local->sta_bss_lock); - return -E2BIG; - } - current_ev = ieee80211_sta_scan_result(dev, bss, current_ev, - end_buf); - } - spin_unlock_bh(&local->sta_bss_lock); - return current_ev - buf; + return 0; } - -int ieee80211_sta_set_extra_ie(struct net_device *dev, char *ie, size_t len) +int ieee80211_sta_set_extra_ie(struct ieee80211_sub_if_data *sdata, char *ie, size_t len) { - struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev); struct ieee80211_if_sta *ifsta = &sdata->u.sta; + kfree(ifsta->extra_ie); if (len == 0) { ifsta->extra_ie = NULL; @@ -4263,84 +2511,60 @@ int ieee80211_sta_set_extra_ie(struct net_device *dev, char *ie, size_t len) return 0; } - -struct sta_info * ieee80211_ibss_add_sta(struct net_device *dev, - struct sk_buff *skb, u8 *bssid, - u8 *addr) +int ieee80211_sta_deauthenticate(struct ieee80211_sub_if_data *sdata, u16 reason) { - struct ieee80211_local *local = wdev_priv(dev->ieee80211_ptr); - struct sta_info *sta; - struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev); - DECLARE_MAC_BUF(mac); - - /* TODO: Could consider removing the least recently used entry and - * allow new one to be added. */ - if (local->num_sta >= IEEE80211_IBSS_MAX_STA_ENTRIES) { - if (net_ratelimit()) { - printk(KERN_DEBUG "%s: No room for a new IBSS STA " - "entry %s\n", dev->name, print_mac(mac, addr)); - } - return NULL; - } - - printk(KERN_DEBUG "%s: Adding new IBSS station %s (dev=%s)\n", - wiphy_name(local->hw.wiphy), print_mac(mac, addr), dev->name); - - sta = sta_info_alloc(sdata, addr, GFP_ATOMIC); - if (!sta) - return NULL; - - sta->flags |= WLAN_STA_AUTHORIZED; - - sta->supp_rates[local->hw.conf.channel->band] = - sdata->u.sta.supp_rates_bits[local->hw.conf.channel->band]; - - rate_control_rate_init(sta, local); - - if (sta_info_insert(sta)) - return NULL; - - return sta; -} - - -int ieee80211_sta_deauthenticate(struct net_device *dev, u16 reason) -{ - struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev); struct ieee80211_if_sta *ifsta = &sdata->u.sta; - printk(KERN_DEBUG "%s: deauthenticate(reason=%d)\n", - dev->name, reason); + printk(KERN_DEBUG "%s: deauthenticating by local choice (reason=%d)\n", + sdata->dev->name, reason); - if (sdata->vif.type != IEEE80211_IF_TYPE_STA && - sdata->vif.type != IEEE80211_IF_TYPE_IBSS) + if (sdata->vif.type != NL80211_IFTYPE_STATION && + sdata->vif.type != NL80211_IFTYPE_ADHOC) return -EINVAL; - ieee80211_send_deauth(dev, ifsta, reason); - ieee80211_set_disassoc(dev, ifsta, 1); + ieee80211_set_disassoc(sdata, ifsta, true, true, reason); return 0; } - -int ieee80211_sta_disassociate(struct net_device *dev, u16 reason) +int ieee80211_sta_disassociate(struct ieee80211_sub_if_data *sdata, u16 reason) { - struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev); struct ieee80211_if_sta *ifsta = &sdata->u.sta; - printk(KERN_DEBUG "%s: disassociate(reason=%d)\n", - dev->name, reason); + printk(KERN_DEBUG "%s: disassociating by local choice (reason=%d)\n", + sdata->dev->name, reason); - if (sdata->vif.type != IEEE80211_IF_TYPE_STA) + if (sdata->vif.type != NL80211_IFTYPE_STATION) return -EINVAL; if (!(ifsta->flags & IEEE80211_STA_ASSOCIATED)) return -1; - ieee80211_send_disassoc(dev, ifsta, reason); - ieee80211_set_disassoc(dev, ifsta, 0); + ieee80211_set_disassoc(sdata, ifsta, false, true, reason); return 0; } +/* scan finished notification */ +void ieee80211_mlme_notify_scan_completed(struct ieee80211_local *local) +{ + struct ieee80211_sub_if_data *sdata = local->scan_sdata; + struct ieee80211_if_sta *ifsta; + + if (sdata && sdata->vif.type == NL80211_IFTYPE_ADHOC) { + ifsta = &sdata->u.sta; + if (!(ifsta->flags & IEEE80211_STA_BSSID_SET) || + (!(ifsta->state == IEEE80211_STA_MLME_IBSS_JOINED) && + !ieee80211_sta_active_ibss(sdata))) + ieee80211_sta_find_ibss(sdata, ifsta); + } + + /* Restart STA timers */ + rcu_read_lock(); + list_for_each_entry_rcu(sdata, &local->interfaces, list) + ieee80211_restart_sta_timer(sdata); + rcu_read_unlock(); +} + +/* driver notification call */ void ieee80211_notify_mac(struct ieee80211_hw *hw, enum ieee80211_notification_types notif_type) { @@ -4351,12 +2575,10 @@ void ieee80211_notify_mac(struct ieee80211_hw *hw, case IEEE80211_NOTIFY_RE_ASSOC: rcu_read_lock(); list_for_each_entry_rcu(sdata, &local->interfaces, list) { + if (sdata->vif.type != NL80211_IFTYPE_STATION) + continue; - if (sdata->vif.type == IEEE80211_IF_TYPE_STA) { - ieee80211_sta_req_auth(sdata->dev, - &sdata->u.sta); - } - + ieee80211_sta_req_auth(sdata, &sdata->u.sta); } rcu_read_unlock(); break; diff --git a/net/mac80211/rate.c b/net/mac80211/rate.c index 841df93807fc..5d786720d935 100644 --- a/net/mac80211/rate.c +++ b/net/mac80211/rate.c @@ -12,6 +12,7 @@ #include <linux/rtnetlink.h> #include "rate.h" #include "ieee80211_i.h" +#include "debugfs.h" struct rate_control_alg { struct list_head list; @@ -127,19 +128,46 @@ static void ieee80211_rate_control_ops_put(struct rate_control_ops *ops) module_put(ops->module); } +#ifdef CONFIG_MAC80211_DEBUGFS +static ssize_t rcname_read(struct file *file, char __user *userbuf, + size_t count, loff_t *ppos) +{ + struct rate_control_ref *ref = file->private_data; + int len = strlen(ref->ops->name); + + return simple_read_from_buffer(userbuf, count, ppos, + ref->ops->name, len); +} + +static const struct file_operations rcname_ops = { + .read = rcname_read, + .open = mac80211_open_file_generic, +}; +#endif + struct rate_control_ref *rate_control_alloc(const char *name, struct ieee80211_local *local) { + struct dentry *debugfsdir = NULL; struct rate_control_ref *ref; ref = kmalloc(sizeof(struct rate_control_ref), GFP_KERNEL); if (!ref) goto fail_ref; kref_init(&ref->kref); + ref->local = local; ref->ops = ieee80211_rate_control_ops_get(name); if (!ref->ops) goto fail_ops; - ref->priv = ref->ops->alloc(local); + +#ifdef CONFIG_MAC80211_DEBUGFS + debugfsdir = debugfs_create_dir("rc", local->hw.wiphy->debugfsdir); + local->debugfs.rcdir = debugfsdir; + local->debugfs.rcname = debugfs_create_file("name", 0400, debugfsdir, + ref, &rcname_ops); +#endif + + ref->priv = ref->ops->alloc(&local->hw, debugfsdir); if (!ref->priv) goto fail_priv; return ref; @@ -158,42 +186,61 @@ static void rate_control_release(struct kref *kref) ctrl_ref = container_of(kref, struct rate_control_ref, kref); ctrl_ref->ops->free(ctrl_ref->priv); + +#ifdef CONFIG_MAC80211_DEBUGFS + debugfs_remove(ctrl_ref->local->debugfs.rcname); + ctrl_ref->local->debugfs.rcname = NULL; + debugfs_remove(ctrl_ref->local->debugfs.rcdir); + ctrl_ref->local->debugfs.rcdir = NULL; +#endif + ieee80211_rate_control_ops_put(ctrl_ref->ops); kfree(ctrl_ref); } -void rate_control_get_rate(struct net_device *dev, +void rate_control_get_rate(struct ieee80211_sub_if_data *sdata, struct ieee80211_supported_band *sband, - struct sk_buff *skb, + struct sta_info *sta, struct sk_buff *skb, struct rate_selection *sel) { - struct ieee80211_local *local = wdev_priv(dev->ieee80211_ptr); - struct rate_control_ref *ref = local->rate_ctrl; - struct ieee80211_hdr *hdr = (struct ieee80211_hdr *) skb->data; - struct sta_info *sta; + struct rate_control_ref *ref = sdata->local->rate_ctrl; + void *priv_sta = NULL; + struct ieee80211_sta *ista = NULL; int i; - rcu_read_lock(); - sta = sta_info_get(local, hdr->addr1); + sel->rate_idx = -1; + sel->nonerp_idx = -1; + sel->probe_idx = -1; + sel->max_rate_idx = sdata->max_ratectrl_rateidx; - memset(sel, 0, sizeof(struct rate_selection)); + if (sta) { + ista = &sta->sta; + priv_sta = sta->rate_ctrl_priv; + } - ref->ops->get_rate(ref->priv, dev, sband, skb, sel); + if (sta && sdata->force_unicast_rateidx > -1) + sel->rate_idx = sdata->force_unicast_rateidx; + else + ref->ops->get_rate(ref->priv, sband, ista, priv_sta, skb, sel); + + if (sdata->max_ratectrl_rateidx > -1 && + sel->rate_idx > sdata->max_ratectrl_rateidx) + sel->rate_idx = sdata->max_ratectrl_rateidx; + + BUG_ON(sel->rate_idx < 0); /* Select a non-ERP backup rate. */ - if (!sel->nonerp) { + if (sel->nonerp_idx < 0) { for (i = 0; i < sband->n_bitrates; i++) { struct ieee80211_rate *rate = &sband->bitrates[i]; - if (sel->rate->bitrate < rate->bitrate) + if (sband->bitrates[sel->rate_idx].bitrate < rate->bitrate) break; - if (rate_supported(sta, sband->band, i) && + if (rate_supported(ista, sband->band, i) && !(rate->flags & IEEE80211_RATE_ERP_G)) - sel->nonerp = rate; + sel->nonerp_idx = i; } } - - rcu_read_unlock(); } struct rate_control_ref *rate_control_get(struct rate_control_ref *ref) diff --git a/net/mac80211/rate.h b/net/mac80211/rate.h index 5b45f33cb766..d0092f847f82 100644 --- a/net/mac80211/rate.h +++ b/net/mac80211/rate.h @@ -19,78 +19,48 @@ #include "ieee80211_i.h" #include "sta_info.h" -/* TODO: kdoc */ -struct rate_selection { - /* Selected transmission rate */ - struct ieee80211_rate *rate; - /* Non-ERP rate to use if mac80211 decides it cannot use an ERP rate */ - struct ieee80211_rate *nonerp; - /* probe with this rate, or NULL for no probing */ - struct ieee80211_rate *probe; -}; - -struct rate_control_ops { - struct module *module; - const char *name; - void (*tx_status)(void *priv, struct net_device *dev, - struct sk_buff *skb, - struct ieee80211_tx_status *status); - void (*get_rate)(void *priv, struct net_device *dev, - struct ieee80211_supported_band *band, - struct sk_buff *skb, - struct rate_selection *sel); - void (*rate_init)(void *priv, void *priv_sta, - struct ieee80211_local *local, struct sta_info *sta); - void (*clear)(void *priv); - - void *(*alloc)(struct ieee80211_local *local); - void (*free)(void *priv); - void *(*alloc_sta)(void *priv, gfp_t gfp); - void (*free_sta)(void *priv, void *priv_sta); - - int (*add_attrs)(void *priv, struct kobject *kobj); - void (*remove_attrs)(void *priv, struct kobject *kobj); - void (*add_sta_debugfs)(void *priv, void *priv_sta, - struct dentry *dir); - void (*remove_sta_debugfs)(void *priv, void *priv_sta); -}; - struct rate_control_ref { + struct ieee80211_local *local; struct rate_control_ops *ops; void *priv; struct kref kref; }; -int ieee80211_rate_control_register(struct rate_control_ops *ops); -void ieee80211_rate_control_unregister(struct rate_control_ops *ops); - /* Get a reference to the rate control algorithm. If `name' is NULL, get the * first available algorithm. */ struct rate_control_ref *rate_control_alloc(const char *name, struct ieee80211_local *local); -void rate_control_get_rate(struct net_device *dev, +void rate_control_get_rate(struct ieee80211_sub_if_data *sdata, struct ieee80211_supported_band *sband, - struct sk_buff *skb, + struct sta_info *sta, struct sk_buff *skb, struct rate_selection *sel); struct rate_control_ref *rate_control_get(struct rate_control_ref *ref); void rate_control_put(struct rate_control_ref *ref); -static inline void rate_control_tx_status(struct net_device *dev, - struct sk_buff *skb, - struct ieee80211_tx_status *status) +static inline void rate_control_tx_status(struct ieee80211_local *local, + struct ieee80211_supported_band *sband, + struct sta_info *sta, + struct sk_buff *skb) { - struct ieee80211_local *local = wdev_priv(dev->ieee80211_ptr); struct rate_control_ref *ref = local->rate_ctrl; + struct ieee80211_sta *ista = &sta->sta; + void *priv_sta = sta->rate_ctrl_priv; - ref->ops->tx_status(ref->priv, dev, skb, status); + ref->ops->tx_status(ref->priv, sband, ista, priv_sta, skb); } -static inline void rate_control_rate_init(struct sta_info *sta, - struct ieee80211_local *local) +static inline void rate_control_rate_init(struct sta_info *sta) { + struct ieee80211_local *local = sta->sdata->local; struct rate_control_ref *ref = sta->rate_ctrl; - ref->ops->rate_init(ref->priv, sta->rate_ctrl_priv, local, sta); + struct ieee80211_sta *ista = &sta->sta; + void *priv_sta = sta->rate_ctrl_priv; + struct ieee80211_supported_band *sband; + + sband = local->hw.wiphy->bands[local->hw.conf.channel->band]; + + ref->ops->rate_init(ref->priv, sband, ista, priv_sta); } @@ -101,15 +71,19 @@ static inline void rate_control_clear(struct ieee80211_local *local) } static inline void *rate_control_alloc_sta(struct rate_control_ref *ref, + struct ieee80211_sta *sta, gfp_t gfp) { - return ref->ops->alloc_sta(ref->priv, gfp); + return ref->ops->alloc_sta(ref->priv, sta, gfp); } -static inline void rate_control_free_sta(struct rate_control_ref *ref, - void *priv) +static inline void rate_control_free_sta(struct sta_info *sta) { - ref->ops->free_sta(ref->priv, priv); + struct rate_control_ref *ref = sta->rate_ctrl; + struct ieee80211_sta *ista = &sta->sta; + void *priv_sta = sta->rate_ctrl_priv; + + ref->ops->free_sta(ref->priv, ista, priv_sta); } static inline void rate_control_add_sta_debugfs(struct sta_info *sta) @@ -131,39 +105,6 @@ static inline void rate_control_remove_sta_debugfs(struct sta_info *sta) #endif } -static inline int rate_supported(struct sta_info *sta, - enum ieee80211_band band, - int index) -{ - return (sta == NULL || sta->supp_rates[band] & BIT(index)); -} - -static inline int -rate_lowest_index(struct ieee80211_local *local, - struct ieee80211_supported_band *sband, - struct sta_info *sta) -{ - int i; - - for (i = 0; i < sband->n_bitrates; i++) - if (rate_supported(sta, sband->band, i)) - return i; - - /* warn when we cannot find a rate. */ - WARN_ON(1); - - return 0; -} - -static inline struct ieee80211_rate * -rate_lowest(struct ieee80211_local *local, - struct ieee80211_supported_band *sband, - struct sta_info *sta) -{ - return &sband->bitrates[rate_lowest_index(local, sband, sta)]; -} - - /* functions for rate control related to a device */ int ieee80211_init_rate_ctrl_alg(struct ieee80211_local *local, const char *name); @@ -171,9 +112,7 @@ void rate_control_deinitialize(struct ieee80211_local *local); /* Rate control algorithms */ -#if defined(RC80211_PID_COMPILE) || \ - (defined(CONFIG_MAC80211_RC_PID) && \ - !defined(CONFIG_MAC80211_RC_PID_MODULE)) +#ifdef CONFIG_MAC80211_RC_PID extern int rc80211_pid_init(void); extern void rc80211_pid_exit(void); #else @@ -186,4 +125,18 @@ static inline void rc80211_pid_exit(void) } #endif +#ifdef CONFIG_MAC80211_RC_MINSTREL +extern int rc80211_minstrel_init(void); +extern void rc80211_minstrel_exit(void); +#else +static inline int rc80211_minstrel_init(void) +{ + return 0; +} +static inline void rc80211_minstrel_exit(void) +{ +} +#endif + + #endif /* IEEE80211_RATE_H */ diff --git a/net/mac80211/rc80211_minstrel.c b/net/mac80211/rc80211_minstrel.c new file mode 100644 index 000000000000..f6d69dab07a3 --- /dev/null +++ b/net/mac80211/rc80211_minstrel.c @@ -0,0 +1,583 @@ +/* + * Copyright (C) 2008 Felix Fietkau <nbd@openwrt.org> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * Based on minstrel.c: + * Copyright (C) 2005-2007 Derek Smithies <derek@indranet.co.nz> + * Sponsored by Indranet Technologies Ltd + * + * Based on sample.c: + * Copyright (c) 2005 John Bicket + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer, + * without modification. + * 2. Redistributions in binary form must reproduce at minimum a disclaimer + * similar to the "NO WARRANTY" disclaimer below ("Disclaimer") and any + * redistribution must be conditioned upon including a substantially + * similar Disclaimer requirement for further binary redistribution. + * 3. Neither the names of the above-listed copyright holders nor the names + * of any contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * Alternatively, this software may be distributed under the terms of the + * GNU General Public License ("GPL") version 2 as published by the Free + * Software Foundation. + * + * NO WARRANTY + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF NONINFRINGEMENT, MERCHANTIBILITY + * AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL + * THE COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR SPECIAL, EXEMPLARY, + * OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER + * IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF + * THE POSSIBILITY OF SUCH DAMAGES. + */ +#include <linux/netdevice.h> +#include <linux/types.h> +#include <linux/skbuff.h> +#include <linux/debugfs.h> +#include <linux/random.h> +#include <linux/ieee80211.h> +#include <net/mac80211.h> +#include "rate.h" +#include "rc80211_minstrel.h" + +#define SAMPLE_COLUMNS 10 +#define SAMPLE_TBL(_mi, _idx, _col) \ + _mi->sample_table[(_idx * SAMPLE_COLUMNS) + _col] + +/* convert mac80211 rate index to local array index */ +static inline int +rix_to_ndx(struct minstrel_sta_info *mi, int rix) +{ + int i = rix; + for (i = rix; i >= 0; i--) + if (mi->r[i].rix == rix) + break; + WARN_ON(mi->r[i].rix != rix); + return i; +} + +static inline bool +use_low_rate(struct sk_buff *skb) +{ + struct ieee80211_hdr *hdr = (struct ieee80211_hdr *) skb->data; + struct ieee80211_tx_info *info = IEEE80211_SKB_CB(skb); + u16 fc; + + fc = le16_to_cpu(hdr->frame_control); + + return ((info->flags & IEEE80211_TX_CTL_NO_ACK) || + (fc & IEEE80211_FCTL_FTYPE) != IEEE80211_FTYPE_DATA || + is_multicast_ether_addr(hdr->addr1)); +} + + +static void +minstrel_update_stats(struct minstrel_priv *mp, struct minstrel_sta_info *mi) +{ + u32 max_tp = 0, index_max_tp = 0, index_max_tp2 = 0; + u32 max_prob = 0, index_max_prob = 0; + u32 usecs; + u32 p; + int i; + + mi->stats_update = jiffies; + for (i = 0; i < mi->n_rates; i++) { + struct minstrel_rate *mr = &mi->r[i]; + + usecs = mr->perfect_tx_time; + if (!usecs) + usecs = 1000000; + + /* To avoid rounding issues, probabilities scale from 0 (0%) + * to 18000 (100%) */ + if (mr->attempts) { + p = (mr->success * 18000) / mr->attempts; + mr->succ_hist += mr->success; + mr->att_hist += mr->attempts; + mr->cur_prob = p; + p = ((p * (100 - mp->ewma_level)) + (mr->probability * + mp->ewma_level)) / 100; + mr->probability = p; + mr->cur_tp = p * (1000000 / usecs); + } + + mr->last_success = mr->success; + mr->last_attempts = mr->attempts; + mr->success = 0; + mr->attempts = 0; + + /* Sample less often below the 10% chance of success. + * Sample less often above the 95% chance of success. */ + if ((mr->probability > 17100) || (mr->probability < 1800)) { + mr->adjusted_retry_count = mr->retry_count >> 1; + if (mr->adjusted_retry_count > 2) + mr->adjusted_retry_count = 2; + } else { + mr->adjusted_retry_count = mr->retry_count; + } + if (!mr->adjusted_retry_count) + mr->adjusted_retry_count = 2; + } + + for (i = 0; i < mi->n_rates; i++) { + struct minstrel_rate *mr = &mi->r[i]; + if (max_tp < mr->cur_tp) { + index_max_tp = i; + max_tp = mr->cur_tp; + } + if (max_prob < mr->probability) { + index_max_prob = i; + max_prob = mr->probability; + } + } + + max_tp = 0; + for (i = 0; i < mi->n_rates; i++) { + struct minstrel_rate *mr = &mi->r[i]; + + if (i == index_max_tp) + continue; + + if (max_tp < mr->cur_tp) { + index_max_tp2 = i; + max_tp = mr->cur_tp; + } + } + mi->max_tp_rate = index_max_tp; + mi->max_tp_rate2 = index_max_tp2; + mi->max_prob_rate = index_max_prob; +} + +static void +minstrel_tx_status(void *priv, struct ieee80211_supported_band *sband, + struct ieee80211_sta *sta, void *priv_sta, + struct sk_buff *skb) +{ + struct minstrel_sta_info *mi = priv_sta; + struct ieee80211_tx_info *info = IEEE80211_SKB_CB(skb); + struct ieee80211_tx_altrate *ar = info->status.retries; + struct minstrel_priv *mp = priv; + int i, ndx, tries; + int success = 0; + + if (!info->status.excessive_retries) + success = 1; + + if (!mp->has_mrr || (ar[0].rate_idx < 0)) { + ndx = rix_to_ndx(mi, info->tx_rate_idx); + tries = info->status.retry_count + 1; + mi->r[ndx].success += success; + mi->r[ndx].attempts += tries; + return; + } + + for (i = 0; i < 4; i++) { + if (ar[i].rate_idx < 0) + break; + + ndx = rix_to_ndx(mi, ar[i].rate_idx); + mi->r[ndx].attempts += ar[i].limit + 1; + + if ((i != 3) && (ar[i + 1].rate_idx < 0)) + mi->r[ndx].success += success; + } + + if ((info->flags & IEEE80211_TX_CTL_RATE_CTRL_PROBE) && (i >= 0)) + mi->sample_count++; + + if (mi->sample_deferred > 0) + mi->sample_deferred--; +} + + +static inline unsigned int +minstrel_get_retry_count(struct minstrel_rate *mr, + struct ieee80211_tx_info *info) +{ + unsigned int retry = mr->adjusted_retry_count; + + if (info->flags & IEEE80211_TX_CTL_USE_RTS_CTS) + retry = max(2U, min(mr->retry_count_rtscts, retry)); + else if (info->flags & IEEE80211_TX_CTL_USE_CTS_PROTECT) + retry = max(2U, min(mr->retry_count_cts, retry)); + return retry; +} + + +static int +minstrel_get_next_sample(struct minstrel_sta_info *mi) +{ + unsigned int sample_ndx; + sample_ndx = SAMPLE_TBL(mi, mi->sample_idx, mi->sample_column); + mi->sample_idx++; + if (mi->sample_idx > (mi->n_rates - 2)) { + mi->sample_idx = 0; + mi->sample_column++; + if (mi->sample_column >= SAMPLE_COLUMNS) + mi->sample_column = 0; + } + return sample_ndx; +} + +void +minstrel_get_rate(void *priv, struct ieee80211_supported_band *sband, + struct ieee80211_sta *sta, void *priv_sta, + struct sk_buff *skb, struct rate_selection *sel) +{ + struct ieee80211_tx_info *info = IEEE80211_SKB_CB(skb); + struct minstrel_sta_info *mi = priv_sta; + struct minstrel_priv *mp = priv; + struct ieee80211_tx_altrate *ar = info->control.retries; + unsigned int ndx, sample_ndx = 0; + bool mrr; + bool sample_slower = false; + bool sample = false; + int i, delta; + int mrr_ndx[3]; + int sample_rate; + + if (!sta || !mi || use_low_rate(skb)) { + sel->rate_idx = rate_lowest_index(sband, sta); + return; + } + + mrr = mp->has_mrr; + + /* mac80211 does not allow mrr for RTS/CTS */ + if ((info->flags & IEEE80211_TX_CTL_USE_RTS_CTS) || + (info->flags & IEEE80211_TX_CTL_USE_CTS_PROTECT)) + mrr = false; + + if (time_after(jiffies, mi->stats_update + (mp->update_interval * + HZ) / 1000)) + minstrel_update_stats(mp, mi); + + ndx = mi->max_tp_rate; + + if (mrr) + sample_rate = mp->lookaround_rate_mrr; + else + sample_rate = mp->lookaround_rate; + + mi->packet_count++; + delta = (mi->packet_count * sample_rate / 100) - + (mi->sample_count + mi->sample_deferred / 2); + + /* delta > 0: sampling required */ + if (delta > 0) { + if (mi->packet_count >= 10000) { + mi->sample_deferred = 0; + mi->sample_count = 0; + mi->packet_count = 0; + } else if (delta > mi->n_rates * 2) { + /* With multi-rate retry, not every planned sample + * attempt actually gets used, due to the way the retry + * chain is set up - [max_tp,sample,prob,lowest] for + * sample_rate < max_tp. + * + * If there's too much sampling backlog and the link + * starts getting worse, minstrel would start bursting + * out lots of sampling frames, which would result + * in a large throughput loss. */ + mi->sample_count += (delta - mi->n_rates * 2); + } + + sample_ndx = minstrel_get_next_sample(mi); + sample = true; + sample_slower = mrr && (mi->r[sample_ndx].perfect_tx_time > + mi->r[ndx].perfect_tx_time); + + if (!sample_slower) { + ndx = sample_ndx; + mi->sample_count++; + } else { + /* Only use IEEE80211_TX_CTL_RATE_CTRL_PROBE to mark + * packets that have the sampling rate deferred to the + * second MRR stage. Increase the sample counter only + * if the deferred sample rate was actually used. + * Use the sample_deferred counter to make sure that + * the sampling is not done in large bursts */ + info->flags |= IEEE80211_TX_CTL_RATE_CTRL_PROBE; + mi->sample_deferred++; + } + } + sel->rate_idx = mi->r[ndx].rix; + info->control.retry_limit = minstrel_get_retry_count(&mi->r[ndx], info); + + if (!mrr) { + ar[0].rate_idx = mi->lowest_rix; + ar[0].limit = mp->max_retry; + ar[1].rate_idx = -1; + return; + } + + /* MRR setup */ + if (sample) { + if (sample_slower) + mrr_ndx[0] = sample_ndx; + else + mrr_ndx[0] = mi->max_tp_rate; + } else { + mrr_ndx[0] = mi->max_tp_rate2; + } + mrr_ndx[1] = mi->max_prob_rate; + mrr_ndx[2] = 0; + for (i = 0; i < 3; i++) { + ar[i].rate_idx = mi->r[mrr_ndx[i]].rix; + ar[i].limit = mi->r[mrr_ndx[i]].adjusted_retry_count; + } +} + + +static void +calc_rate_durations(struct minstrel_sta_info *mi, struct ieee80211_local *local, + struct minstrel_rate *d, struct ieee80211_rate *rate) +{ + int erp = !!(rate->flags & IEEE80211_RATE_ERP_G); + + d->perfect_tx_time = ieee80211_frame_duration(local, 1200, + rate->bitrate, erp, 1); + d->ack_time = ieee80211_frame_duration(local, 10, + rate->bitrate, erp, 1); +} + +static void +init_sample_table(struct minstrel_sta_info *mi) +{ + unsigned int i, col, new_idx; + unsigned int n_srates = mi->n_rates - 1; + u8 rnd[8]; + + mi->sample_column = 0; + mi->sample_idx = 0; + memset(mi->sample_table, 0, SAMPLE_COLUMNS * mi->n_rates); + + for (col = 0; col < SAMPLE_COLUMNS; col++) { + for (i = 0; i < n_srates; i++) { + get_random_bytes(rnd, sizeof(rnd)); + new_idx = (i + rnd[i & 7]) % n_srates; + + while (SAMPLE_TBL(mi, new_idx, col) != 0) + new_idx = (new_idx + 1) % n_srates; + + /* Don't sample the slowest rate (i.e. slowest base + * rate). We must presume that the slowest rate works + * fine, or else other management frames will also be + * failing and the link will break */ + SAMPLE_TBL(mi, new_idx, col) = i + 1; + } + } +} + +static void +minstrel_rate_init(void *priv, struct ieee80211_supported_band *sband, + struct ieee80211_sta *sta, void *priv_sta) +{ + struct minstrel_sta_info *mi = priv_sta; + struct minstrel_priv *mp = priv; + struct minstrel_rate *mr_ctl; + unsigned int i, n = 0; + unsigned int t_slot = 9; /* FIXME: get real slot time */ + + mi->lowest_rix = rate_lowest_index(sband, sta); + mr_ctl = &mi->r[rix_to_ndx(mi, mi->lowest_rix)]; + mi->sp_ack_dur = mr_ctl->ack_time; + + for (i = 0; i < sband->n_bitrates; i++) { + struct minstrel_rate *mr = &mi->r[n]; + unsigned int tx_time = 0, tx_time_cts = 0, tx_time_rtscts = 0; + unsigned int tx_time_single; + unsigned int cw = mp->cw_min; + + if (!rate_supported(sta, sband->band, i)) + continue; + n++; + memset(mr, 0, sizeof(*mr)); + + mr->rix = i; + mr->bitrate = sband->bitrates[i].bitrate / 5; + calc_rate_durations(mi, hw_to_local(mp->hw), mr, + &sband->bitrates[i]); + + /* calculate maximum number of retransmissions before + * fallback (based on maximum segment size) */ + mr->retry_count = 1; + mr->retry_count_cts = 1; + mr->retry_count_rtscts = 1; + tx_time = mr->perfect_tx_time + mi->sp_ack_dur; + do { + /* add one retransmission */ + tx_time_single = mr->ack_time + mr->perfect_tx_time; + + /* contention window */ + tx_time_single += t_slot + min(cw, mp->cw_max); + cw = (cw + 1) << 1; + + tx_time += tx_time_single; + tx_time_cts += tx_time_single + mi->sp_ack_dur; + tx_time_rtscts += tx_time_single + 2 * mi->sp_ack_dur; + if ((tx_time_cts < mp->segment_size) && + (mr->retry_count_cts < mp->max_retry)) + mr->retry_count_cts++; + if ((tx_time_rtscts < mp->segment_size) && + (mr->retry_count_rtscts < mp->max_retry)) + mr->retry_count_rtscts++; + } while ((tx_time < mp->segment_size) && + (++mr->retry_count < mp->max_retry)); + mr->adjusted_retry_count = mr->retry_count; + } + + for (i = n; i < sband->n_bitrates; i++) { + struct minstrel_rate *mr = &mi->r[i]; + mr->rix = -1; + } + + mi->n_rates = n; + mi->stats_update = jiffies; + + init_sample_table(mi); +} + +static void * +minstrel_alloc_sta(void *priv, struct ieee80211_sta *sta, gfp_t gfp) +{ + struct ieee80211_supported_band *sband; + struct minstrel_sta_info *mi; + struct minstrel_priv *mp = priv; + struct ieee80211_hw *hw = mp->hw; + int max_rates = 0; + int i; + + mi = kzalloc(sizeof(struct minstrel_sta_info), gfp); + if (!mi) + return NULL; + + for (i = 0; i < IEEE80211_NUM_BANDS; i++) { + sband = hw->wiphy->bands[hw->conf.channel->band]; + if (sband->n_bitrates > max_rates) + max_rates = sband->n_bitrates; + } + + mi->r = kzalloc(sizeof(struct minstrel_rate) * max_rates, gfp); + if (!mi->r) + goto error; + + mi->sample_table = kmalloc(SAMPLE_COLUMNS * max_rates, gfp); + if (!mi->sample_table) + goto error1; + + mi->stats_update = jiffies; + return mi; + +error1: + kfree(mi->r); +error: + kfree(mi); + return NULL; +} + +static void +minstrel_free_sta(void *priv, struct ieee80211_sta *sta, void *priv_sta) +{ + struct minstrel_sta_info *mi = priv_sta; + + kfree(mi->sample_table); + kfree(mi->r); + kfree(mi); +} + +static void +minstrel_clear(void *priv) +{ +} + +static void * +minstrel_alloc(struct ieee80211_hw *hw, struct dentry *debugfsdir) +{ + struct minstrel_priv *mp; + + mp = kzalloc(sizeof(struct minstrel_priv), GFP_ATOMIC); + if (!mp) + return NULL; + + /* contention window settings + * Just an approximation. Using the per-queue values would complicate + * the calculations and is probably unnecessary */ + mp->cw_min = 15; + mp->cw_max = 1023; + + /* number of packets (in %) to use for sampling other rates + * sample less often for non-mrr packets, because the overhead + * is much higher than with mrr */ + mp->lookaround_rate = 5; + mp->lookaround_rate_mrr = 10; + + /* moving average weight for EWMA */ + mp->ewma_level = 75; + + /* maximum time that the hw is allowed to stay in one MRR segment */ + mp->segment_size = 6000; + + if (hw->max_altrate_tries > 0) + mp->max_retry = hw->max_altrate_tries; + else + /* safe default, does not necessarily have to match hw properties */ + mp->max_retry = 7; + + if (hw->max_altrates >= 3) + mp->has_mrr = true; + + mp->hw = hw; + mp->update_interval = 100; + + return mp; +} + +static void +minstrel_free(void *priv) +{ + kfree(priv); +} + +static struct rate_control_ops mac80211_minstrel = { + .name = "minstrel", + .tx_status = minstrel_tx_status, + .get_rate = minstrel_get_rate, + .rate_init = minstrel_rate_init, + .clear = minstrel_clear, + .alloc = minstrel_alloc, + .free = minstrel_free, + .alloc_sta = minstrel_alloc_sta, + .free_sta = minstrel_free_sta, +#ifdef CONFIG_MAC80211_DEBUGFS + .add_sta_debugfs = minstrel_add_sta_debugfs, + .remove_sta_debugfs = minstrel_remove_sta_debugfs, +#endif +}; + +int __init +rc80211_minstrel_init(void) +{ + return ieee80211_rate_control_register(&mac80211_minstrel); +} + +void +rc80211_minstrel_exit(void) +{ + ieee80211_rate_control_unregister(&mac80211_minstrel); +} + diff --git a/net/mac80211/rc80211_minstrel.h b/net/mac80211/rc80211_minstrel.h new file mode 100644 index 000000000000..9a90a6aee043 --- /dev/null +++ b/net/mac80211/rc80211_minstrel.h @@ -0,0 +1,85 @@ +/* + * Copyright (C) 2008 Felix Fietkau <nbd@openwrt.org> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +#ifndef __RC_MINSTREL_H +#define __RC_MINSTREL_H + +struct minstrel_rate { + int bitrate; + int rix; + + unsigned int perfect_tx_time; + unsigned int ack_time; + + unsigned int retry_count; + unsigned int retry_count_cts; + unsigned int retry_count_rtscts; + unsigned int adjusted_retry_count; + + u32 success; + u32 attempts; + u32 last_attempts; + u32 last_success; + + /* parts per thousand */ + u32 cur_prob; + u32 probability; + + /* per-rate throughput */ + u32 cur_tp; + u32 throughput; + + u64 succ_hist; + u64 att_hist; +}; + +struct minstrel_sta_info { + unsigned long stats_update; + unsigned int sp_ack_dur; + unsigned int rate_avg; + + unsigned int lowest_rix; + + unsigned int max_tp_rate; + unsigned int max_tp_rate2; + unsigned int max_prob_rate; + unsigned int packet_count; + unsigned int sample_count; + int sample_deferred; + + unsigned int sample_idx; + unsigned int sample_column; + + int n_rates; + struct minstrel_rate *r; + + /* sampling table */ + u8 *sample_table; + +#ifdef CONFIG_MAC80211_DEBUGFS + struct dentry *dbg_stats; +#endif +}; + +struct minstrel_priv { + struct ieee80211_hw *hw; + bool has_mrr; + unsigned int cw_min; + unsigned int cw_max; + unsigned int max_retry; + unsigned int ewma_level; + unsigned int segment_size; + unsigned int update_interval; + unsigned int lookaround_rate; + unsigned int lookaround_rate_mrr; +}; + +void minstrel_add_sta_debugfs(void *priv, void *priv_sta, struct dentry *dir); +void minstrel_remove_sta_debugfs(void *priv, void *priv_sta); + +#endif diff --git a/net/mac80211/rc80211_minstrel_debugfs.c b/net/mac80211/rc80211_minstrel_debugfs.c new file mode 100644 index 000000000000..0b024cd6b809 --- /dev/null +++ b/net/mac80211/rc80211_minstrel_debugfs.c @@ -0,0 +1,164 @@ +/* + * Copyright (C) 2008 Felix Fietkau <nbd@openwrt.org> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * Based on minstrel.c: + * Copyright (C) 2005-2007 Derek Smithies <derek@indranet.co.nz> + * Sponsored by Indranet Technologies Ltd + * + * Based on sample.c: + * Copyright (c) 2005 John Bicket + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer, + * without modification. + * 2. Redistributions in binary form must reproduce at minimum a disclaimer + * similar to the "NO WARRANTY" disclaimer below ("Disclaimer") and any + * redistribution must be conditioned upon including a substantially + * similar Disclaimer requirement for further binary redistribution. + * 3. Neither the names of the above-listed copyright holders nor the names + * of any contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * Alternatively, this software may be distributed under the terms of the + * GNU General Public License ("GPL") version 2 as published by the Free + * Software Foundation. + * + * NO WARRANTY + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF NONINFRINGEMENT, MERCHANTIBILITY + * AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL + * THE COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR SPECIAL, EXEMPLARY, + * OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER + * IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF + * THE POSSIBILITY OF SUCH DAMAGES. + */ +#include <linux/netdevice.h> +#include <linux/types.h> +#include <linux/skbuff.h> +#include <linux/debugfs.h> +#include <linux/ieee80211.h> +#include <net/mac80211.h> +#include "rc80211_minstrel.h" + +struct minstrel_stats_info { + struct minstrel_sta_info *mi; + char buf[4096]; + size_t len; +}; + +static int +minstrel_stats_open(struct inode *inode, struct file *file) +{ + struct minstrel_sta_info *mi = inode->i_private; + struct minstrel_stats_info *ms; + unsigned int i, tp, prob, eprob; + char *p; + + ms = kmalloc(sizeof(*ms), GFP_KERNEL); + if (!ms) + return -ENOMEM; + + file->private_data = ms; + p = ms->buf; + p += sprintf(p, "rate throughput ewma prob this prob " + "this succ/attempt success attempts\n"); + for (i = 0; i < mi->n_rates; i++) { + struct minstrel_rate *mr = &mi->r[i]; + + *(p++) = (i == mi->max_tp_rate) ? 'T' : ' '; + *(p++) = (i == mi->max_tp_rate2) ? 't' : ' '; + *(p++) = (i == mi->max_prob_rate) ? 'P' : ' '; + p += sprintf(p, "%3u%s", mr->bitrate / 2, + (mr->bitrate & 1 ? ".5" : " ")); + + tp = ((mr->cur_tp * 96) / 18000) >> 10; + prob = mr->cur_prob / 18; + eprob = mr->probability / 18; + + p += sprintf(p, " %6u.%1u %6u.%1u %6u.%1u " + "%3u(%3u) %8llu %8llu\n", + tp / 10, tp % 10, + eprob / 10, eprob % 10, + prob / 10, prob % 10, + mr->last_success, + mr->last_attempts, + mr->succ_hist, + mr->att_hist); + } + p += sprintf(p, "\nTotal packet count:: ideal %d " + "lookaround %d\n\n", + mi->packet_count - mi->sample_count, + mi->sample_count); + ms->len = p - ms->buf; + + return 0; +} + +static int +minstrel_stats_read(struct file *file, char __user *buf, size_t len, loff_t *o) +{ + struct minstrel_stats_info *ms; + char *src; + + ms = file->private_data; + src = ms->buf; + + len = min(len, ms->len); + if (len <= *o) + return 0; + + src += *o; + len -= *o; + *o += len; + + if (copy_to_user(buf, src, len)) + return -EFAULT; + + return len; +} + +static int +minstrel_stats_release(struct inode *inode, struct file *file) +{ + struct minstrel_stats_info *ms = file->private_data; + + kfree(ms); + + return 0; +} + +static struct file_operations minstrel_stat_fops = { + .owner = THIS_MODULE, + .open = minstrel_stats_open, + .read = minstrel_stats_read, + .release = minstrel_stats_release, +}; + +void +minstrel_add_sta_debugfs(void *priv, void *priv_sta, struct dentry *dir) +{ + struct minstrel_sta_info *mi = priv_sta; + + mi->dbg_stats = debugfs_create_file("rc_stats", S_IRUGO, dir, mi, + &minstrel_stat_fops); +} + +void +minstrel_remove_sta_debugfs(void *priv, void *priv_sta) +{ + struct minstrel_sta_info *mi = priv_sta; + + debugfs_remove(mi->dbg_stats); +} diff --git a/net/mac80211/rc80211_pid.h b/net/mac80211/rc80211_pid.h index 04afc13ed825..01d64d53f3b9 100644 --- a/net/mac80211/rc80211_pid.h +++ b/net/mac80211/rc80211_pid.h @@ -61,7 +61,7 @@ enum rc_pid_event_type { union rc_pid_event_data { /* RC_PID_EVENT_TX_STATUS */ struct { - struct ieee80211_tx_status tx_status; + struct ieee80211_tx_info tx_status; }; /* RC_PID_EVENT_TYPE_RATE_CHANGE */ /* RC_PID_EVENT_TYPE_TX_RATE */ @@ -124,7 +124,6 @@ struct rc_pid_events_file_info { * struct rc_pid_debugfs_entries - tunable parameters * * Algorithm parameters, tunable via debugfs. - * @dir: the debugfs directory for a specific phy * @target: target percentage for failed frames * @sampling_period: error sampling interval in milliseconds * @coeff_p: absolute value of the proportional coefficient @@ -141,10 +140,8 @@ struct rc_pid_events_file_info { * rate behaviour values (lower means we should trust more what we learnt * about behaviour of rates, higher means we should trust more the natural * ordering of rates) - * @fast_start: if Y, push high rates right after initialization */ struct rc_pid_debugfs_entries { - struct dentry *dir; struct dentry *target; struct dentry *sampling_period; struct dentry *coeff_p; @@ -154,11 +151,10 @@ struct rc_pid_debugfs_entries { struct dentry *sharpen_factor; struct dentry *sharpen_duration; struct dentry *norm_offset; - struct dentry *fast_start; }; void rate_control_pid_event_tx_status(struct rc_pid_event_buffer *buf, - struct ieee80211_tx_status *stat); + struct ieee80211_tx_info *stat); void rate_control_pid_event_rate_change(struct rc_pid_event_buffer *buf, int index, int rate); @@ -182,6 +178,8 @@ struct rc_pid_sta_info { u32 tx_num_failed; u32 tx_num_xmit; + int txrate_idx; + /* Average failed frames percentage error (i.e. actual vs. target * percentage), scaled by RC_PID_SMOOTHING. This value is computed * using using an exponential weighted average technique: @@ -267,9 +265,6 @@ struct rc_pid_info { /* Normalization offset. */ unsigned int norm_offset; - /* Fast starst parameter. */ - unsigned int fast_start; - /* Rates information. */ struct rc_pid_rateinfo *rinfo; diff --git a/net/mac80211/rc80211_pid_algo.c b/net/mac80211/rc80211_pid_algo.c index a849b745bdb5..86eb374e3b87 100644 --- a/net/mac80211/rc80211_pid_algo.c +++ b/net/mac80211/rc80211_pid_algo.c @@ -68,17 +68,14 @@ * exhibited a worse failed frames behaviour and we'll choose the highest rate * whose failed frames behaviour is not worse than the one of the original rate * target. While at it, check that the new rate is valid. */ -static void rate_control_pid_adjust_rate(struct ieee80211_local *local, - struct sta_info *sta, int adj, +static void rate_control_pid_adjust_rate(struct ieee80211_supported_band *sband, + struct ieee80211_sta *sta, + struct rc_pid_sta_info *spinfo, int adj, struct rc_pid_rateinfo *rinfo) { - struct ieee80211_sub_if_data *sdata; - struct ieee80211_supported_band *sband; int cur_sorted, new_sorted, probe, tmp, n_bitrates, band; - int cur = sta->txrate_idx; + int cur = spinfo->txrate_idx; - sdata = sta->sdata; - sband = local->hw.wiphy->bands[local->hw.conf.channel->band]; band = sband->band; n_bitrates = sband->n_bitrates; @@ -111,7 +108,7 @@ static void rate_control_pid_adjust_rate(struct ieee80211_local *local, /* Fit the rate found to the nearest supported rate. */ do { if (rate_supported(sta, band, rinfo[tmp].index)) { - sta->txrate_idx = rinfo[tmp].index; + spinfo->txrate_idx = rinfo[tmp].index; break; } if (adj < 0) @@ -121,9 +118,9 @@ static void rate_control_pid_adjust_rate(struct ieee80211_local *local, } while (tmp < n_bitrates && tmp >= 0); #ifdef CONFIG_MAC80211_DEBUGFS - rate_control_pid_event_rate_change( - &((struct rc_pid_sta_info *)sta->rate_ctrl_priv)->events, - sta->txrate_idx, sband->bitrates[sta->txrate_idx].bitrate); + rate_control_pid_event_rate_change(&spinfo->events, + spinfo->txrate_idx, + sband->bitrates[spinfo->txrate_idx].bitrate); #endif } @@ -145,15 +142,11 @@ static void rate_control_pid_normalize(struct rc_pid_info *pinfo, int l) } static void rate_control_pid_sample(struct rc_pid_info *pinfo, - struct ieee80211_local *local, - struct sta_info *sta) + struct ieee80211_supported_band *sband, + struct ieee80211_sta *sta, + struct rc_pid_sta_info *spinfo) { -#ifdef CONFIG_MAC80211_MESH - struct ieee80211_sub_if_data *sdata = sta->sdata; -#endif - struct rc_pid_sta_info *spinfo = sta->rate_ctrl_priv; struct rc_pid_rateinfo *rinfo = pinfo->rinfo; - struct ieee80211_supported_band *sband; u32 pf; s32 err_avg; u32 err_prop; @@ -162,9 +155,6 @@ static void rate_control_pid_sample(struct rc_pid_info *pinfo, int adj, i, j, tmp; unsigned long period; - sband = local->hw.wiphy->bands[local->hw.conf.channel->band]; - spinfo = sta->rate_ctrl_priv; - /* In case nothing happened during the previous control interval, turn * the sharpening factor on. */ period = (HZ * pinfo->sampling_period + 500) / 1000; @@ -180,14 +170,15 @@ static void rate_control_pid_sample(struct rc_pid_info *pinfo, if (unlikely(spinfo->tx_num_xmit == 0)) pf = spinfo->last_pf; else { + /* XXX: BAD HACK!!! */ + struct sta_info *si = container_of(sta, struct sta_info, sta); + pf = spinfo->tx_num_failed * 100 / spinfo->tx_num_xmit; -#ifdef CONFIG_MAC80211_MESH - if (pf == 100 && - sdata->vif.type == IEEE80211_IF_TYPE_MESH_POINT) - mesh_plink_broken(sta); -#endif + + if (ieee80211_vif_is_mesh(&si->sdata->vif) && pf == 100) + mesh_plink_broken(si); pf <<= RC_PID_ARITH_SHIFT; - sta->fail_avg = ((pf + (spinfo->last_pf << 3)) / 9) + si->fail_avg = ((pf + (spinfo->last_pf << 3)) / 9) >> RC_PID_ARITH_SHIFT; } @@ -195,16 +186,16 @@ static void rate_control_pid_sample(struct rc_pid_info *pinfo, spinfo->tx_num_failed = 0; /* If we just switched rate, update the rate behaviour info. */ - if (pinfo->oldrate != sta->txrate_idx) { + if (pinfo->oldrate != spinfo->txrate_idx) { i = rinfo[pinfo->oldrate].rev_index; - j = rinfo[sta->txrate_idx].rev_index; + j = rinfo[spinfo->txrate_idx].rev_index; tmp = (pf - spinfo->last_pf); tmp = RC_PID_DO_ARITH_RIGHT_SHIFT(tmp, RC_PID_ARITH_SHIFT); rinfo[j].diff = rinfo[i].diff + tmp; - pinfo->oldrate = sta->txrate_idx; + pinfo->oldrate = spinfo->txrate_idx; } rate_control_pid_normalize(pinfo, sband->n_bitrates); @@ -233,147 +224,105 @@ static void rate_control_pid_sample(struct rc_pid_info *pinfo, /* Change rate. */ if (adj) - rate_control_pid_adjust_rate(local, sta, adj, rinfo); + rate_control_pid_adjust_rate(sband, sta, spinfo, adj, rinfo); } -static void rate_control_pid_tx_status(void *priv, struct net_device *dev, - struct sk_buff *skb, - struct ieee80211_tx_status *status) +static void rate_control_pid_tx_status(void *priv, struct ieee80211_supported_band *sband, + struct ieee80211_sta *sta, void *priv_sta, + struct sk_buff *skb) { - struct ieee80211_local *local = wdev_priv(dev->ieee80211_ptr); - struct ieee80211_hdr *hdr = (struct ieee80211_hdr *) skb->data; - struct ieee80211_sub_if_data *sdata; struct rc_pid_info *pinfo = priv; - struct sta_info *sta; - struct rc_pid_sta_info *spinfo; + struct rc_pid_sta_info *spinfo = priv_sta; unsigned long period; - struct ieee80211_supported_band *sband; - - rcu_read_lock(); + struct ieee80211_tx_info *info = IEEE80211_SKB_CB(skb); - sta = sta_info_get(local, hdr->addr1); - sband = local->hw.wiphy->bands[local->hw.conf.channel->band]; - - if (!sta) - goto unlock; - - /* Don't update the state if we're not controlling the rate. */ - sdata = sta->sdata; - if (sdata->bss && sdata->bss->force_unicast_rateidx > -1) { - sta->txrate_idx = sdata->bss->max_ratectrl_rateidx; - goto unlock; - } + if (!spinfo) + return; /* Ignore all frames that were sent with a different rate than the rate * we currently advise mac80211 to use. */ - if (status->control.tx_rate != &sband->bitrates[sta->txrate_idx]) - goto unlock; + if (info->tx_rate_idx != spinfo->txrate_idx) + return; - spinfo = sta->rate_ctrl_priv; spinfo->tx_num_xmit++; #ifdef CONFIG_MAC80211_DEBUGFS - rate_control_pid_event_tx_status(&spinfo->events, status); + rate_control_pid_event_tx_status(&spinfo->events, info); #endif /* We count frames that totally failed to be transmitted as two bad * frames, those that made it out but had some retries as one good and * one bad frame. */ - if (status->excessive_retries) { + if (info->status.excessive_retries) { spinfo->tx_num_failed += 2; spinfo->tx_num_xmit++; - } else if (status->retry_count) { + } else if (info->status.retry_count) { spinfo->tx_num_failed++; spinfo->tx_num_xmit++; } - if (status->excessive_retries) { - sta->tx_retry_failed++; - sta->tx_num_consecutive_failures++; - sta->tx_num_mpdu_fail++; - } else { - sta->tx_num_consecutive_failures = 0; - sta->tx_num_mpdu_ok++; - } - sta->tx_retry_count += status->retry_count; - sta->tx_num_mpdu_fail += status->retry_count; - /* Update PID controller state. */ period = (HZ * pinfo->sampling_period + 500) / 1000; if (!period) period = 1; if (time_after(jiffies, spinfo->last_sample + period)) - rate_control_pid_sample(pinfo, local, sta); - - unlock: - rcu_read_unlock(); + rate_control_pid_sample(pinfo, sband, sta, spinfo); } -static void rate_control_pid_get_rate(void *priv, struct net_device *dev, - struct ieee80211_supported_band *sband, - struct sk_buff *skb, - struct rate_selection *sel) +static void +rate_control_pid_get_rate(void *priv, struct ieee80211_supported_band *sband, + struct ieee80211_sta *sta, void *priv_sta, + struct sk_buff *skb, + struct rate_selection *sel) { - struct ieee80211_local *local = wdev_priv(dev->ieee80211_ptr); struct ieee80211_hdr *hdr = (struct ieee80211_hdr *) skb->data; - struct ieee80211_sub_if_data *sdata; - struct sta_info *sta; + struct rc_pid_sta_info *spinfo = priv_sta; int rateidx; u16 fc; - rcu_read_lock(); - - sta = sta_info_get(local, hdr->addr1); - /* Send management frames and broadcast/multicast data using lowest * rate. */ fc = le16_to_cpu(hdr->frame_control); - if ((fc & IEEE80211_FCTL_FTYPE) != IEEE80211_FTYPE_DATA || - is_multicast_ether_addr(hdr->addr1) || !sta) { - sel->rate = rate_lowest(local, sband, sta); - rcu_read_unlock(); + if (!sta || !spinfo || + (fc & IEEE80211_FCTL_FTYPE) != IEEE80211_FTYPE_DATA || + is_multicast_ether_addr(hdr->addr1)) { + sel->rate_idx = rate_lowest_index(sband, sta); return; } - /* If a forced rate is in effect, select it. */ - sdata = IEEE80211_DEV_TO_SUB_IF(dev); - if (sdata->bss && sdata->bss->force_unicast_rateidx > -1) - sta->txrate_idx = sdata->bss->force_unicast_rateidx; - - rateidx = sta->txrate_idx; + rateidx = spinfo->txrate_idx; if (rateidx >= sband->n_bitrates) rateidx = sband->n_bitrates - 1; - sta->last_txrate_idx = rateidx; - - rcu_read_unlock(); - - sel->rate = &sband->bitrates[rateidx]; + sel->rate_idx = rateidx; #ifdef CONFIG_MAC80211_DEBUGFS - rate_control_pid_event_tx_rate( - &((struct rc_pid_sta_info *) sta->rate_ctrl_priv)->events, + rate_control_pid_event_tx_rate(&spinfo->events, rateidx, sband->bitrates[rateidx].bitrate); #endif } -static void rate_control_pid_rate_init(void *priv, void *priv_sta, - struct ieee80211_local *local, - struct sta_info *sta) +static void +rate_control_pid_rate_init(void *priv, struct ieee80211_supported_band *sband, + struct ieee80211_sta *sta, void *priv_sta) { + struct rc_pid_sta_info *spinfo = priv_sta; + struct sta_info *si; + /* TODO: This routine should consider using RSSI from previous packets * as we need to have IEEE 802.1X auth succeed immediately after assoc.. * Until that method is implemented, we will use the lowest supported * rate as a workaround. */ - struct ieee80211_supported_band *sband; - sband = local->hw.wiphy->bands[local->hw.conf.channel->band]; - sta->txrate_idx = rate_lowest_index(local, sband, sta); - sta->fail_avg = 0; + spinfo->txrate_idx = rate_lowest_index(sband, sta); + /* HACK */ + si = container_of(sta, struct sta_info, sta); + si->fail_avg = 0; } -static void *rate_control_pid_alloc(struct ieee80211_local *local) +static void *rate_control_pid_alloc(struct ieee80211_hw *hw, + struct dentry *debugfsdir) { struct rc_pid_info *pinfo; struct rc_pid_rateinfo *rinfo; @@ -384,7 +333,7 @@ static void *rate_control_pid_alloc(struct ieee80211_local *local) struct rc_pid_debugfs_entries *de; #endif - sband = local->hw.wiphy->bands[local->hw.conf.channel->band]; + sband = hw->wiphy->bands[hw->conf.channel->band]; pinfo = kmalloc(sizeof(*pinfo), GFP_ATOMIC); if (!pinfo) @@ -398,13 +347,25 @@ static void *rate_control_pid_alloc(struct ieee80211_local *local) return NULL; } + pinfo->target = RC_PID_TARGET_PF; + pinfo->sampling_period = RC_PID_INTERVAL; + pinfo->coeff_p = RC_PID_COEFF_P; + pinfo->coeff_i = RC_PID_COEFF_I; + pinfo->coeff_d = RC_PID_COEFF_D; + pinfo->smoothing_shift = RC_PID_SMOOTHING_SHIFT; + pinfo->sharpen_factor = RC_PID_SHARPENING_FACTOR; + pinfo->sharpen_duration = RC_PID_SHARPENING_DURATION; + pinfo->norm_offset = RC_PID_NORM_OFFSET; + pinfo->rinfo = rinfo; + pinfo->oldrate = 0; + /* Sort the rates. This is optimized for the most common case (i.e. * almost-sorted CCK+OFDM rates). Kind of bubble-sort with reversed * mapping too. */ for (i = 0; i < sband->n_bitrates; i++) { rinfo[i].index = i; rinfo[i].rev_index = i; - if (pinfo->fast_start) + if (RC_PID_FAST_START) rinfo[i].diff = 0; else rinfo[i].diff = i * pinfo->norm_offset; @@ -425,49 +386,31 @@ static void *rate_control_pid_alloc(struct ieee80211_local *local) break; } - pinfo->target = RC_PID_TARGET_PF; - pinfo->sampling_period = RC_PID_INTERVAL; - pinfo->coeff_p = RC_PID_COEFF_P; - pinfo->coeff_i = RC_PID_COEFF_I; - pinfo->coeff_d = RC_PID_COEFF_D; - pinfo->smoothing_shift = RC_PID_SMOOTHING_SHIFT; - pinfo->sharpen_factor = RC_PID_SHARPENING_FACTOR; - pinfo->sharpen_duration = RC_PID_SHARPENING_DURATION; - pinfo->norm_offset = RC_PID_NORM_OFFSET; - pinfo->fast_start = RC_PID_FAST_START; - pinfo->rinfo = rinfo; - pinfo->oldrate = 0; - #ifdef CONFIG_MAC80211_DEBUGFS de = &pinfo->dentries; - de->dir = debugfs_create_dir("rc80211_pid", - local->hw.wiphy->debugfsdir); de->target = debugfs_create_u32("target_pf", S_IRUSR | S_IWUSR, - de->dir, &pinfo->target); + debugfsdir, &pinfo->target); de->sampling_period = debugfs_create_u32("sampling_period", - S_IRUSR | S_IWUSR, de->dir, + S_IRUSR | S_IWUSR, debugfsdir, &pinfo->sampling_period); de->coeff_p = debugfs_create_u32("coeff_p", S_IRUSR | S_IWUSR, - de->dir, &pinfo->coeff_p); + debugfsdir, &pinfo->coeff_p); de->coeff_i = debugfs_create_u32("coeff_i", S_IRUSR | S_IWUSR, - de->dir, &pinfo->coeff_i); + debugfsdir, &pinfo->coeff_i); de->coeff_d = debugfs_create_u32("coeff_d", S_IRUSR | S_IWUSR, - de->dir, &pinfo->coeff_d); + debugfsdir, &pinfo->coeff_d); de->smoothing_shift = debugfs_create_u32("smoothing_shift", - S_IRUSR | S_IWUSR, de->dir, + S_IRUSR | S_IWUSR, debugfsdir, &pinfo->smoothing_shift); de->sharpen_factor = debugfs_create_u32("sharpen_factor", - S_IRUSR | S_IWUSR, de->dir, + S_IRUSR | S_IWUSR, debugfsdir, &pinfo->sharpen_factor); de->sharpen_duration = debugfs_create_u32("sharpen_duration", - S_IRUSR | S_IWUSR, de->dir, + S_IRUSR | S_IWUSR, debugfsdir, &pinfo->sharpen_duration); de->norm_offset = debugfs_create_u32("norm_offset", - S_IRUSR | S_IWUSR, de->dir, + S_IRUSR | S_IWUSR, debugfsdir, &pinfo->norm_offset); - de->fast_start = debugfs_create_bool("fast_start", - S_IRUSR | S_IWUSR, de->dir, - &pinfo->fast_start); #endif return pinfo; @@ -479,7 +422,6 @@ static void rate_control_pid_free(void *priv) #ifdef CONFIG_MAC80211_DEBUGFS struct rc_pid_debugfs_entries *de = &pinfo->dentries; - debugfs_remove(de->fast_start); debugfs_remove(de->norm_offset); debugfs_remove(de->sharpen_duration); debugfs_remove(de->sharpen_factor); @@ -489,7 +431,6 @@ static void rate_control_pid_free(void *priv) debugfs_remove(de->coeff_p); debugfs_remove(de->sampling_period); debugfs_remove(de->target); - debugfs_remove(de->dir); #endif kfree(pinfo->rinfo); @@ -500,7 +441,8 @@ static void rate_control_pid_clear(void *priv) { } -static void *rate_control_pid_alloc_sta(void *priv, gfp_t gfp) +static void *rate_control_pid_alloc_sta(void *priv, struct ieee80211_sta *sta, + gfp_t gfp) { struct rc_pid_sta_info *spinfo; @@ -518,10 +460,10 @@ static void *rate_control_pid_alloc_sta(void *priv, gfp_t gfp) return spinfo; } -static void rate_control_pid_free_sta(void *priv, void *priv_sta) +static void rate_control_pid_free_sta(void *priv, struct ieee80211_sta *sta, + void *priv_sta) { - struct rc_pid_sta_info *spinfo = priv_sta; - kfree(spinfo); + kfree(priv_sta); } static struct rate_control_ops mac80211_rcpid = { @@ -540,11 +482,6 @@ static struct rate_control_ops mac80211_rcpid = { #endif }; -MODULE_DESCRIPTION("PID controller based rate control algorithm"); -MODULE_AUTHOR("Stefano Brivio"); -MODULE_AUTHOR("Mattias Nissler"); -MODULE_LICENSE("GPL"); - int __init rc80211_pid_init(void) { return ieee80211_rate_control_register(&mac80211_rcpid); @@ -554,8 +491,3 @@ void rc80211_pid_exit(void) { ieee80211_rate_control_unregister(&mac80211_rcpid); } - -#ifdef CONFIG_MAC80211_RC_PID_MODULE -module_init(rc80211_pid_init); -module_exit(rc80211_pid_exit); -#endif diff --git a/net/mac80211/rc80211_pid_debugfs.c b/net/mac80211/rc80211_pid_debugfs.c index ff5c380f3c13..8121d3bc6835 100644 --- a/net/mac80211/rc80211_pid_debugfs.c +++ b/net/mac80211/rc80211_pid_debugfs.c @@ -39,11 +39,11 @@ static void rate_control_pid_event(struct rc_pid_event_buffer *buf, } void rate_control_pid_event_tx_status(struct rc_pid_event_buffer *buf, - struct ieee80211_tx_status *stat) + struct ieee80211_tx_info *stat) { union rc_pid_event_data evd; - memcpy(&evd.tx_status, stat, sizeof(struct ieee80211_tx_status)); + memcpy(&evd.tx_status, stat, sizeof(struct ieee80211_tx_info)); rate_control_pid_event(buf, RC_PID_EVENT_TYPE_TX_STATUS, &evd); } @@ -167,8 +167,8 @@ static ssize_t rate_control_pid_events_read(struct file *file, char __user *buf, switch (ev->type) { case RC_PID_EVENT_TYPE_TX_STATUS: p += snprintf(pb + p, length - p, "tx_status %u %u", - ev->data.tx_status.excessive_retries, - ev->data.tx_status.retry_count); + ev->data.tx_status.status.excessive_retries, + ev->data.tx_status.status.retry_count); break; case RC_PID_EVENT_TYPE_RATE_CHANGE: p += snprintf(pb + p, length - p, "rate_change %d %d", diff --git a/net/mac80211/rx.c b/net/mac80211/rx.c index 0941e5d6a522..cf6b121e1bbf 100644 --- a/net/mac80211/rx.c +++ b/net/mac80211/rx.c @@ -61,22 +61,152 @@ static inline int should_drop_frame(struct ieee80211_rx_status *status, int present_fcs_len, int radiotap_len) { - struct ieee80211_hdr *hdr = (struct ieee80211_hdr *) skb->data; + struct ieee80211_hdr *hdr = (struct ieee80211_hdr *)skb->data; if (status->flag & (RX_FLAG_FAILED_FCS_CRC | RX_FLAG_FAILED_PLCP_CRC)) return 1; if (unlikely(skb->len < 16 + present_fcs_len + radiotap_len)) return 1; - if (((hdr->frame_control & cpu_to_le16(IEEE80211_FCTL_FTYPE)) == - cpu_to_le16(IEEE80211_FTYPE_CTL)) && - ((hdr->frame_control & cpu_to_le16(IEEE80211_FCTL_STYPE)) != - cpu_to_le16(IEEE80211_STYPE_PSPOLL)) && - ((hdr->frame_control & cpu_to_le16(IEEE80211_FCTL_STYPE)) != - cpu_to_le16(IEEE80211_STYPE_BACK_REQ))) + if (ieee80211_is_ctl(hdr->frame_control) && + !ieee80211_is_pspoll(hdr->frame_control) && + !ieee80211_is_back_req(hdr->frame_control)) return 1; return 0; } +static int +ieee80211_rx_radiotap_len(struct ieee80211_local *local, + struct ieee80211_rx_status *status) +{ + int len; + + /* always present fields */ + len = sizeof(struct ieee80211_radiotap_header) + 9; + + if (status->flag & RX_FLAG_TSFT) + len += 8; + if (local->hw.flags & IEEE80211_HW_SIGNAL_DB || + local->hw.flags & IEEE80211_HW_SIGNAL_DBM) + len += 1; + if (local->hw.flags & IEEE80211_HW_NOISE_DBM) + len += 1; + + if (len & 1) /* padding for RX_FLAGS if necessary */ + len++; + + /* make sure radiotap starts at a naturally aligned address */ + if (len % 8) + len = roundup(len, 8); + + return len; +} + +/** + * ieee80211_add_rx_radiotap_header - add radiotap header + * + * add a radiotap header containing all the fields which the hardware provided. + */ +static void +ieee80211_add_rx_radiotap_header(struct ieee80211_local *local, + struct sk_buff *skb, + struct ieee80211_rx_status *status, + struct ieee80211_rate *rate, + int rtap_len) +{ + struct ieee80211_radiotap_header *rthdr; + unsigned char *pos; + + rthdr = (struct ieee80211_radiotap_header *)skb_push(skb, rtap_len); + memset(rthdr, 0, rtap_len); + + /* radiotap header, set always present flags */ + rthdr->it_present = + cpu_to_le32((1 << IEEE80211_RADIOTAP_FLAGS) | + (1 << IEEE80211_RADIOTAP_RATE) | + (1 << IEEE80211_RADIOTAP_CHANNEL) | + (1 << IEEE80211_RADIOTAP_ANTENNA) | + (1 << IEEE80211_RADIOTAP_RX_FLAGS)); + rthdr->it_len = cpu_to_le16(rtap_len); + + pos = (unsigned char *)(rthdr+1); + + /* the order of the following fields is important */ + + /* IEEE80211_RADIOTAP_TSFT */ + if (status->flag & RX_FLAG_TSFT) { + *(__le64 *)pos = cpu_to_le64(status->mactime); + rthdr->it_present |= + cpu_to_le32(1 << IEEE80211_RADIOTAP_TSFT); + pos += 8; + } + + /* IEEE80211_RADIOTAP_FLAGS */ + if (local->hw.flags & IEEE80211_HW_RX_INCLUDES_FCS) + *pos |= IEEE80211_RADIOTAP_F_FCS; + if (status->flag & RX_FLAG_SHORTPRE) + *pos |= IEEE80211_RADIOTAP_F_SHORTPRE; + pos++; + + /* IEEE80211_RADIOTAP_RATE */ + *pos = rate->bitrate / 5; + pos++; + + /* IEEE80211_RADIOTAP_CHANNEL */ + *(__le16 *)pos = cpu_to_le16(status->freq); + pos += 2; + if (status->band == IEEE80211_BAND_5GHZ) + *(__le16 *)pos = cpu_to_le16(IEEE80211_CHAN_OFDM | + IEEE80211_CHAN_5GHZ); + else if (rate->flags & IEEE80211_RATE_ERP_G) + *(__le16 *)pos = cpu_to_le16(IEEE80211_CHAN_OFDM | + IEEE80211_CHAN_2GHZ); + else + *(__le16 *)pos = cpu_to_le16(IEEE80211_CHAN_CCK | + IEEE80211_CHAN_2GHZ); + pos += 2; + + /* IEEE80211_RADIOTAP_DBM_ANTSIGNAL */ + if (local->hw.flags & IEEE80211_HW_SIGNAL_DBM) { + *pos = status->signal; + rthdr->it_present |= + cpu_to_le32(1 << IEEE80211_RADIOTAP_DBM_ANTSIGNAL); + pos++; + } + + /* IEEE80211_RADIOTAP_DBM_ANTNOISE */ + if (local->hw.flags & IEEE80211_HW_NOISE_DBM) { + *pos = status->noise; + rthdr->it_present |= + cpu_to_le32(1 << IEEE80211_RADIOTAP_DBM_ANTNOISE); + pos++; + } + + /* IEEE80211_RADIOTAP_LOCK_QUALITY is missing */ + + /* IEEE80211_RADIOTAP_ANTENNA */ + *pos = status->antenna; + pos++; + + /* IEEE80211_RADIOTAP_DB_ANTSIGNAL */ + if (local->hw.flags & IEEE80211_HW_SIGNAL_DB) { + *pos = status->signal; + rthdr->it_present |= + cpu_to_le32(1 << IEEE80211_RADIOTAP_DB_ANTSIGNAL); + pos++; + } + + /* IEEE80211_RADIOTAP_DB_ANTNOISE is not used */ + + /* IEEE80211_RADIOTAP_RX_FLAGS */ + /* ensure 2 byte alignment for the 2 byte field as required */ + if ((pos - (unsigned char *)rthdr) & 1) + pos++; + /* FIXME: when radiotap gets a 'bad PLCP' flag use it here */ + if (status->flag & (RX_FLAG_FAILED_FCS_CRC | RX_FLAG_FAILED_PLCP_CRC)) + *(__le16 *)pos |= cpu_to_le16(IEEE80211_RADIOTAP_F_RX_BADFCS); + pos += 2; +} + /* * This function copies a received frame to all monitor interfaces and * returns a cleaned-up SKB that no longer includes the FCS nor the @@ -89,17 +219,6 @@ ieee80211_rx_monitor(struct ieee80211_local *local, struct sk_buff *origskb, { struct ieee80211_sub_if_data *sdata; int needed_headroom = 0; - struct ieee80211_radiotap_header *rthdr; - __le64 *rttsft = NULL; - struct ieee80211_rtap_fixed_data { - u8 flags; - u8 rate; - __le16 chan_freq; - __le16 chan_flags; - u8 antsignal; - u8 padding_for_rxflags; - __le16 rx_flags; - } __attribute__ ((packed)) *rtfixed; struct sk_buff *skb, *skb2; struct net_device *prev_dev = NULL; int present_fcs_len = 0; @@ -116,8 +235,8 @@ ieee80211_rx_monitor(struct ieee80211_local *local, struct sk_buff *origskb, if (status->flag & RX_FLAG_RADIOTAP) rtap_len = ieee80211_get_radiotap_len(origskb->data); else - /* room for radiotap header, always present fields and TSFT */ - needed_headroom = sizeof(*rthdr) + sizeof(*rtfixed) + 8; + /* room for the radiotap header based on driver features */ + needed_headroom = ieee80211_rx_radiotap_len(local, status); if (local->hw.flags & IEEE80211_HW_RX_INCLUDES_FCS) present_fcs_len = FCS_LEN; @@ -163,55 +282,9 @@ ieee80211_rx_monitor(struct ieee80211_local *local, struct sk_buff *origskb, } /* if necessary, prepend radiotap information */ - if (!(status->flag & RX_FLAG_RADIOTAP)) { - rtfixed = (void *) skb_push(skb, sizeof(*rtfixed)); - rtap_len = sizeof(*rthdr) + sizeof(*rtfixed); - if (status->flag & RX_FLAG_TSFT) { - rttsft = (void *) skb_push(skb, sizeof(*rttsft)); - rtap_len += 8; - } - rthdr = (void *) skb_push(skb, sizeof(*rthdr)); - memset(rthdr, 0, sizeof(*rthdr)); - memset(rtfixed, 0, sizeof(*rtfixed)); - rthdr->it_present = - cpu_to_le32((1 << IEEE80211_RADIOTAP_FLAGS) | - (1 << IEEE80211_RADIOTAP_RATE) | - (1 << IEEE80211_RADIOTAP_CHANNEL) | - (1 << IEEE80211_RADIOTAP_DB_ANTSIGNAL) | - (1 << IEEE80211_RADIOTAP_RX_FLAGS)); - rtfixed->flags = 0; - if (local->hw.flags & IEEE80211_HW_RX_INCLUDES_FCS) - rtfixed->flags |= IEEE80211_RADIOTAP_F_FCS; - - if (rttsft) { - *rttsft = cpu_to_le64(status->mactime); - rthdr->it_present |= - cpu_to_le32(1 << IEEE80211_RADIOTAP_TSFT); - } - - /* FIXME: when radiotap gets a 'bad PLCP' flag use it here */ - rtfixed->rx_flags = 0; - if (status->flag & - (RX_FLAG_FAILED_FCS_CRC | RX_FLAG_FAILED_PLCP_CRC)) - rtfixed->rx_flags |= - cpu_to_le16(IEEE80211_RADIOTAP_F_RX_BADFCS); - - rtfixed->rate = rate->bitrate / 5; - - rtfixed->chan_freq = cpu_to_le16(status->freq); - - if (status->band == IEEE80211_BAND_5GHZ) - rtfixed->chan_flags = - cpu_to_le16(IEEE80211_CHAN_OFDM | - IEEE80211_CHAN_5GHZ); - else - rtfixed->chan_flags = - cpu_to_le16(IEEE80211_CHAN_DYN | - IEEE80211_CHAN_2GHZ); - - rtfixed->antsignal = status->ssi; - rthdr->it_len = cpu_to_le16(rtap_len); - } + if (!(status->flag & RX_FLAG_RADIOTAP)) + ieee80211_add_rx_radiotap_header(local, skb, status, rate, + needed_headroom); skb_reset_mac_header(skb); skb->ip_summed = CHECKSUM_UNNECESSARY; @@ -222,7 +295,7 @@ ieee80211_rx_monitor(struct ieee80211_local *local, struct sk_buff *origskb, if (!netif_running(sdata->dev)) continue; - if (sdata->vif.type != IEEE80211_IF_TYPE_MNTR) + if (sdata->vif.type != NL80211_IFTYPE_MONITOR) continue; if (sdata->u.mntr_flags & MONITOR_FLAG_COOK_FRAMES) @@ -253,33 +326,33 @@ ieee80211_rx_monitor(struct ieee80211_local *local, struct sk_buff *origskb, static void ieee80211_parse_qos(struct ieee80211_rx_data *rx) { - u8 *data = rx->skb->data; + struct ieee80211_hdr *hdr = (struct ieee80211_hdr *)rx->skb->data; int tid; /* does the frame have a qos control field? */ - if (WLAN_FC_IS_QOS_DATA(rx->fc)) { - u8 *qc = data + ieee80211_get_hdrlen(rx->fc) - QOS_CONTROL_LEN; + if (ieee80211_is_data_qos(hdr->frame_control)) { + u8 *qc = ieee80211_get_qos_ctl(hdr); /* frame has qos control */ - tid = qc[0] & QOS_CONTROL_TID_MASK; - if (qc[0] & IEEE80211_QOS_CONTROL_A_MSDU_PRESENT) + tid = *qc & IEEE80211_QOS_CTL_TID_MASK; + if (*qc & IEEE80211_QOS_CONTROL_A_MSDU_PRESENT) rx->flags |= IEEE80211_RX_AMSDU; else rx->flags &= ~IEEE80211_RX_AMSDU; } else { - if (unlikely((rx->fc & IEEE80211_FCTL_FTYPE) == IEEE80211_FTYPE_MGMT)) { - /* Separate TID for management frames */ - tid = NUM_RX_DATA_QUEUES - 1; - } else { - /* no qos control present */ - tid = 0; /* 802.1d - Best Effort */ - } + /* + * IEEE 802.11-2007, 7.1.3.4.1 ("Sequence Number field"): + * + * Sequence numbers for management frames, QoS data + * frames with a broadcast/multicast address in the + * Address 1 field, and all non-QoS data frames sent + * by QoS STAs are assigned using an additional single + * modulo-4096 counter, [...] + * + * We also use that counter for non-QoS STAs. + */ + tid = NUM_RX_DATA_QUEUES - 1; } - I802_DEBUG_INC(rx->local->wme_rx_queue[tid]); - /* only a debug counter, sta might not be assigned properly yet */ - if (rx->sta) - I802_DEBUG_INC(rx->sta->wme_rx_queue[tid]); - rx->queue = tid; /* Set skb->priority to 1d tag if highest order bit of TID is not set. * For now, set skb->priority to 0 for other cases. */ @@ -289,9 +362,10 @@ static void ieee80211_parse_qos(struct ieee80211_rx_data *rx) static void ieee80211_verify_ip_alignment(struct ieee80211_rx_data *rx) { #ifdef CONFIG_MAC80211_DEBUG_PACKET_ALIGNMENT + struct ieee80211_hdr *hdr = (struct ieee80211_hdr *)rx->skb->data; int hdrlen; - if (!WLAN_FC_DATA_PRESENT(rx->fc)) + if (!ieee80211_is_data_present(hdr->frame_control)) return; /* @@ -313,7 +387,7 @@ static void ieee80211_verify_ip_alignment(struct ieee80211_rx_data *rx) * header and the payload is not supported, the driver is required * to move the 802.11 header further back in that case. */ - hdrlen = ieee80211_get_hdrlen(rx->fc); + hdrlen = ieee80211_hdrlen(hdr->frame_control); if (rx->flags & IEEE80211_RX_AMSDU) hdrlen += ETH_HLEN; WARN_ON_ONCE(((unsigned long)(rx->skb->data + hdrlen)) & 3); @@ -321,62 +395,20 @@ static void ieee80211_verify_ip_alignment(struct ieee80211_rx_data *rx) } -static u32 ieee80211_rx_load_stats(struct ieee80211_local *local, - struct sk_buff *skb, - struct ieee80211_rx_status *status, - struct ieee80211_rate *rate) -{ - struct ieee80211_hdr *hdr = (struct ieee80211_hdr *) skb->data; - u32 load = 0, hdrtime; - - /* Estimate total channel use caused by this frame */ - - /* 1 bit at 1 Mbit/s takes 1 usec; in channel_use values, - * 1 usec = 1/8 * (1080 / 10) = 13.5 */ - - if (status->band == IEEE80211_BAND_5GHZ || - (status->band == IEEE80211_BAND_5GHZ && - rate->flags & IEEE80211_RATE_ERP_G)) - hdrtime = CHAN_UTIL_HDR_SHORT; - else - hdrtime = CHAN_UTIL_HDR_LONG; - - load = hdrtime; - if (!is_multicast_ether_addr(hdr->addr1)) - load += hdrtime; - - /* TODO: optimise again */ - load += skb->len * CHAN_UTIL_RATE_LCM / rate->bitrate; - - /* Divide channel_use by 8 to avoid wrapping around the counter */ - load >>= CHAN_UTIL_SHIFT; - - return load; -} - /* rx handlers */ -static ieee80211_rx_result -ieee80211_rx_h_if_stats(struct ieee80211_rx_data *rx) -{ - if (rx->sta) - rx->sta->channel_use_raw += rx->load; - rx->sdata->channel_use_raw += rx->load; - return RX_CONTINUE; -} - -static ieee80211_rx_result +static ieee80211_rx_result debug_noinline ieee80211_rx_h_passive_scan(struct ieee80211_rx_data *rx) { struct ieee80211_local *local = rx->local; struct sk_buff *skb = rx->skb; - if (unlikely(local->sta_hw_scanning)) - return ieee80211_sta_rx_scan(rx->dev, skb, rx->status); + if (unlikely(local->hw_scanning)) + return ieee80211_scan_rx(rx->sdata, skb, rx->status); - if (unlikely(local->sta_sw_scanning)) { + if (unlikely(local->sw_scanning)) { /* drop all the other packets during a software scan anyway */ - if (ieee80211_sta_rx_scan(rx->dev, skb, rx->status) + if (ieee80211_scan_rx(rx->sdata, skb, rx->status) != RX_QUEUED) dev_kfree_skb(skb); return RX_QUEUED; @@ -394,14 +426,11 @@ ieee80211_rx_h_passive_scan(struct ieee80211_rx_data *rx) static ieee80211_rx_result ieee80211_rx_mesh_check(struct ieee80211_rx_data *rx) { - int hdrlen = ieee80211_get_hdrlen(rx->fc); - struct ieee80211_hdr *hdr = (struct ieee80211_hdr *) rx->skb->data; - -#define msh_h_get(h, l) ((struct ieee80211s_hdr *) ((u8 *)h + l)) + struct ieee80211_hdr *hdr = (struct ieee80211_hdr *)rx->skb->data; + unsigned int hdrlen = ieee80211_hdrlen(hdr->frame_control); - if ((rx->fc & IEEE80211_FCTL_FTYPE) == IEEE80211_FTYPE_DATA) { - if (!((rx->fc & IEEE80211_FCTL_FROMDS) && - (rx->fc & IEEE80211_FCTL_TODS))) + if (ieee80211_is_data(hdr->frame_control)) { + if (!ieee80211_has_a4(hdr->frame_control)) return RX_DROP_MONITOR; if (memcmp(hdr->addr4, rx->dev->dev_addr, ETH_ALEN) == 0) return RX_DROP_MONITOR; @@ -414,27 +443,30 @@ ieee80211_rx_mesh_check(struct ieee80211_rx_data *rx) if (!rx->sta || sta_plink_state(rx->sta) != PLINK_ESTAB) { struct ieee80211_mgmt *mgmt; - if ((rx->fc & IEEE80211_FCTL_FTYPE) != IEEE80211_FTYPE_MGMT) + if (!ieee80211_is_mgmt(hdr->frame_control)) return RX_DROP_MONITOR; - switch (rx->fc & IEEE80211_FCTL_STYPE) { - case IEEE80211_STYPE_ACTION: + if (ieee80211_is_action(hdr->frame_control)) { mgmt = (struct ieee80211_mgmt *)hdr; if (mgmt->u.action.category != PLINK_CATEGORY) return RX_DROP_MONITOR; - /* fall through on else */ - case IEEE80211_STYPE_PROBE_REQ: - case IEEE80211_STYPE_PROBE_RESP: - case IEEE80211_STYPE_BEACON: return RX_CONTINUE; - break; - default: - return RX_DROP_MONITOR; } - } else if ((rx->fc & IEEE80211_FCTL_FTYPE) == IEEE80211_FTYPE_DATA && - is_multicast_ether_addr(hdr->addr1) && - mesh_rmc_check(hdr->addr4, msh_h_get(hdr, hdrlen), rx->dev)) + if (ieee80211_is_probe_req(hdr->frame_control) || + ieee80211_is_probe_resp(hdr->frame_control) || + ieee80211_is_beacon(hdr->frame_control)) + return RX_CONTINUE; + + return RX_DROP_MONITOR; + + } + +#define msh_h_get(h, l) ((struct ieee80211s_hdr *) ((u8 *)h + l)) + + if (ieee80211_is_data(hdr->frame_control) && + is_multicast_ether_addr(hdr->addr1) && + mesh_rmc_check(hdr->addr4, msh_h_get(hdr, hdrlen), rx->sdata)) return RX_DROP_MONITOR; #undef msh_h_get @@ -442,16 +474,14 @@ ieee80211_rx_mesh_check(struct ieee80211_rx_data *rx) } -static ieee80211_rx_result +static ieee80211_rx_result debug_noinline ieee80211_rx_h_check(struct ieee80211_rx_data *rx) { - struct ieee80211_hdr *hdr; - - hdr = (struct ieee80211_hdr *) rx->skb->data; + struct ieee80211_hdr *hdr = (struct ieee80211_hdr *)rx->skb->data; /* Drop duplicate 802.11 retransmissions (IEEE 802.11 Chap. 9.2.9) */ if (rx->sta && !is_multicast_ether_addr(hdr->addr1)) { - if (unlikely(rx->fc & IEEE80211_FCTL_RETRY && + if (unlikely(ieee80211_has_retry(hdr->frame_control) && rx->sta->last_seq_ctrl[rx->queue] == hdr->seq_ctrl)) { if (rx->flags & IEEE80211_RX_RA_MATCH) { @@ -471,8 +501,8 @@ ieee80211_rx_h_check(struct ieee80211_rx_data *rx) /* Drop disallowed frame classes based on STA auth/assoc state; * IEEE 802.11, Chap 5.5. * - * 80211.o does filtering only based on association state, i.e., it - * drops Class 3 frames from not associated stations. hostapd sends + * mac80211 filters only based on association state, i.e. it drops + * Class 3 frames from not associated stations. hostapd sends * deauth/disassoc frames when needed. In addition, hostapd is * responsible for filtering on both auth and assoc states. */ @@ -480,15 +510,14 @@ ieee80211_rx_h_check(struct ieee80211_rx_data *rx) if (ieee80211_vif_is_mesh(&rx->sdata->vif)) return ieee80211_rx_mesh_check(rx); - if (unlikely(((rx->fc & IEEE80211_FCTL_FTYPE) == IEEE80211_FTYPE_DATA || - ((rx->fc & IEEE80211_FCTL_FTYPE) == IEEE80211_FTYPE_CTL && - (rx->fc & IEEE80211_FCTL_STYPE) == IEEE80211_STYPE_PSPOLL)) && - rx->sdata->vif.type != IEEE80211_IF_TYPE_IBSS && - (!rx->sta || !(rx->sta->flags & WLAN_STA_ASSOC)))) { - if ((!(rx->fc & IEEE80211_FCTL_FROMDS) && - !(rx->fc & IEEE80211_FCTL_TODS) && - (rx->fc & IEEE80211_FCTL_FTYPE) == IEEE80211_FTYPE_DATA) - || !(rx->flags & IEEE80211_RX_RA_MATCH)) { + if (unlikely((ieee80211_is_data(hdr->frame_control) || + ieee80211_is_pspoll(hdr->frame_control)) && + rx->sdata->vif.type != NL80211_IFTYPE_ADHOC && + (!rx->sta || !test_sta_flags(rx->sta, WLAN_STA_ASSOC)))) { + if ((!ieee80211_has_fromds(hdr->frame_control) && + !ieee80211_has_tods(hdr->frame_control) && + ieee80211_is_data(hdr->frame_control)) || + !(rx->flags & IEEE80211_RX_RA_MATCH)) { /* Drop IBSS frames and frames for other hosts * silently. */ return RX_DROP_MONITOR; @@ -501,10 +530,10 @@ ieee80211_rx_h_check(struct ieee80211_rx_data *rx) } -static ieee80211_rx_result +static ieee80211_rx_result debug_noinline ieee80211_rx_h_decrypt(struct ieee80211_rx_data *rx) { - struct ieee80211_hdr *hdr = (struct ieee80211_hdr *) rx->skb->data; + struct ieee80211_hdr *hdr = (struct ieee80211_hdr *)rx->skb->data; int keyidx; int hdrlen; ieee80211_rx_result result = RX_DROP_UNUSABLE; @@ -536,7 +565,7 @@ ieee80211_rx_h_decrypt(struct ieee80211_rx_data *rx) * possible. */ - if (!(rx->fc & IEEE80211_FCTL_PROTECTED)) + if (!ieee80211_has_protected(hdr->frame_control)) return RX_CONTINUE; /* @@ -565,7 +594,7 @@ ieee80211_rx_h_decrypt(struct ieee80211_rx_data *rx) (rx->status->flag & RX_FLAG_IV_STRIPPED)) return RX_CONTINUE; - hdrlen = ieee80211_get_hdrlen(rx->fc); + hdrlen = ieee80211_hdrlen(hdr->frame_control); if (rx->skb->len < 8 + hdrlen) return RX_DROP_UNUSABLE; /* TODO: count this? */ @@ -592,17 +621,12 @@ ieee80211_rx_h_decrypt(struct ieee80211_rx_data *rx) rx->key->tx_rx_count++; /* TODO: add threshold stuff again */ } else { -#ifdef CONFIG_MAC80211_DEBUG - if (net_ratelimit()) - printk(KERN_DEBUG "%s: RX protected frame," - " but have no key\n", rx->dev->name); -#endif /* CONFIG_MAC80211_DEBUG */ return RX_DROP_MONITOR; } /* Check for weak IVs if possible */ if (rx->sta && rx->key->conf.alg == ALG_WEP && - ((rx->fc & IEEE80211_FCTL_FTYPE) == IEEE80211_FTYPE_DATA) && + ieee80211_is_data(hdr->frame_control) && (!(rx->status->flag & RX_FLAG_IV_STRIPPED) || !(rx->status->flag & RX_FLAG_DECRYPTED)) && ieee80211_wep_is_weak_iv(rx->skb, rx->key)) @@ -626,76 +650,68 @@ ieee80211_rx_h_decrypt(struct ieee80211_rx_data *rx) return result; } -static void ap_sta_ps_start(struct net_device *dev, struct sta_info *sta) +static void ap_sta_ps_start(struct sta_info *sta) { - struct ieee80211_sub_if_data *sdata; + struct ieee80211_sub_if_data *sdata = sta->sdata; DECLARE_MAC_BUF(mac); - sdata = sta->sdata; - - if (sdata->bss) - atomic_inc(&sdata->bss->num_sta_ps); - sta->flags |= WLAN_STA_PS; - sta->flags &= ~WLAN_STA_PSPOLL; + atomic_inc(&sdata->bss->num_sta_ps); + set_and_clear_sta_flags(sta, WLAN_STA_PS, WLAN_STA_PSPOLL); #ifdef CONFIG_MAC80211_VERBOSE_PS_DEBUG printk(KERN_DEBUG "%s: STA %s aid %d enters power save mode\n", - dev->name, print_mac(mac, sta->addr), sta->aid); + sdata->dev->name, print_mac(mac, sta->sta.addr), sta->sta.aid); #endif /* CONFIG_MAC80211_VERBOSE_PS_DEBUG */ } -static int ap_sta_ps_end(struct net_device *dev, struct sta_info *sta) +static int ap_sta_ps_end(struct sta_info *sta) { - struct ieee80211_local *local = wdev_priv(dev->ieee80211_ptr); + struct ieee80211_sub_if_data *sdata = sta->sdata; + struct ieee80211_local *local = sdata->local; struct sk_buff *skb; int sent = 0; - struct ieee80211_sub_if_data *sdata; - struct ieee80211_tx_packet_data *pkt_data; + struct ieee80211_tx_info *info; DECLARE_MAC_BUF(mac); - sdata = sta->sdata; - - if (sdata->bss) - atomic_dec(&sdata->bss->num_sta_ps); + atomic_dec(&sdata->bss->num_sta_ps); - sta->flags &= ~(WLAN_STA_PS | WLAN_STA_PSPOLL); + clear_sta_flags(sta, WLAN_STA_PS | WLAN_STA_PSPOLL); if (!skb_queue_empty(&sta->ps_tx_buf)) sta_info_clear_tim_bit(sta); #ifdef CONFIG_MAC80211_VERBOSE_PS_DEBUG printk(KERN_DEBUG "%s: STA %s aid %d exits power save mode\n", - dev->name, print_mac(mac, sta->addr), sta->aid); + sdata->dev->name, print_mac(mac, sta->sta.addr), sta->sta.aid); #endif /* CONFIG_MAC80211_VERBOSE_PS_DEBUG */ /* Send all buffered frames to the station */ while ((skb = skb_dequeue(&sta->tx_filtered)) != NULL) { - pkt_data = (struct ieee80211_tx_packet_data *) skb->cb; + info = IEEE80211_SKB_CB(skb); sent++; - pkt_data->flags |= IEEE80211_TXPD_REQUEUE; + info->flags |= IEEE80211_TX_CTL_REQUEUE; dev_queue_xmit(skb); } while ((skb = skb_dequeue(&sta->ps_tx_buf)) != NULL) { - pkt_data = (struct ieee80211_tx_packet_data *) skb->cb; + info = IEEE80211_SKB_CB(skb); local->total_ps_buffered--; sent++; #ifdef CONFIG_MAC80211_VERBOSE_PS_DEBUG printk(KERN_DEBUG "%s: STA %s aid %d send PS frame " - "since STA not sleeping anymore\n", dev->name, - print_mac(mac, sta->addr), sta->aid); + "since STA not sleeping anymore\n", sdata->dev->name, + print_mac(mac, sta->sta.addr), sta->sta.aid); #endif /* CONFIG_MAC80211_VERBOSE_PS_DEBUG */ - pkt_data->flags |= IEEE80211_TXPD_REQUEUE; + info->flags |= IEEE80211_TX_CTL_REQUEUE; dev_queue_xmit(skb); } return sent; } -static ieee80211_rx_result +static ieee80211_rx_result debug_noinline ieee80211_rx_h_sta_process(struct ieee80211_rx_data *rx) { struct sta_info *sta = rx->sta; - struct net_device *dev = rx->dev; - struct ieee80211_hdr *hdr = (struct ieee80211_hdr *) rx->skb->data; + struct ieee80211_hdr *hdr = (struct ieee80211_hdr *)rx->skb->data; if (!sta) return RX_CONTINUE; @@ -703,14 +719,14 @@ ieee80211_rx_h_sta_process(struct ieee80211_rx_data *rx) /* Update last_rx only for IBSS packets which are for the current * BSSID to avoid keeping the current IBSS network alive in cases where * other STAs are using different BSSID. */ - if (rx->sdata->vif.type == IEEE80211_IF_TYPE_IBSS) { + if (rx->sdata->vif.type == NL80211_IFTYPE_ADHOC) { u8 *bssid = ieee80211_get_bssid(hdr, rx->skb->len, - IEEE80211_IF_TYPE_IBSS); + NL80211_IFTYPE_ADHOC); if (compare_ether_addr(bssid, rx->sdata->u.sta.bssid) == 0) sta->last_rx = jiffies; } else if (!is_multicast_ether_addr(hdr->addr1) || - rx->sdata->vif.type == IEEE80211_IF_TYPE_STA) { + rx->sdata->vif.type == NL80211_IFTYPE_STATION) { /* Update last_rx only for unicast frames in order to prevent * the Probe Request frames (the only broadcast frames from a * STA in infrastructure mode) from keeping a connection alive. @@ -725,24 +741,26 @@ ieee80211_rx_h_sta_process(struct ieee80211_rx_data *rx) sta->rx_fragments++; sta->rx_bytes += rx->skb->len; - sta->last_rssi = rx->status->ssi; sta->last_signal = rx->status->signal; + sta->last_qual = rx->status->qual; sta->last_noise = rx->status->noise; - if (!(rx->fc & IEEE80211_FCTL_MOREFRAGS)) { + if (!ieee80211_has_morefrags(hdr->frame_control) && + (rx->sdata->vif.type == NL80211_IFTYPE_AP || + rx->sdata->vif.type == NL80211_IFTYPE_AP_VLAN)) { /* Change STA power saving mode only in the end of a frame * exchange sequence */ - if ((sta->flags & WLAN_STA_PS) && !(rx->fc & IEEE80211_FCTL_PM)) - rx->sent_ps_buffered += ap_sta_ps_end(dev, sta); - else if (!(sta->flags & WLAN_STA_PS) && - (rx->fc & IEEE80211_FCTL_PM)) - ap_sta_ps_start(dev, sta); + if (test_sta_flags(sta, WLAN_STA_PS) && + !ieee80211_has_pm(hdr->frame_control)) + rx->sent_ps_buffered += ap_sta_ps_end(sta); + else if (!test_sta_flags(sta, WLAN_STA_PS) && + ieee80211_has_pm(hdr->frame_control)) + ap_sta_ps_start(sta); } /* Drop data::nullfunc frames silently, since they are used only to * control station power saving mode. */ - if ((rx->fc & IEEE80211_FCTL_FTYPE) == IEEE80211_FTYPE_DATA && - (rx->fc & IEEE80211_FCTL_STYPE) == IEEE80211_STYPE_NULLFUNC) { + if (ieee80211_is_nullfunc(hdr->frame_control)) { I802_DEBUG_INC(rx->local->rx_handlers_drop_nullfunc); /* Update counter and free packet here to avoid counting this * as a dropped packed. */ @@ -768,7 +786,7 @@ ieee80211_reassemble_add(struct ieee80211_sub_if_data *sdata, sdata->fragment_next = 0; if (!skb_queue_empty(&entry->skb_list)) { -#ifdef CONFIG_MAC80211_DEBUG +#ifdef CONFIG_MAC80211_VERBOSE_DEBUG struct ieee80211_hdr *hdr = (struct ieee80211_hdr *) entry->skb_list.next->data; DECLARE_MAC_BUF(mac); @@ -780,7 +798,7 @@ ieee80211_reassemble_add(struct ieee80211_sub_if_data *sdata, jiffies - entry->first_frag_time, entry->seq, entry->last_frag, print_mac(mac, hdr->addr1), print_mac(mac2, hdr->addr2)); -#endif /* CONFIG_MAC80211_DEBUG */ +#endif __skb_queue_purge(&entry->skb_list); } @@ -798,7 +816,7 @@ ieee80211_reassemble_add(struct ieee80211_sub_if_data *sdata, static inline struct ieee80211_fragment_entry * ieee80211_reassemble_find(struct ieee80211_sub_if_data *sdata, - u16 fc, unsigned int frag, unsigned int seq, + unsigned int frag, unsigned int seq, int rx_queue, struct ieee80211_hdr *hdr) { struct ieee80211_fragment_entry *entry; @@ -807,7 +825,6 @@ ieee80211_reassemble_find(struct ieee80211_sub_if_data *sdata, idx = sdata->fragment_next; for (i = 0; i < IEEE80211_FRAGMENT_MAX; i++) { struct ieee80211_hdr *f_hdr; - u16 f_fc; idx--; if (idx < 0) @@ -819,10 +836,13 @@ ieee80211_reassemble_find(struct ieee80211_sub_if_data *sdata, entry->last_frag + 1 != frag) continue; - f_hdr = (struct ieee80211_hdr *) entry->skb_list.next->data; - f_fc = le16_to_cpu(f_hdr->frame_control); + f_hdr = (struct ieee80211_hdr *)entry->skb_list.next->data; - if ((fc & IEEE80211_FCTL_FTYPE) != (f_fc & IEEE80211_FCTL_FTYPE) || + /* + * Check ftype and addresses are equal, else check next fragment + */ + if (((hdr->frame_control ^ f_hdr->frame_control) & + cpu_to_le16(IEEE80211_FCTL_FTYPE)) || compare_ether_addr(hdr->addr1, f_hdr->addr1) != 0 || compare_ether_addr(hdr->addr2, f_hdr->addr2) != 0) continue; @@ -837,21 +857,23 @@ ieee80211_reassemble_find(struct ieee80211_sub_if_data *sdata, return NULL; } -static ieee80211_rx_result +static ieee80211_rx_result debug_noinline ieee80211_rx_h_defragment(struct ieee80211_rx_data *rx) { struct ieee80211_hdr *hdr; u16 sc; + __le16 fc; unsigned int frag, seq; struct ieee80211_fragment_entry *entry; struct sk_buff *skb; DECLARE_MAC_BUF(mac); - hdr = (struct ieee80211_hdr *) rx->skb->data; + hdr = (struct ieee80211_hdr *)rx->skb->data; + fc = hdr->frame_control; sc = le16_to_cpu(hdr->seq_ctrl); frag = sc & IEEE80211_SCTL_FRAG; - if (likely((!(rx->fc & IEEE80211_FCTL_MOREFRAGS) && frag == 0) || + if (likely((!ieee80211_has_morefrags(fc) && frag == 0) || (rx->skb)->len < 24 || is_multicast_ether_addr(hdr->addr1))) { /* not fragmented */ @@ -866,7 +888,7 @@ ieee80211_rx_h_defragment(struct ieee80211_rx_data *rx) entry = ieee80211_reassemble_add(rx->sdata, frag, seq, rx->queue, &(rx->skb)); if (rx->key && rx->key->conf.alg == ALG_CCMP && - (rx->fc & IEEE80211_FCTL_PROTECTED)) { + ieee80211_has_protected(fc)) { /* Store CCMP PN so that we can verify that the next * fragment has a sequential PN value. */ entry->ccmp = 1; @@ -880,8 +902,7 @@ ieee80211_rx_h_defragment(struct ieee80211_rx_data *rx) /* This is a fragment for a frame that should already be pending in * fragment cache. Add this fragment to the end of the pending entry. */ - entry = ieee80211_reassemble_find(rx->sdata, rx->fc, frag, seq, - rx->queue, hdr); + entry = ieee80211_reassemble_find(rx->sdata, frag, seq, rx->queue, hdr); if (!entry) { I802_DEBUG_INC(rx->local->rx_handlers_drop_defrag); return RX_DROP_MONITOR; @@ -901,26 +922,16 @@ ieee80211_rx_h_defragment(struct ieee80211_rx_data *rx) break; } rpn = rx->key->u.ccmp.rx_pn[rx->queue]; - if (memcmp(pn, rpn, CCMP_PN_LEN) != 0) { - if (net_ratelimit()) - printk(KERN_DEBUG "%s: defrag: CCMP PN not " - "sequential A2=%s" - " PN=%02x%02x%02x%02x%02x%02x " - "(expected %02x%02x%02x%02x%02x%02x)\n", - rx->dev->name, print_mac(mac, hdr->addr2), - rpn[0], rpn[1], rpn[2], rpn[3], rpn[4], - rpn[5], pn[0], pn[1], pn[2], pn[3], - pn[4], pn[5]); + if (memcmp(pn, rpn, CCMP_PN_LEN)) return RX_DROP_UNUSABLE; - } memcpy(entry->last_pn, pn, CCMP_PN_LEN); } - skb_pull(rx->skb, ieee80211_get_hdrlen(rx->fc)); + skb_pull(rx->skb, ieee80211_hdrlen(fc)); __skb_queue_tail(&entry->skb_list, rx->skb); entry->last_frag = frag; entry->extra_len += rx->skb->len; - if (rx->fc & IEEE80211_FCTL_MOREFRAGS) { + if (ieee80211_has_morefrags(fc)) { rx->skb = NULL; return RX_QUEUED; } @@ -953,22 +964,21 @@ ieee80211_rx_h_defragment(struct ieee80211_rx_data *rx) return RX_CONTINUE; } -static ieee80211_rx_result +static ieee80211_rx_result debug_noinline ieee80211_rx_h_ps_poll(struct ieee80211_rx_data *rx) { struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(rx->dev); struct sk_buff *skb; int no_pending_pkts; DECLARE_MAC_BUF(mac); + __le16 fc = ((struct ieee80211_hdr *)rx->skb->data)->frame_control; - if (likely(!rx->sta || - (rx->fc & IEEE80211_FCTL_FTYPE) != IEEE80211_FTYPE_CTL || - (rx->fc & IEEE80211_FCTL_STYPE) != IEEE80211_STYPE_PSPOLL || + if (likely(!rx->sta || !ieee80211_is_pspoll(fc) || !(rx->flags & IEEE80211_RX_RA_MATCH))) return RX_CONTINUE; - if ((sdata->vif.type != IEEE80211_IF_TYPE_AP) && - (sdata->vif.type != IEEE80211_IF_TYPE_VLAN)) + if ((sdata->vif.type != NL80211_IFTYPE_AP) && + (sdata->vif.type != NL80211_IFTYPE_AP_VLAN)) return RX_DROP_UNUSABLE; skb = skb_dequeue(&rx->sta->tx_filtered); @@ -988,11 +998,11 @@ ieee80211_rx_h_ps_poll(struct ieee80211_rx_data *rx) * Tell TX path to send one frame even though the STA may * still remain is PS mode after this frame exchange. */ - rx->sta->flags |= WLAN_STA_PSPOLL; + set_sta_flags(rx->sta, WLAN_STA_PSPOLL); #ifdef CONFIG_MAC80211_VERBOSE_PS_DEBUG printk(KERN_DEBUG "STA %s aid %d: PS Poll (entries after %d)\n", - print_mac(mac, rx->sta->addr), rx->sta->aid, + print_mac(mac, rx->sta->sta.addr), rx->sta->sta.aid, skb_queue_len(&rx->sta->ps_tx_buf)); #endif /* CONFIG_MAC80211_VERBOSE_PS_DEBUG */ @@ -1016,8 +1026,8 @@ ieee80211_rx_h_ps_poll(struct ieee80211_rx_data *rx) * have nothing buffered for it? */ printk(KERN_DEBUG "%s: STA %s sent PS Poll even " - "though there is no buffered frames for it\n", - rx->dev->name, print_mac(mac, rx->sta->addr)); + "though there are no buffered frames for it\n", + rx->dev->name, print_mac(mac, rx->sta->sta.addr)); #endif /* CONFIG_MAC80211_VERBOSE_PS_DEBUG */ } @@ -1028,22 +1038,21 @@ ieee80211_rx_h_ps_poll(struct ieee80211_rx_data *rx) return RX_QUEUED; } -static ieee80211_rx_result +static ieee80211_rx_result debug_noinline ieee80211_rx_h_remove_qos_control(struct ieee80211_rx_data *rx) { - u16 fc = rx->fc; u8 *data = rx->skb->data; - struct ieee80211_hdr *hdr = (struct ieee80211_hdr *) data; + struct ieee80211_hdr *hdr = (struct ieee80211_hdr *)data; - if (!WLAN_FC_IS_QOS_DATA(fc)) + if (!ieee80211_is_data_qos(hdr->frame_control)) return RX_CONTINUE; /* remove the qos control field, update frame type and meta-data */ - memmove(data + 2, data, ieee80211_get_hdrlen(fc) - 2); - hdr = (struct ieee80211_hdr *) skb_pull(rx->skb, 2); + memmove(data + IEEE80211_QOS_CTL_LEN, data, + ieee80211_hdrlen(hdr->frame_control) - IEEE80211_QOS_CTL_LEN); + hdr = (struct ieee80211_hdr *)skb_pull(rx->skb, IEEE80211_QOS_CTL_LEN); /* change frame type to non QOS */ - rx->fc = fc &= ~IEEE80211_STYPE_QOS_DATA; - hdr->frame_control = cpu_to_le16(fc); + hdr->frame_control &= ~cpu_to_le16(IEEE80211_STYPE_QOS_DATA); return RX_CONTINUE; } @@ -1051,20 +1060,15 @@ ieee80211_rx_h_remove_qos_control(struct ieee80211_rx_data *rx) static int ieee80211_802_1x_port_control(struct ieee80211_rx_data *rx) { - if (unlikely(!rx->sta || !(rx->sta->flags & WLAN_STA_AUTHORIZED))) { -#ifdef CONFIG_MAC80211_DEBUG - if (net_ratelimit()) - printk(KERN_DEBUG "%s: dropped frame " - "(unauthorized port)\n", rx->dev->name); -#endif /* CONFIG_MAC80211_DEBUG */ + if (unlikely(!rx->sta || + !test_sta_flags(rx->sta, WLAN_STA_AUTHORIZED))) return -EACCES; - } return 0; } static int -ieee80211_drop_unencrypted(struct ieee80211_rx_data *rx) +ieee80211_drop_unencrypted(struct ieee80211_rx_data *rx, __le16 fc) { /* * Pass through unencrypted frames if the hardware has @@ -1074,9 +1078,8 @@ ieee80211_drop_unencrypted(struct ieee80211_rx_data *rx) return 0; /* Drop unencrypted frames if key is set. */ - if (unlikely(!(rx->fc & IEEE80211_FCTL_PROTECTED) && - (rx->fc & IEEE80211_FCTL_FTYPE) == IEEE80211_FTYPE_DATA && - (rx->fc & IEEE80211_FCTL_STYPE) != IEEE80211_STYPE_NULLFUNC && + if (unlikely(!ieee80211_has_protected(fc) && + !ieee80211_is_nullfunc(fc) && (rx->key || rx->sdata->drop_unencrypted))) return -EACCES; @@ -1088,7 +1091,7 @@ ieee80211_data_to_8023(struct ieee80211_rx_data *rx) { struct net_device *dev = rx->dev; struct ieee80211_hdr *hdr = (struct ieee80211_hdr *) rx->skb->data; - u16 fc, hdrlen, ethertype; + u16 hdrlen, ethertype; u8 *payload; u8 dst[ETH_ALEN]; u8 src[ETH_ALEN] __aligned(2); @@ -1099,27 +1102,10 @@ ieee80211_data_to_8023(struct ieee80211_rx_data *rx) DECLARE_MAC_BUF(mac3); DECLARE_MAC_BUF(mac4); - fc = rx->fc; - - if (unlikely(!WLAN_FC_DATA_PRESENT(fc))) + if (unlikely(!ieee80211_is_data_present(hdr->frame_control))) return -1; - hdrlen = ieee80211_get_hdrlen(fc); - - if (ieee80211_vif_is_mesh(&sdata->vif)) { - int meshhdrlen = ieee80211_get_mesh_hdrlen( - (struct ieee80211s_hdr *) (skb->data + hdrlen)); - /* Copy on cb: - * - mesh header: to be used for mesh forwarding - * decision. It will also be used as mesh header template at - * tx.c:ieee80211_subif_start_xmit() if interface - * type is mesh and skb->pkt_type == PACKET_OTHERHOST - * - ta: to be used if a RERR needs to be sent. - */ - memcpy(skb->cb, skb->data + hdrlen, meshhdrlen); - memcpy(MESH_PREQ(skb), hdr->addr2, ETH_ALEN); - hdrlen += meshhdrlen; - } + hdrlen = ieee80211_hdrlen(hdr->frame_control); /* convert IEEE 802.11 header + possible LLC headers into Ethernet * header @@ -1130,79 +1116,44 @@ ieee80211_data_to_8023(struct ieee80211_rx_data *rx) * 1 0 BSSID SA DA n/a * 1 1 RA TA DA SA */ - - switch (fc & (IEEE80211_FCTL_TODS | IEEE80211_FCTL_FROMDS)) { - case IEEE80211_FCTL_TODS: - /* BSSID SA DA */ - memcpy(dst, hdr->addr3, ETH_ALEN); - memcpy(src, hdr->addr2, ETH_ALEN); - - if (unlikely(sdata->vif.type != IEEE80211_IF_TYPE_AP && - sdata->vif.type != IEEE80211_IF_TYPE_VLAN)) { - if (net_ratelimit()) - printk(KERN_DEBUG "%s: dropped ToDS frame " - "(BSSID=%s SA=%s DA=%s)\n", - dev->name, - print_mac(mac, hdr->addr1), - print_mac(mac2, hdr->addr2), - print_mac(mac3, hdr->addr3)); + memcpy(dst, ieee80211_get_DA(hdr), ETH_ALEN); + memcpy(src, ieee80211_get_SA(hdr), ETH_ALEN); + + switch (hdr->frame_control & + cpu_to_le16(IEEE80211_FCTL_TODS | IEEE80211_FCTL_FROMDS)) { + case __constant_cpu_to_le16(IEEE80211_FCTL_TODS): + if (unlikely(sdata->vif.type != NL80211_IFTYPE_AP && + sdata->vif.type != NL80211_IFTYPE_AP_VLAN)) return -1; - } break; - case (IEEE80211_FCTL_TODS | IEEE80211_FCTL_FROMDS): - /* RA TA DA SA */ - memcpy(dst, hdr->addr3, ETH_ALEN); - memcpy(src, hdr->addr4, ETH_ALEN); - - if (unlikely(sdata->vif.type != IEEE80211_IF_TYPE_WDS && - sdata->vif.type != IEEE80211_IF_TYPE_MESH_POINT)) { - if (net_ratelimit()) - printk(KERN_DEBUG "%s: dropped FromDS&ToDS " - "frame (RA=%s TA=%s DA=%s SA=%s)\n", - rx->dev->name, - print_mac(mac, hdr->addr1), - print_mac(mac2, hdr->addr2), - print_mac(mac3, hdr->addr3), - print_mac(mac4, hdr->addr4)); + case __constant_cpu_to_le16(IEEE80211_FCTL_TODS | IEEE80211_FCTL_FROMDS): + if (unlikely(sdata->vif.type != NL80211_IFTYPE_WDS && + sdata->vif.type != NL80211_IFTYPE_MESH_POINT)) return -1; + if (ieee80211_vif_is_mesh(&sdata->vif)) { + struct ieee80211s_hdr *meshdr = (struct ieee80211s_hdr *) + (skb->data + hdrlen); + hdrlen += ieee80211_get_mesh_hdrlen(meshdr); + if (meshdr->flags & MESH_FLAGS_AE_A5_A6) { + memcpy(dst, meshdr->eaddr1, ETH_ALEN); + memcpy(src, meshdr->eaddr2, ETH_ALEN); + } } break; - case IEEE80211_FCTL_FROMDS: - /* DA BSSID SA */ - memcpy(dst, hdr->addr1, ETH_ALEN); - memcpy(src, hdr->addr3, ETH_ALEN); - - if (sdata->vif.type != IEEE80211_IF_TYPE_STA || + case __constant_cpu_to_le16(IEEE80211_FCTL_FROMDS): + if (sdata->vif.type != NL80211_IFTYPE_STATION || (is_multicast_ether_addr(dst) && !compare_ether_addr(src, dev->dev_addr))) return -1; break; - case 0: - /* DA SA BSSID */ - memcpy(dst, hdr->addr1, ETH_ALEN); - memcpy(src, hdr->addr2, ETH_ALEN); - - if (sdata->vif.type != IEEE80211_IF_TYPE_IBSS) { - if (net_ratelimit()) { - printk(KERN_DEBUG "%s: dropped IBSS frame " - "(DA=%s SA=%s BSSID=%s)\n", - dev->name, - print_mac(mac, hdr->addr1), - print_mac(mac2, hdr->addr2), - print_mac(mac3, hdr->addr3)); - } + case __constant_cpu_to_le16(0): + if (sdata->vif.type != NL80211_IFTYPE_ADHOC) return -1; - } break; } - if (unlikely(skb->len - hdrlen < 8)) { - if (net_ratelimit()) { - printk(KERN_DEBUG "%s: RX too short data frame " - "payload\n", dev->name); - } + if (unlikely(skb->len - hdrlen < 8)) return -1; - } payload = skb->data + hdrlen; ethertype = (payload[6] << 8) | payload[7]; @@ -1232,7 +1183,7 @@ ieee80211_data_to_8023(struct ieee80211_rx_data *rx) /* * requires that rx->skb is a frame with ethernet header */ -static bool ieee80211_frame_allowed(struct ieee80211_rx_data *rx) +static bool ieee80211_frame_allowed(struct ieee80211_rx_data *rx, __le16 fc) { static const u8 pae_group_addr[ETH_ALEN] __aligned(2) = { 0x01, 0x80, 0xC2, 0x00, 0x00, 0x03 }; @@ -1248,7 +1199,7 @@ static bool ieee80211_frame_allowed(struct ieee80211_rx_data *rx) return true; if (ieee80211_802_1x_port_control(rx) || - ieee80211_drop_unencrypted(rx)) + ieee80211_drop_unencrypted(rx, fc)) return false; return true; @@ -1270,8 +1221,9 @@ ieee80211_deliver_skb(struct ieee80211_rx_data *rx) skb = rx->skb; xmit_skb = NULL; - if (local->bridge_packets && (sdata->vif.type == IEEE80211_IF_TYPE_AP || - sdata->vif.type == IEEE80211_IF_TYPE_VLAN) && + if ((sdata->vif.type == NL80211_IFTYPE_AP || + sdata->vif.type == NL80211_IFTYPE_AP_VLAN) && + !(sdata->flags & IEEE80211_SDATA_DONT_BRIDGE_PACKETS) && (rx->flags & IEEE80211_RX_RA_MATCH)) { if (is_multicast_ether_addr(ehdr->h_dest)) { /* @@ -1297,38 +1249,6 @@ ieee80211_deliver_skb(struct ieee80211_rx_data *rx) } } - /* Mesh forwarding */ - if (ieee80211_vif_is_mesh(&sdata->vif)) { - u8 *mesh_ttl = &((struct ieee80211s_hdr *)skb->cb)->ttl; - (*mesh_ttl)--; - - if (is_multicast_ether_addr(skb->data)) { - if (*mesh_ttl > 0) { - xmit_skb = skb_copy(skb, GFP_ATOMIC); - if (xmit_skb) - xmit_skb->pkt_type = PACKET_OTHERHOST; - else if (net_ratelimit()) - printk(KERN_DEBUG "%s: failed to clone " - "multicast frame\n", dev->name); - } else - IEEE80211_IFSTA_MESH_CTR_INC(&sdata->u.sta, - dropped_frames_ttl); - } else if (skb->pkt_type != PACKET_OTHERHOST && - compare_ether_addr(dev->dev_addr, skb->data) != 0) { - if (*mesh_ttl == 0) { - IEEE80211_IFSTA_MESH_CTR_INC(&sdata->u.sta, - dropped_frames_ttl); - dev_kfree_skb(skb); - skb = NULL; - } else { - xmit_skb = skb; - xmit_skb->pkt_type = PACKET_OTHERHOST; - if (!(dev->flags & IFF_PROMISC)) - skb = NULL; - } - } - } - if (skb) { /* deliver to local stack */ skb->protocol = eth_type_trans(skb, dev); @@ -1345,25 +1265,26 @@ ieee80211_deliver_skb(struct ieee80211_rx_data *rx) } } -static ieee80211_rx_result +static ieee80211_rx_result debug_noinline ieee80211_rx_h_amsdu(struct ieee80211_rx_data *rx) { struct net_device *dev = rx->dev; struct ieee80211_local *local = rx->local; - u16 fc, ethertype; + u16 ethertype; u8 *payload; struct sk_buff *skb = rx->skb, *frame = NULL; + struct ieee80211_hdr *hdr = (struct ieee80211_hdr *)skb->data; + __le16 fc = hdr->frame_control; const struct ethhdr *eth; int remaining, err; u8 dst[ETH_ALEN]; u8 src[ETH_ALEN]; DECLARE_MAC_BUF(mac); - fc = rx->fc; - if (unlikely((fc & IEEE80211_FCTL_FTYPE) != IEEE80211_FTYPE_DATA)) + if (unlikely(!ieee80211_is_data(fc))) return RX_CONTINUE; - if (unlikely(!WLAN_FC_DATA_PRESENT(fc))) + if (unlikely(!ieee80211_is_data_present(fc))) return RX_DROP_MONITOR; if (!(rx->flags & IEEE80211_RX_AMSDU)) @@ -1394,10 +1315,8 @@ ieee80211_rx_h_amsdu(struct ieee80211_rx_data *rx) padding = ((4 - subframe_len) & 0x3); /* the last MSDU has no padding */ - if (subframe_len > remaining) { - printk(KERN_DEBUG "%s: wrong buffer size\n", dev->name); + if (subframe_len > remaining) return RX_DROP_UNUSABLE; - } skb_pull(skb, sizeof(struct ethhdr)); /* if last subframe reuse skb */ @@ -1418,8 +1337,6 @@ ieee80211_rx_h_amsdu(struct ieee80211_rx_data *rx) eth = (struct ethhdr *) skb_pull(skb, ntohs(len) + padding); if (!eth) { - printk(KERN_DEBUG "%s: wrong buffer size\n", - dev->name); dev_kfree_skb(frame); return RX_DROP_UNUSABLE; } @@ -1449,7 +1366,7 @@ ieee80211_rx_h_amsdu(struct ieee80211_rx_data *rx) memcpy(skb_push(frame, ETH_ALEN), dst, ETH_ALEN); } - if (!ieee80211_frame_allowed(rx)) { + if (!ieee80211_frame_allowed(rx, fc)) { if (skb == frame) /* last frame */ return RX_DROP_UNUSABLE; dev_kfree_skb(frame); @@ -1462,25 +1379,102 @@ ieee80211_rx_h_amsdu(struct ieee80211_rx_data *rx) return RX_QUEUED; } +#ifdef CONFIG_MAC80211_MESH static ieee80211_rx_result +ieee80211_rx_h_mesh_fwding(struct ieee80211_rx_data *rx) +{ + struct ieee80211_hdr *hdr; + struct ieee80211s_hdr *mesh_hdr; + unsigned int hdrlen; + struct sk_buff *skb = rx->skb, *fwd_skb; + + hdr = (struct ieee80211_hdr *) skb->data; + hdrlen = ieee80211_hdrlen(hdr->frame_control); + mesh_hdr = (struct ieee80211s_hdr *) (skb->data + hdrlen); + + if (!ieee80211_is_data(hdr->frame_control)) + return RX_CONTINUE; + + if (!mesh_hdr->ttl) + /* illegal frame */ + return RX_DROP_MONITOR; + + if (mesh_hdr->flags & MESH_FLAGS_AE_A5_A6){ + struct ieee80211_sub_if_data *sdata; + struct mesh_path *mppath; + + sdata = IEEE80211_DEV_TO_SUB_IF(rx->dev); + rcu_read_lock(); + mppath = mpp_path_lookup(mesh_hdr->eaddr2, sdata); + if (!mppath) { + mpp_path_add(mesh_hdr->eaddr2, hdr->addr4, sdata); + } else { + spin_lock_bh(&mppath->state_lock); + mppath->exp_time = jiffies; + if (compare_ether_addr(mppath->mpp, hdr->addr4) != 0) + memcpy(mppath->mpp, hdr->addr4, ETH_ALEN); + spin_unlock_bh(&mppath->state_lock); + } + rcu_read_unlock(); + } + + if (compare_ether_addr(rx->dev->dev_addr, hdr->addr3) == 0) + return RX_CONTINUE; + + mesh_hdr->ttl--; + + if (rx->flags & IEEE80211_RX_RA_MATCH) { + if (!mesh_hdr->ttl) + IEEE80211_IFSTA_MESH_CTR_INC(&rx->sdata->u.mesh, + dropped_frames_ttl); + else { + struct ieee80211_hdr *fwd_hdr; + fwd_skb = skb_copy(skb, GFP_ATOMIC); + + if (!fwd_skb && net_ratelimit()) + printk(KERN_DEBUG "%s: failed to clone mesh frame\n", + rx->dev->name); + + fwd_hdr = (struct ieee80211_hdr *) fwd_skb->data; + /* + * Save TA to addr1 to send TA a path error if a + * suitable next hop is not found + */ + memcpy(fwd_hdr->addr1, fwd_hdr->addr2, ETH_ALEN); + memcpy(fwd_hdr->addr2, rx->dev->dev_addr, ETH_ALEN); + fwd_skb->dev = rx->local->mdev; + fwd_skb->iif = rx->dev->ifindex; + dev_queue_xmit(fwd_skb); + } + } + + if (is_multicast_ether_addr(hdr->addr3) || + rx->dev->flags & IFF_PROMISC) + return RX_CONTINUE; + else + return RX_DROP_MONITOR; +} +#endif + +static ieee80211_rx_result debug_noinline ieee80211_rx_h_data(struct ieee80211_rx_data *rx) { struct net_device *dev = rx->dev; - u16 fc; + struct ieee80211_hdr *hdr = (struct ieee80211_hdr *)rx->skb->data; + __le16 fc = hdr->frame_control; int err; - fc = rx->fc; - if (unlikely((fc & IEEE80211_FCTL_FTYPE) != IEEE80211_FTYPE_DATA)) + if (unlikely(!ieee80211_is_data(hdr->frame_control))) return RX_CONTINUE; - if (unlikely(!WLAN_FC_DATA_PRESENT(fc))) + if (unlikely(!ieee80211_is_data_present(hdr->frame_control))) return RX_DROP_MONITOR; err = ieee80211_data_to_8023(rx); if (unlikely(err)) return RX_DROP_UNUSABLE; - if (!ieee80211_frame_allowed(rx)) + if (!ieee80211_frame_allowed(rx, fc)) return RX_DROP_MONITOR; rx->skb->dev = dev; @@ -1493,21 +1487,21 @@ ieee80211_rx_h_data(struct ieee80211_rx_data *rx) return RX_QUEUED; } -static ieee80211_rx_result +static ieee80211_rx_result debug_noinline ieee80211_rx_h_ctrl(struct ieee80211_rx_data *rx) { struct ieee80211_local *local = rx->local; struct ieee80211_hw *hw = &local->hw; struct sk_buff *skb = rx->skb; - struct ieee80211_bar *bar = (struct ieee80211_bar *) skb->data; + struct ieee80211_bar *bar = (struct ieee80211_bar *)skb->data; struct tid_ampdu_rx *tid_agg_rx; u16 start_seq_num; u16 tid; - if (likely((rx->fc & IEEE80211_FCTL_FTYPE) != IEEE80211_FTYPE_CTL)) + if (likely(!ieee80211_is_ctl(bar->frame_control))) return RX_CONTINUE; - if ((rx->fc & IEEE80211_FCTL_STYPE) == IEEE80211_STYPE_BACK_REQ) { + if (ieee80211_is_back_req(bar->frame_control)) { if (!rx->sta) return RX_CONTINUE; tid = le16_to_cpu(bar->control) >> 12; @@ -1537,23 +1531,98 @@ ieee80211_rx_h_ctrl(struct ieee80211_rx_data *rx) return RX_CONTINUE; } -static ieee80211_rx_result +static ieee80211_rx_result debug_noinline +ieee80211_rx_h_action(struct ieee80211_rx_data *rx) +{ + struct ieee80211_local *local = rx->local; + struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(rx->dev); + struct ieee80211_mgmt *mgmt = (struct ieee80211_mgmt *) rx->skb->data; + int len = rx->skb->len; + + if (!ieee80211_is_action(mgmt->frame_control)) + return RX_CONTINUE; + + if (!rx->sta) + return RX_DROP_MONITOR; + + if (!(rx->flags & IEEE80211_RX_RA_MATCH)) + return RX_DROP_MONITOR; + + /* all categories we currently handle have action_code */ + if (len < IEEE80211_MIN_ACTION_SIZE + 1) + return RX_DROP_MONITOR; + + /* + * FIXME: revisit this, I'm sure we should handle most + * of these frames in other modes as well! + */ + if (sdata->vif.type != NL80211_IFTYPE_STATION && + sdata->vif.type != NL80211_IFTYPE_ADHOC) + return RX_CONTINUE; + + switch (mgmt->u.action.category) { + case WLAN_CATEGORY_BACK: + switch (mgmt->u.action.u.addba_req.action_code) { + case WLAN_ACTION_ADDBA_REQ: + if (len < (IEEE80211_MIN_ACTION_SIZE + + sizeof(mgmt->u.action.u.addba_req))) + return RX_DROP_MONITOR; + ieee80211_process_addba_request(local, rx->sta, mgmt, len); + break; + case WLAN_ACTION_ADDBA_RESP: + if (len < (IEEE80211_MIN_ACTION_SIZE + + sizeof(mgmt->u.action.u.addba_resp))) + return RX_DROP_MONITOR; + ieee80211_process_addba_resp(local, rx->sta, mgmt, len); + break; + case WLAN_ACTION_DELBA: + if (len < (IEEE80211_MIN_ACTION_SIZE + + sizeof(mgmt->u.action.u.delba))) + return RX_DROP_MONITOR; + ieee80211_process_delba(sdata, rx->sta, mgmt, len); + break; + } + break; + case WLAN_CATEGORY_SPECTRUM_MGMT: + if (local->hw.conf.channel->band != IEEE80211_BAND_5GHZ) + return RX_DROP_MONITOR; + switch (mgmt->u.action.u.measurement.action_code) { + case WLAN_ACTION_SPCT_MSR_REQ: + if (len < (IEEE80211_MIN_ACTION_SIZE + + sizeof(mgmt->u.action.u.measurement))) + return RX_DROP_MONITOR; + ieee80211_process_measurement_req(sdata, mgmt, len); + break; + } + break; + default: + return RX_CONTINUE; + } + + rx->sta->rx_packets++; + dev_kfree_skb(rx->skb); + return RX_QUEUED; +} + +static ieee80211_rx_result debug_noinline ieee80211_rx_h_mgmt(struct ieee80211_rx_data *rx) { - struct ieee80211_sub_if_data *sdata; + struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(rx->dev); if (!(rx->flags & IEEE80211_RX_RA_MATCH)) return RX_DROP_MONITOR; - sdata = IEEE80211_DEV_TO_SUB_IF(rx->dev); - if ((sdata->vif.type == IEEE80211_IF_TYPE_STA || - sdata->vif.type == IEEE80211_IF_TYPE_IBSS || - sdata->vif.type == IEEE80211_IF_TYPE_MESH_POINT) && - !(sdata->flags & IEEE80211_SDATA_USERSPACE_MLME)) - ieee80211_sta_rx_mgmt(rx->dev, rx->skb, rx->status); - else + if (ieee80211_vif_is_mesh(&sdata->vif)) + return ieee80211_mesh_rx_mgmt(sdata, rx->skb, rx->status); + + if (sdata->vif.type != NL80211_IFTYPE_STATION && + sdata->vif.type != NL80211_IFTYPE_ADHOC) + return RX_DROP_MONITOR; + + if (sdata->flags & IEEE80211_SDATA_USERSPACE_MLME) return RX_DROP_MONITOR; + ieee80211_sta_rx_mgmt(sdata, rx->skb, rx->status); return RX_QUEUED; } @@ -1561,69 +1630,43 @@ static void ieee80211_rx_michael_mic_report(struct net_device *dev, struct ieee80211_hdr *hdr, struct ieee80211_rx_data *rx) { - int keyidx, hdrlen; + int keyidx; + unsigned int hdrlen; DECLARE_MAC_BUF(mac); DECLARE_MAC_BUF(mac2); - hdrlen = ieee80211_get_hdrlen_from_skb(rx->skb); + hdrlen = ieee80211_hdrlen(hdr->frame_control); if (rx->skb->len >= hdrlen + 4) keyidx = rx->skb->data[hdrlen + 3] >> 6; else keyidx = -1; - if (net_ratelimit()) - printk(KERN_DEBUG "%s: TKIP hwaccel reported Michael MIC " - "failure from %s to %s keyidx=%d\n", - dev->name, print_mac(mac, hdr->addr2), - print_mac(mac2, hdr->addr1), keyidx); - if (!rx->sta) { /* * Some hardware seem to generate incorrect Michael MIC * reports; ignore them to avoid triggering countermeasures. */ - if (net_ratelimit()) - printk(KERN_DEBUG "%s: ignored spurious Michael MIC " - "error for unknown address %s\n", - dev->name, print_mac(mac, hdr->addr2)); goto ignore; } - if (!(rx->fc & IEEE80211_FCTL_PROTECTED)) { - if (net_ratelimit()) - printk(KERN_DEBUG "%s: ignored spurious Michael MIC " - "error for a frame with no PROTECTED flag (src " - "%s)\n", dev->name, print_mac(mac, hdr->addr2)); + if (!ieee80211_has_protected(hdr->frame_control)) goto ignore; - } - if (rx->sdata->vif.type == IEEE80211_IF_TYPE_AP && keyidx) { + if (rx->sdata->vif.type == NL80211_IFTYPE_AP && keyidx) { /* * APs with pairwise keys should never receive Michael MIC * errors for non-zero keyidx because these are reserved for * group keys and only the AP is sending real multicast * frames in the BSS. */ - if (net_ratelimit()) - printk(KERN_DEBUG "%s: ignored Michael MIC error for " - "a frame with non-zero keyidx (%d)" - " (src %s)\n", dev->name, keyidx, - print_mac(mac, hdr->addr2)); goto ignore; } - if ((rx->fc & IEEE80211_FCTL_FTYPE) != IEEE80211_FTYPE_DATA && - ((rx->fc & IEEE80211_FCTL_FTYPE) != IEEE80211_FTYPE_MGMT || - (rx->fc & IEEE80211_FCTL_STYPE) != IEEE80211_STYPE_AUTH)) { - if (net_ratelimit()) - printk(KERN_DEBUG "%s: ignored spurious Michael MIC " - "error for a frame that cannot be encrypted " - "(fc=0x%04x) (src %s)\n", - dev->name, rx->fc, print_mac(mac, hdr->addr2)); + if (!ieee80211_is_data(hdr->frame_control) && + !ieee80211_is_auth(hdr->frame_control)) goto ignore; - } - mac80211_ev_michael_mic_failure(rx->dev, keyidx, hdr); + mac80211_ev_michael_mic_failure(rx->sdata, keyidx, hdr); ignore: dev_kfree_skb(rx->skb); rx->skb = NULL; @@ -1679,7 +1722,7 @@ static void ieee80211_rx_cooked_monitor(struct ieee80211_rx_data *rx) if (!netif_running(sdata->dev)) continue; - if (sdata->vif.type != IEEE80211_IF_TYPE_MNTR || + if (sdata->vif.type != NL80211_IFTYPE_MONITOR || !(sdata->u.mntr_flags & MONITOR_FLAG_COOK_FRAMES)) continue; @@ -1710,67 +1753,64 @@ static void ieee80211_rx_cooked_monitor(struct ieee80211_rx_data *rx) dev_kfree_skb(skb); } -typedef ieee80211_rx_result (*ieee80211_rx_handler)(struct ieee80211_rx_data *); -static ieee80211_rx_handler ieee80211_rx_handlers[] = -{ - ieee80211_rx_h_if_stats, - ieee80211_rx_h_passive_scan, - ieee80211_rx_h_check, - ieee80211_rx_h_decrypt, - ieee80211_rx_h_sta_process, - ieee80211_rx_h_defragment, - ieee80211_rx_h_ps_poll, - ieee80211_rx_h_michael_mic_verify, - /* this must be after decryption - so header is counted in MPDU mic - * must be before pae and data, so QOS_DATA format frames - * are not passed to user space by these functions - */ - ieee80211_rx_h_remove_qos_control, - ieee80211_rx_h_amsdu, - ieee80211_rx_h_data, - ieee80211_rx_h_ctrl, - ieee80211_rx_h_mgmt, - NULL -}; static void ieee80211_invoke_rx_handlers(struct ieee80211_sub_if_data *sdata, struct ieee80211_rx_data *rx, struct sk_buff *skb) { - ieee80211_rx_handler *handler; ieee80211_rx_result res = RX_DROP_MONITOR; rx->skb = skb; rx->sdata = sdata; rx->dev = sdata->dev; - for (handler = ieee80211_rx_handlers; *handler != NULL; handler++) { - res = (*handler)(rx); +#define CALL_RXH(rxh) \ + do { \ + res = rxh(rx); \ + if (res != RX_CONTINUE) \ + goto rxh_done; \ + } while (0); + + CALL_RXH(ieee80211_rx_h_passive_scan) + CALL_RXH(ieee80211_rx_h_check) + CALL_RXH(ieee80211_rx_h_decrypt) + CALL_RXH(ieee80211_rx_h_sta_process) + CALL_RXH(ieee80211_rx_h_defragment) + CALL_RXH(ieee80211_rx_h_ps_poll) + CALL_RXH(ieee80211_rx_h_michael_mic_verify) + /* must be after MMIC verify so header is counted in MPDU mic */ + CALL_RXH(ieee80211_rx_h_remove_qos_control) + CALL_RXH(ieee80211_rx_h_amsdu) +#ifdef CONFIG_MAC80211_MESH + if (ieee80211_vif_is_mesh(&sdata->vif)) + CALL_RXH(ieee80211_rx_h_mesh_fwding); +#endif + CALL_RXH(ieee80211_rx_h_data) + CALL_RXH(ieee80211_rx_h_ctrl) + CALL_RXH(ieee80211_rx_h_action) + CALL_RXH(ieee80211_rx_h_mgmt) - switch (res) { - case RX_CONTINUE: - continue; - case RX_DROP_UNUSABLE: - case RX_DROP_MONITOR: - I802_DEBUG_INC(sdata->local->rx_handlers_drop); - if (rx->sta) - rx->sta->rx_dropped++; - break; - case RX_QUEUED: - I802_DEBUG_INC(sdata->local->rx_handlers_queued); - break; - } - break; - } +#undef CALL_RXH + rxh_done: switch (res) { - case RX_CONTINUE: case RX_DROP_MONITOR: + I802_DEBUG_INC(sdata->local->rx_handlers_drop); + if (rx->sta) + rx->sta->rx_dropped++; + /* fall through */ + case RX_CONTINUE: ieee80211_rx_cooked_monitor(rx); break; case RX_DROP_UNUSABLE: + I802_DEBUG_INC(sdata->local->rx_handlers_drop); + if (rx->sta) + rx->sta->rx_dropped++; dev_kfree_skb(rx->skb); break; + case RX_QUEUED: + I802_DEBUG_INC(sdata->local->rx_handlers_queued); + break; } } @@ -1783,7 +1823,7 @@ static int prepare_for_handlers(struct ieee80211_sub_if_data *sdata, int multicast = is_multicast_ether_addr(hdr->addr1); switch (sdata->vif.type) { - case IEEE80211_IF_TYPE_STA: + case NL80211_IFTYPE_STATION: if (!bssid) return 0; if (!ieee80211_bssid_match(bssid, sdata->u.sta.bssid)) { @@ -1798,12 +1838,12 @@ static int prepare_for_handlers(struct ieee80211_sub_if_data *sdata, rx->flags &= ~IEEE80211_RX_RA_MATCH; } break; - case IEEE80211_IF_TYPE_IBSS: + case NL80211_IFTYPE_ADHOC: if (!bssid) return 0; - if ((rx->fc & IEEE80211_FCTL_FTYPE) == IEEE80211_FTYPE_MGMT && - (rx->fc & IEEE80211_FCTL_STYPE) == IEEE80211_STYPE_BEACON) + if (ieee80211_is_beacon(hdr->frame_control)) { return 1; + } else if (!ieee80211_bssid_match(bssid, sdata->u.sta.bssid)) { if (!(rx->flags & IEEE80211_RX_IN_SCAN)) return 0; @@ -1815,10 +1855,11 @@ static int prepare_for_handlers(struct ieee80211_sub_if_data *sdata, return 0; rx->flags &= ~IEEE80211_RX_RA_MATCH; } else if (!rx->sta) - rx->sta = ieee80211_ibss_add_sta(sdata->dev, rx->skb, - bssid, hdr->addr2); + rx->sta = ieee80211_ibss_add_sta(sdata, rx->skb, + bssid, hdr->addr2, + BIT(rx->status->rate_idx)); break; - case IEEE80211_IF_TYPE_MESH_POINT: + case NL80211_IFTYPE_MESH_POINT: if (!multicast && compare_ether_addr(sdata->dev->dev_addr, hdr->addr1) != 0) { @@ -1828,8 +1869,8 @@ static int prepare_for_handlers(struct ieee80211_sub_if_data *sdata, rx->flags &= ~IEEE80211_RX_RA_MATCH; } break; - case IEEE80211_IF_TYPE_VLAN: - case IEEE80211_IF_TYPE_AP: + case NL80211_IFTYPE_AP_VLAN: + case NL80211_IFTYPE_AP: if (!bssid) { if (compare_ether_addr(sdata->dev->dev_addr, hdr->addr1)) @@ -1840,23 +1881,18 @@ static int prepare_for_handlers(struct ieee80211_sub_if_data *sdata, return 0; rx->flags &= ~IEEE80211_RX_RA_MATCH; } - if (sdata->dev == sdata->local->mdev && - !(rx->flags & IEEE80211_RX_IN_SCAN)) - /* do not receive anything via - * master device when not scanning */ - return 0; break; - case IEEE80211_IF_TYPE_WDS: - if (bssid || - (rx->fc & IEEE80211_FCTL_FTYPE) != IEEE80211_FTYPE_DATA) + case NL80211_IFTYPE_WDS: + if (bssid || !ieee80211_is_data(hdr->frame_control)) return 0; if (compare_ether_addr(sdata->u.wds.remote_addr, hdr->addr2)) return 0; break; - case IEEE80211_IF_TYPE_MNTR: + case NL80211_IFTYPE_MONITOR: /* take everything */ break; - case IEEE80211_IF_TYPE_INVALID: + case NL80211_IFTYPE_UNSPECIFIED: + case __NL80211_IFTYPE_AFTER_LAST: /* should never get here */ WARN_ON(1); break; @@ -1872,31 +1908,26 @@ static int prepare_for_handlers(struct ieee80211_sub_if_data *sdata, static void __ieee80211_rx_handle_packet(struct ieee80211_hw *hw, struct sk_buff *skb, struct ieee80211_rx_status *status, - u32 load, struct ieee80211_rate *rate) { struct ieee80211_local *local = hw_to_local(hw); struct ieee80211_sub_if_data *sdata; struct ieee80211_hdr *hdr; struct ieee80211_rx_data rx; - u16 type; int prepares; struct ieee80211_sub_if_data *prev = NULL; struct sk_buff *skb_new; u8 *bssid; - hdr = (struct ieee80211_hdr *) skb->data; + hdr = (struct ieee80211_hdr *)skb->data; memset(&rx, 0, sizeof(rx)); rx.skb = skb; rx.local = local; rx.status = status; - rx.load = load; rx.rate = rate; - rx.fc = le16_to_cpu(hdr->frame_control); - type = rx.fc & IEEE80211_FCTL_FTYPE; - if (type == IEEE80211_FTYPE_DATA || type == IEEE80211_FTYPE_MGMT) + if (ieee80211_is_data(hdr->frame_control) || ieee80211_is_mgmt(hdr->frame_control)) local->dot11ReceivedFragmentCount++; rx.sta = sta_info_get(local, hdr->addr2); @@ -1910,7 +1941,7 @@ static void __ieee80211_rx_handle_packet(struct ieee80211_hw *hw, return; } - if (unlikely(local->sta_sw_scanning || local->sta_hw_scanning)) + if (unlikely(local->sw_scanning || local->hw_scanning)) rx.flags |= IEEE80211_RX_IN_SCAN; ieee80211_parse_qos(&rx); @@ -1922,7 +1953,7 @@ static void __ieee80211_rx_handle_packet(struct ieee80211_hw *hw, if (!netif_running(sdata->dev)) continue; - if (sdata->vif.type == IEEE80211_IF_TYPE_MNTR) + if (sdata->vif.type == NL80211_IFTYPE_MONITOR) continue; bssid = ieee80211_get_bssid(hdr, skb->len, sdata->vif.type); @@ -1957,14 +1988,12 @@ static void __ieee80211_rx_handle_packet(struct ieee80211_hw *hw, prev->dev->name); continue; } - rx.fc = le16_to_cpu(hdr->frame_control); ieee80211_invoke_rx_handlers(prev, &rx, skb_new); prev = sdata; } - if (prev) { - rx.fc = le16_to_cpu(hdr->frame_control); + if (prev) ieee80211_invoke_rx_handlers(prev, &rx, skb); - } else + else dev_kfree_skb(skb); } @@ -2000,7 +2029,6 @@ u8 ieee80211_sta_manage_reorder_buf(struct ieee80211_hw *hw, struct ieee80211_rx_status status; u16 head_seq_num, buf_size; int index; - u32 pkt_load; struct ieee80211_supported_band *sband; struct ieee80211_rate *rate; @@ -2035,12 +2063,9 @@ u8 ieee80211_sta_manage_reorder_buf(struct ieee80211_hw *hw, sizeof(status)); sband = local->hw.wiphy->bands[status.band]; rate = &sband->bitrates[status.rate_idx]; - pkt_load = ieee80211_rx_load_stats(local, - tid_agg_rx->reorder_buf[index], - &status, rate); __ieee80211_rx_handle_packet(hw, tid_agg_rx->reorder_buf[index], - &status, pkt_load, rate); + &status, rate); tid_agg_rx->stored_mpdu_num--; tid_agg_rx->reorder_buf[index] = NULL; } @@ -2082,11 +2107,8 @@ u8 ieee80211_sta_manage_reorder_buf(struct ieee80211_hw *hw, sizeof(status)); sband = local->hw.wiphy->bands[status.band]; rate = &sband->bitrates[status.rate_idx]; - pkt_load = ieee80211_rx_load_stats(local, - tid_agg_rx->reorder_buf[index], - &status, rate); __ieee80211_rx_handle_packet(hw, tid_agg_rx->reorder_buf[index], - &status, pkt_load, rate); + &status, rate); tid_agg_rx->stored_mpdu_num--; tid_agg_rx->reorder_buf[index] = NULL; tid_agg_rx->head_seq_num = seq_inc(tid_agg_rx->head_seq_num); @@ -2103,32 +2125,29 @@ static u8 ieee80211_rx_reorder_ampdu(struct ieee80211_local *local, struct ieee80211_hdr *hdr = (struct ieee80211_hdr *) skb->data; struct sta_info *sta; struct tid_ampdu_rx *tid_agg_rx; - u16 fc, sc; + u16 sc; u16 mpdu_seq_num; - u8 ret = 0, *qc; + u8 ret = 0; int tid; sta = sta_info_get(local, hdr->addr2); if (!sta) return ret; - fc = le16_to_cpu(hdr->frame_control); - /* filter the QoS data rx stream according to * STA/TID and check if this STA/TID is on aggregation */ - if (!WLAN_FC_IS_QOS_DATA(fc)) + if (!ieee80211_is_data_qos(hdr->frame_control)) goto end_reorder; - qc = skb->data + ieee80211_get_hdrlen(fc) - QOS_CONTROL_LEN; - tid = qc[0] & QOS_CONTROL_TID_MASK; + tid = *ieee80211_get_qos_ctl(hdr) & IEEE80211_QOS_CTL_TID_MASK; if (sta->ampdu_mlme.tid_state_rx[tid] != HT_AGG_STATE_OPERATIONAL) goto end_reorder; tid_agg_rx = sta->ampdu_mlme.tid_rx[tid]; - /* null data frames are excluded */ - if (unlikely(fc & IEEE80211_STYPE_NULLFUNC)) + /* qos null data frames are excluded */ + if (unlikely(hdr->frame_control & cpu_to_le16(IEEE80211_STYPE_NULLFUNC))) goto end_reorder; /* new un-ordered ampdu frame - process it */ @@ -2143,7 +2162,7 @@ static u8 ieee80211_rx_reorder_ampdu(struct ieee80211_local *local, /* if this mpdu is fragmented - terminate rx aggregation session */ sc = le16_to_cpu(hdr->seq_ctrl); if (sc & IEEE80211_SCTL_FRAG) { - ieee80211_sta_stop_rx_ba_session(sta->sdata->dev, sta->addr, + ieee80211_sta_stop_rx_ba_session(sta->sdata, sta->sta.addr, tid, 0, WLAN_REASON_QSTA_REQUIRE_SETUP); ret = 1; goto end_reorder; @@ -2165,7 +2184,6 @@ void __ieee80211_rx(struct ieee80211_hw *hw, struct sk_buff *skb, struct ieee80211_rx_status *status) { struct ieee80211_local *local = hw_to_local(hw); - u32 pkt_load; struct ieee80211_rate *rate = NULL; struct ieee80211_supported_band *sband; @@ -2205,11 +2223,8 @@ void __ieee80211_rx(struct ieee80211_hw *hw, struct sk_buff *skb, return; } - pkt_load = ieee80211_rx_load_stats(local, skb, status, rate); - local->channel_use_raw += pkt_load; - if (!ieee80211_rx_reorder_ampdu(local, skb)) - __ieee80211_rx_handle_packet(hw, skb, status, pkt_load, rate); + __ieee80211_rx_handle_packet(hw, skb, status, rate); rcu_read_unlock(); } diff --git a/net/mac80211/scan.c b/net/mac80211/scan.c new file mode 100644 index 000000000000..8e6685e7ae85 --- /dev/null +++ b/net/mac80211/scan.c @@ -0,0 +1,937 @@ +/* + * Scanning implementation + * + * Copyright 2003, Jouni Malinen <jkmaline@cc.hut.fi> + * Copyright 2004, Instant802 Networks, Inc. + * Copyright 2005, Devicescape Software, Inc. + * Copyright 2006-2007 Jiri Benc <jbenc@suse.cz> + * Copyright 2007, Michael Wu <flamingice@sourmilk.net> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +/* TODO: + * order BSS list by RSSI(?) ("quality of AP") + * scan result table filtering (by capability (privacy, IBSS/BSS, WPA/RSN IE, + * SSID) + */ + +#include <linux/wireless.h> +#include <linux/if_arp.h> +#include <net/mac80211.h> +#include <net/iw_handler.h> + +#include "ieee80211_i.h" +#include "mesh.h" + +#define IEEE80211_PROBE_DELAY (HZ / 33) +#define IEEE80211_CHANNEL_TIME (HZ / 33) +#define IEEE80211_PASSIVE_CHANNEL_TIME (HZ / 5) + +void ieee80211_rx_bss_list_init(struct ieee80211_local *local) +{ + spin_lock_init(&local->bss_lock); + INIT_LIST_HEAD(&local->bss_list); +} + +void ieee80211_rx_bss_list_deinit(struct ieee80211_local *local) +{ + struct ieee80211_bss *bss, *tmp; + + list_for_each_entry_safe(bss, tmp, &local->bss_list, list) + ieee80211_rx_bss_put(local, bss); +} + +struct ieee80211_bss * +ieee80211_rx_bss_get(struct ieee80211_local *local, u8 *bssid, int freq, + u8 *ssid, u8 ssid_len) +{ + struct ieee80211_bss *bss; + + spin_lock_bh(&local->bss_lock); + bss = local->bss_hash[STA_HASH(bssid)]; + while (bss) { + if (!bss_mesh_cfg(bss) && + !memcmp(bss->bssid, bssid, ETH_ALEN) && + bss->freq == freq && + bss->ssid_len == ssid_len && + (ssid_len == 0 || !memcmp(bss->ssid, ssid, ssid_len))) { + atomic_inc(&bss->users); + break; + } + bss = bss->hnext; + } + spin_unlock_bh(&local->bss_lock); + return bss; +} + +/* Caller must hold local->bss_lock */ +static void __ieee80211_rx_bss_hash_add(struct ieee80211_local *local, + struct ieee80211_bss *bss) +{ + u8 hash_idx; + + if (bss_mesh_cfg(bss)) + hash_idx = mesh_id_hash(bss_mesh_id(bss), + bss_mesh_id_len(bss)); + else + hash_idx = STA_HASH(bss->bssid); + + bss->hnext = local->bss_hash[hash_idx]; + local->bss_hash[hash_idx] = bss; +} + +/* Caller must hold local->bss_lock */ +static void __ieee80211_rx_bss_hash_del(struct ieee80211_local *local, + struct ieee80211_bss *bss) +{ + struct ieee80211_bss *b, *prev = NULL; + b = local->bss_hash[STA_HASH(bss->bssid)]; + while (b) { + if (b == bss) { + if (!prev) + local->bss_hash[STA_HASH(bss->bssid)] = + bss->hnext; + else + prev->hnext = bss->hnext; + break; + } + prev = b; + b = b->hnext; + } +} + +struct ieee80211_bss * +ieee80211_rx_bss_add(struct ieee80211_local *local, u8 *bssid, int freq, + u8 *ssid, u8 ssid_len) +{ + struct ieee80211_bss *bss; + + bss = kzalloc(sizeof(*bss), GFP_ATOMIC); + if (!bss) + return NULL; + atomic_set(&bss->users, 2); + memcpy(bss->bssid, bssid, ETH_ALEN); + bss->freq = freq; + if (ssid && ssid_len <= IEEE80211_MAX_SSID_LEN) { + memcpy(bss->ssid, ssid, ssid_len); + bss->ssid_len = ssid_len; + } + + spin_lock_bh(&local->bss_lock); + /* TODO: order by RSSI? */ + list_add_tail(&bss->list, &local->bss_list); + __ieee80211_rx_bss_hash_add(local, bss); + spin_unlock_bh(&local->bss_lock); + return bss; +} + +#ifdef CONFIG_MAC80211_MESH +static struct ieee80211_bss * +ieee80211_rx_mesh_bss_get(struct ieee80211_local *local, u8 *mesh_id, int mesh_id_len, + u8 *mesh_cfg, int freq) +{ + struct ieee80211_bss *bss; + + spin_lock_bh(&local->bss_lock); + bss = local->bss_hash[mesh_id_hash(mesh_id, mesh_id_len)]; + while (bss) { + if (bss_mesh_cfg(bss) && + !memcmp(bss_mesh_cfg(bss), mesh_cfg, MESH_CFG_CMP_LEN) && + bss->freq == freq && + mesh_id_len == bss->mesh_id_len && + (mesh_id_len == 0 || !memcmp(bss->mesh_id, mesh_id, + mesh_id_len))) { + atomic_inc(&bss->users); + break; + } + bss = bss->hnext; + } + spin_unlock_bh(&local->bss_lock); + return bss; +} + +static struct ieee80211_bss * +ieee80211_rx_mesh_bss_add(struct ieee80211_local *local, u8 *mesh_id, int mesh_id_len, + u8 *mesh_cfg, int mesh_config_len, int freq) +{ + struct ieee80211_bss *bss; + + if (mesh_config_len != MESH_CFG_LEN) + return NULL; + + bss = kzalloc(sizeof(*bss), GFP_ATOMIC); + if (!bss) + return NULL; + + bss->mesh_cfg = kmalloc(MESH_CFG_CMP_LEN, GFP_ATOMIC); + if (!bss->mesh_cfg) { + kfree(bss); + return NULL; + } + + if (mesh_id_len && mesh_id_len <= IEEE80211_MAX_MESH_ID_LEN) { + bss->mesh_id = kmalloc(mesh_id_len, GFP_ATOMIC); + if (!bss->mesh_id) { + kfree(bss->mesh_cfg); + kfree(bss); + return NULL; + } + memcpy(bss->mesh_id, mesh_id, mesh_id_len); + } + + atomic_set(&bss->users, 2); + memcpy(bss->mesh_cfg, mesh_cfg, MESH_CFG_CMP_LEN); + bss->mesh_id_len = mesh_id_len; + bss->freq = freq; + spin_lock_bh(&local->bss_lock); + /* TODO: order by RSSI? */ + list_add_tail(&bss->list, &local->bss_list); + __ieee80211_rx_bss_hash_add(local, bss); + spin_unlock_bh(&local->bss_lock); + return bss; +} +#endif + +static void ieee80211_rx_bss_free(struct ieee80211_bss *bss) +{ + kfree(bss->ies); + kfree(bss_mesh_id(bss)); + kfree(bss_mesh_cfg(bss)); + kfree(bss); +} + +void ieee80211_rx_bss_put(struct ieee80211_local *local, + struct ieee80211_bss *bss) +{ + local_bh_disable(); + if (!atomic_dec_and_lock(&bss->users, &local->bss_lock)) { + local_bh_enable(); + return; + } + + __ieee80211_rx_bss_hash_del(local, bss); + list_del(&bss->list); + spin_unlock_bh(&local->bss_lock); + ieee80211_rx_bss_free(bss); +} + +struct ieee80211_bss * +ieee80211_bss_info_update(struct ieee80211_local *local, + struct ieee80211_rx_status *rx_status, + struct ieee80211_mgmt *mgmt, + size_t len, + struct ieee802_11_elems *elems, + int freq, bool beacon) +{ + struct ieee80211_bss *bss; + int clen; + +#ifdef CONFIG_MAC80211_MESH + if (elems->mesh_config) + bss = ieee80211_rx_mesh_bss_get(local, elems->mesh_id, + elems->mesh_id_len, elems->mesh_config, freq); + else +#endif + bss = ieee80211_rx_bss_get(local, mgmt->bssid, freq, + elems->ssid, elems->ssid_len); + if (!bss) { +#ifdef CONFIG_MAC80211_MESH + if (elems->mesh_config) + bss = ieee80211_rx_mesh_bss_add(local, elems->mesh_id, + elems->mesh_id_len, elems->mesh_config, + elems->mesh_config_len, freq); + else +#endif + bss = ieee80211_rx_bss_add(local, mgmt->bssid, freq, + elems->ssid, elems->ssid_len); + if (!bss) + return NULL; + } else { +#if 0 + /* TODO: order by RSSI? */ + spin_lock_bh(&local->bss_lock); + list_move_tail(&bss->list, &local->bss_list); + spin_unlock_bh(&local->bss_lock); +#endif + } + + /* save the ERP value so that it is available at association time */ + if (elems->erp_info && elems->erp_info_len >= 1) { + bss->erp_value = elems->erp_info[0]; + bss->has_erp_value = 1; + } + + bss->beacon_int = le16_to_cpu(mgmt->u.beacon.beacon_int); + bss->capability = le16_to_cpu(mgmt->u.beacon.capab_info); + + if (elems->tim) { + struct ieee80211_tim_ie *tim_ie = + (struct ieee80211_tim_ie *)elems->tim; + bss->dtim_period = tim_ie->dtim_period; + } + + /* set default value for buggy APs */ + if (!elems->tim || bss->dtim_period == 0) + bss->dtim_period = 1; + + bss->supp_rates_len = 0; + if (elems->supp_rates) { + clen = IEEE80211_MAX_SUPP_RATES - bss->supp_rates_len; + if (clen > elems->supp_rates_len) + clen = elems->supp_rates_len; + memcpy(&bss->supp_rates[bss->supp_rates_len], elems->supp_rates, + clen); + bss->supp_rates_len += clen; + } + if (elems->ext_supp_rates) { + clen = IEEE80211_MAX_SUPP_RATES - bss->supp_rates_len; + if (clen > elems->ext_supp_rates_len) + clen = elems->ext_supp_rates_len; + memcpy(&bss->supp_rates[bss->supp_rates_len], + elems->ext_supp_rates, clen); + bss->supp_rates_len += clen; + } + + bss->band = rx_status->band; + + bss->timestamp = le64_to_cpu(mgmt->u.beacon.timestamp); + bss->last_update = jiffies; + bss->signal = rx_status->signal; + bss->noise = rx_status->noise; + bss->qual = rx_status->qual; + bss->wmm_used = elems->wmm_param || elems->wmm_info; + + if (!beacon) + bss->last_probe_resp = jiffies; + + /* + * For probe responses, or if we don't have any information yet, + * use the IEs from the beacon. + */ + if (!bss->ies || !beacon) { + if (bss->ies == NULL || bss->ies_len < elems->total_len) { + kfree(bss->ies); + bss->ies = kmalloc(elems->total_len, GFP_ATOMIC); + } + if (bss->ies) { + memcpy(bss->ies, elems->ie_start, elems->total_len); + bss->ies_len = elems->total_len; + } else + bss->ies_len = 0; + } + + return bss; +} + +ieee80211_rx_result +ieee80211_scan_rx(struct ieee80211_sub_if_data *sdata, struct sk_buff *skb, + struct ieee80211_rx_status *rx_status) +{ + struct ieee80211_mgmt *mgmt; + struct ieee80211_bss *bss; + u8 *elements; + struct ieee80211_channel *channel; + size_t baselen; + int freq; + __le16 fc; + bool presp, beacon = false; + struct ieee802_11_elems elems; + + if (skb->len < 2) + return RX_DROP_UNUSABLE; + + mgmt = (struct ieee80211_mgmt *) skb->data; + fc = mgmt->frame_control; + + if (ieee80211_is_ctl(fc)) + return RX_CONTINUE; + + if (skb->len < 24) + return RX_DROP_MONITOR; + + presp = ieee80211_is_probe_resp(fc); + if (presp) { + /* ignore ProbeResp to foreign address */ + if (memcmp(mgmt->da, sdata->dev->dev_addr, ETH_ALEN)) + return RX_DROP_MONITOR; + + presp = true; + elements = mgmt->u.probe_resp.variable; + baselen = offsetof(struct ieee80211_mgmt, u.probe_resp.variable); + } else { + beacon = ieee80211_is_beacon(fc); + baselen = offsetof(struct ieee80211_mgmt, u.beacon.variable); + elements = mgmt->u.beacon.variable; + } + + if (!presp && !beacon) + return RX_CONTINUE; + + if (baselen > skb->len) + return RX_DROP_MONITOR; + + ieee802_11_parse_elems(elements, skb->len - baselen, &elems); + + if (elems.ds_params && elems.ds_params_len == 1) + freq = ieee80211_channel_to_frequency(elems.ds_params[0]); + else + freq = rx_status->freq; + + channel = ieee80211_get_channel(sdata->local->hw.wiphy, freq); + + if (!channel || channel->flags & IEEE80211_CHAN_DISABLED) + return RX_DROP_MONITOR; + + bss = ieee80211_bss_info_update(sdata->local, rx_status, + mgmt, skb->len, &elems, + freq, beacon); + ieee80211_rx_bss_put(sdata->local, bss); + + dev_kfree_skb(skb); + return RX_QUEUED; +} + +static void ieee80211_send_nullfunc(struct ieee80211_local *local, + struct ieee80211_sub_if_data *sdata, + int powersave) +{ + struct sk_buff *skb; + struct ieee80211_hdr *nullfunc; + __le16 fc; + + skb = dev_alloc_skb(local->hw.extra_tx_headroom + 24); + if (!skb) { + printk(KERN_DEBUG "%s: failed to allocate buffer for nullfunc " + "frame\n", sdata->dev->name); + return; + } + skb_reserve(skb, local->hw.extra_tx_headroom); + + nullfunc = (struct ieee80211_hdr *) skb_put(skb, 24); + memset(nullfunc, 0, 24); + fc = cpu_to_le16(IEEE80211_FTYPE_DATA | IEEE80211_STYPE_NULLFUNC | + IEEE80211_FCTL_TODS); + if (powersave) + fc |= cpu_to_le16(IEEE80211_FCTL_PM); + nullfunc->frame_control = fc; + memcpy(nullfunc->addr1, sdata->u.sta.bssid, ETH_ALEN); + memcpy(nullfunc->addr2, sdata->dev->dev_addr, ETH_ALEN); + memcpy(nullfunc->addr3, sdata->u.sta.bssid, ETH_ALEN); + + ieee80211_tx_skb(sdata, skb, 0); +} + +void ieee80211_scan_completed(struct ieee80211_hw *hw) +{ + struct ieee80211_local *local = hw_to_local(hw); + struct ieee80211_sub_if_data *sdata; + union iwreq_data wrqu; + + if (WARN_ON(!local->hw_scanning && !local->sw_scanning)) + return; + + local->last_scan_completed = jiffies; + memset(&wrqu, 0, sizeof(wrqu)); + + /* + * local->scan_sdata could have been NULLed by the interface + * down code in case we were scanning on an interface that is + * being taken down. + */ + sdata = local->scan_sdata; + if (sdata) + wireless_send_event(sdata->dev, SIOCGIWSCAN, &wrqu, NULL); + + if (local->hw_scanning) { + local->hw_scanning = false; + if (ieee80211_hw_config(local)) + printk(KERN_DEBUG "%s: failed to restore operational " + "channel after scan\n", wiphy_name(local->hw.wiphy)); + + goto done; + } + + local->sw_scanning = false; + if (ieee80211_hw_config(local)) + printk(KERN_DEBUG "%s: failed to restore operational " + "channel after scan\n", wiphy_name(local->hw.wiphy)); + + + netif_tx_lock_bh(local->mdev); + netif_addr_lock(local->mdev); + local->filter_flags &= ~FIF_BCN_PRBRESP_PROMISC; + local->ops->configure_filter(local_to_hw(local), + FIF_BCN_PRBRESP_PROMISC, + &local->filter_flags, + local->mdev->mc_count, + local->mdev->mc_list); + + netif_addr_unlock(local->mdev); + netif_tx_unlock_bh(local->mdev); + + rcu_read_lock(); + list_for_each_entry_rcu(sdata, &local->interfaces, list) { + /* Tell AP we're back */ + if (sdata->vif.type == NL80211_IFTYPE_STATION) { + if (sdata->u.sta.flags & IEEE80211_STA_ASSOCIATED) { + ieee80211_send_nullfunc(local, sdata, 0); + netif_tx_wake_all_queues(sdata->dev); + } + } else + netif_tx_wake_all_queues(sdata->dev); + } + rcu_read_unlock(); + + done: + ieee80211_mlme_notify_scan_completed(local); + ieee80211_mesh_notify_scan_completed(local); +} +EXPORT_SYMBOL(ieee80211_scan_completed); + + +void ieee80211_scan_work(struct work_struct *work) +{ + struct ieee80211_local *local = + container_of(work, struct ieee80211_local, scan_work.work); + struct ieee80211_sub_if_data *sdata = local->scan_sdata; + struct ieee80211_supported_band *sband; + struct ieee80211_channel *chan; + int skip; + unsigned long next_delay = 0; + + /* + * Avoid re-scheduling when the sdata is going away. + */ + if (!netif_running(sdata->dev)) + return; + + switch (local->scan_state) { + case SCAN_SET_CHANNEL: + /* + * Get current scan band. scan_band may be IEEE80211_NUM_BANDS + * after we successfully scanned the last channel of the last + * band (and the last band is supported by the hw) + */ + if (local->scan_band < IEEE80211_NUM_BANDS) + sband = local->hw.wiphy->bands[local->scan_band]; + else + sband = NULL; + + /* + * If we are at an unsupported band and have more bands + * left to scan, advance to the next supported one. + */ + while (!sband && local->scan_band < IEEE80211_NUM_BANDS - 1) { + local->scan_band++; + sband = local->hw.wiphy->bands[local->scan_band]; + local->scan_channel_idx = 0; + } + + /* if no more bands/channels left, complete scan */ + if (!sband || local->scan_channel_idx >= sband->n_channels) { + ieee80211_scan_completed(local_to_hw(local)); + return; + } + skip = 0; + chan = &sband->channels[local->scan_channel_idx]; + + if (chan->flags & IEEE80211_CHAN_DISABLED || + (sdata->vif.type == NL80211_IFTYPE_ADHOC && + chan->flags & IEEE80211_CHAN_NO_IBSS)) + skip = 1; + + if (!skip) { + local->scan_channel = chan; + if (ieee80211_hw_config(local)) { + printk(KERN_DEBUG "%s: failed to set freq to " + "%d MHz for scan\n", wiphy_name(local->hw.wiphy), + chan->center_freq); + skip = 1; + } + } + + /* advance state machine to next channel/band */ + local->scan_channel_idx++; + if (local->scan_channel_idx >= sband->n_channels) { + /* + * scan_band may end up == IEEE80211_NUM_BANDS, but + * we'll catch that case above and complete the scan + * if that is the case. + */ + local->scan_band++; + local->scan_channel_idx = 0; + } + + if (skip) + break; + + next_delay = IEEE80211_PROBE_DELAY + + usecs_to_jiffies(local->hw.channel_change_time); + local->scan_state = SCAN_SEND_PROBE; + break; + case SCAN_SEND_PROBE: + next_delay = IEEE80211_PASSIVE_CHANNEL_TIME; + local->scan_state = SCAN_SET_CHANNEL; + + if (local->scan_channel->flags & IEEE80211_CHAN_PASSIVE_SCAN) + break; + ieee80211_send_probe_req(sdata, NULL, local->scan_ssid, + local->scan_ssid_len); + next_delay = IEEE80211_CHANNEL_TIME; + break; + } + + queue_delayed_work(local->hw.workqueue, &local->scan_work, + next_delay); +} + + +int ieee80211_start_scan(struct ieee80211_sub_if_data *scan_sdata, + u8 *ssid, size_t ssid_len) +{ + struct ieee80211_local *local = scan_sdata->local; + struct ieee80211_sub_if_data *sdata; + + if (ssid_len > IEEE80211_MAX_SSID_LEN) + return -EINVAL; + + /* MLME-SCAN.request (page 118) page 144 (11.1.3.1) + * BSSType: INFRASTRUCTURE, INDEPENDENT, ANY_BSS + * BSSID: MACAddress + * SSID + * ScanType: ACTIVE, PASSIVE + * ProbeDelay: delay (in microseconds) to be used prior to transmitting + * a Probe frame during active scanning + * ChannelList + * MinChannelTime (>= ProbeDelay), in TU + * MaxChannelTime: (>= MinChannelTime), in TU + */ + + /* MLME-SCAN.confirm + * BSSDescriptionSet + * ResultCode: SUCCESS, INVALID_PARAMETERS + */ + + if (local->sw_scanning || local->hw_scanning) { + if (local->scan_sdata == scan_sdata) + return 0; + return -EBUSY; + } + + if (local->ops->hw_scan) { + int rc; + + local->hw_scanning = true; + rc = local->ops->hw_scan(local_to_hw(local), ssid, ssid_len); + if (rc) { + local->hw_scanning = false; + return rc; + } + local->scan_sdata = scan_sdata; + return 0; + } + + local->sw_scanning = true; + + rcu_read_lock(); + list_for_each_entry_rcu(sdata, &local->interfaces, list) { + if (sdata->vif.type == NL80211_IFTYPE_STATION) { + if (sdata->u.sta.flags & IEEE80211_STA_ASSOCIATED) { + netif_tx_stop_all_queues(sdata->dev); + ieee80211_send_nullfunc(local, sdata, 1); + } + } else + netif_tx_stop_all_queues(sdata->dev); + } + rcu_read_unlock(); + + if (ssid) { + local->scan_ssid_len = ssid_len; + memcpy(local->scan_ssid, ssid, ssid_len); + } else + local->scan_ssid_len = 0; + local->scan_state = SCAN_SET_CHANNEL; + local->scan_channel_idx = 0; + local->scan_band = IEEE80211_BAND_2GHZ; + local->scan_sdata = scan_sdata; + + netif_addr_lock_bh(local->mdev); + local->filter_flags |= FIF_BCN_PRBRESP_PROMISC; + local->ops->configure_filter(local_to_hw(local), + FIF_BCN_PRBRESP_PROMISC, + &local->filter_flags, + local->mdev->mc_count, + local->mdev->mc_list); + netif_addr_unlock_bh(local->mdev); + + /* TODO: start scan as soon as all nullfunc frames are ACKed */ + queue_delayed_work(local->hw.workqueue, &local->scan_work, + IEEE80211_CHANNEL_TIME); + + return 0; +} + + +int ieee80211_request_scan(struct ieee80211_sub_if_data *sdata, + u8 *ssid, size_t ssid_len) +{ + struct ieee80211_local *local = sdata->local; + struct ieee80211_if_sta *ifsta; + + if (sdata->vif.type != NL80211_IFTYPE_STATION) + return ieee80211_start_scan(sdata, ssid, ssid_len); + + /* + * STA has a state machine that might need to defer scanning + * while it's trying to associate/authenticate, therefore we + * queue it up to the state machine in that case. + */ + + if (local->sw_scanning || local->hw_scanning) { + if (local->scan_sdata == sdata) + return 0; + return -EBUSY; + } + + ifsta = &sdata->u.sta; + + ifsta->scan_ssid_len = ssid_len; + if (ssid_len) + memcpy(ifsta->scan_ssid, ssid, ssid_len); + set_bit(IEEE80211_STA_REQ_SCAN, &ifsta->request); + queue_work(local->hw.workqueue, &ifsta->work); + + return 0; +} + + +static void ieee80211_scan_add_ies(struct iw_request_info *info, + struct ieee80211_bss *bss, + char **current_ev, char *end_buf) +{ + u8 *pos, *end, *next; + struct iw_event iwe; + + if (bss == NULL || bss->ies == NULL) + return; + + /* + * If needed, fragment the IEs buffer (at IE boundaries) into short + * enough fragments to fit into IW_GENERIC_IE_MAX octet messages. + */ + pos = bss->ies; + end = pos + bss->ies_len; + + while (end - pos > IW_GENERIC_IE_MAX) { + next = pos + 2 + pos[1]; + while (next + 2 + next[1] - pos < IW_GENERIC_IE_MAX) + next = next + 2 + next[1]; + + memset(&iwe, 0, sizeof(iwe)); + iwe.cmd = IWEVGENIE; + iwe.u.data.length = next - pos; + *current_ev = iwe_stream_add_point(info, *current_ev, + end_buf, &iwe, pos); + + pos = next; + } + + if (end > pos) { + memset(&iwe, 0, sizeof(iwe)); + iwe.cmd = IWEVGENIE; + iwe.u.data.length = end - pos; + *current_ev = iwe_stream_add_point(info, *current_ev, + end_buf, &iwe, pos); + } +} + + +static char * +ieee80211_scan_result(struct ieee80211_local *local, + struct iw_request_info *info, + struct ieee80211_bss *bss, + char *current_ev, char *end_buf) +{ + struct iw_event iwe; + char *buf; + + if (time_after(jiffies, + bss->last_update + IEEE80211_SCAN_RESULT_EXPIRE)) + return current_ev; + + memset(&iwe, 0, sizeof(iwe)); + iwe.cmd = SIOCGIWAP; + iwe.u.ap_addr.sa_family = ARPHRD_ETHER; + memcpy(iwe.u.ap_addr.sa_data, bss->bssid, ETH_ALEN); + current_ev = iwe_stream_add_event(info, current_ev, end_buf, &iwe, + IW_EV_ADDR_LEN); + + memset(&iwe, 0, sizeof(iwe)); + iwe.cmd = SIOCGIWESSID; + if (bss_mesh_cfg(bss)) { + iwe.u.data.length = bss_mesh_id_len(bss); + iwe.u.data.flags = 1; + current_ev = iwe_stream_add_point(info, current_ev, end_buf, + &iwe, bss_mesh_id(bss)); + } else { + iwe.u.data.length = bss->ssid_len; + iwe.u.data.flags = 1; + current_ev = iwe_stream_add_point(info, current_ev, end_buf, + &iwe, bss->ssid); + } + + if (bss->capability & (WLAN_CAPABILITY_ESS | WLAN_CAPABILITY_IBSS) + || bss_mesh_cfg(bss)) { + memset(&iwe, 0, sizeof(iwe)); + iwe.cmd = SIOCGIWMODE; + if (bss_mesh_cfg(bss)) + iwe.u.mode = IW_MODE_MESH; + else if (bss->capability & WLAN_CAPABILITY_ESS) + iwe.u.mode = IW_MODE_MASTER; + else + iwe.u.mode = IW_MODE_ADHOC; + current_ev = iwe_stream_add_event(info, current_ev, end_buf, + &iwe, IW_EV_UINT_LEN); + } + + memset(&iwe, 0, sizeof(iwe)); + iwe.cmd = SIOCGIWFREQ; + iwe.u.freq.m = ieee80211_frequency_to_channel(bss->freq); + iwe.u.freq.e = 0; + current_ev = iwe_stream_add_event(info, current_ev, end_buf, &iwe, + IW_EV_FREQ_LEN); + + memset(&iwe, 0, sizeof(iwe)); + iwe.cmd = SIOCGIWFREQ; + iwe.u.freq.m = bss->freq; + iwe.u.freq.e = 6; + current_ev = iwe_stream_add_event(info, current_ev, end_buf, &iwe, + IW_EV_FREQ_LEN); + memset(&iwe, 0, sizeof(iwe)); + iwe.cmd = IWEVQUAL; + iwe.u.qual.qual = bss->qual; + iwe.u.qual.level = bss->signal; + iwe.u.qual.noise = bss->noise; + iwe.u.qual.updated = local->wstats_flags; + current_ev = iwe_stream_add_event(info, current_ev, end_buf, &iwe, + IW_EV_QUAL_LEN); + + memset(&iwe, 0, sizeof(iwe)); + iwe.cmd = SIOCGIWENCODE; + if (bss->capability & WLAN_CAPABILITY_PRIVACY) + iwe.u.data.flags = IW_ENCODE_ENABLED | IW_ENCODE_NOKEY; + else + iwe.u.data.flags = IW_ENCODE_DISABLED; + iwe.u.data.length = 0; + current_ev = iwe_stream_add_point(info, current_ev, end_buf, + &iwe, ""); + + ieee80211_scan_add_ies(info, bss, ¤t_ev, end_buf); + + if (bss->supp_rates_len > 0) { + /* display all supported rates in readable format */ + char *p = current_ev + iwe_stream_lcp_len(info); + int i; + + memset(&iwe, 0, sizeof(iwe)); + iwe.cmd = SIOCGIWRATE; + /* Those two flags are ignored... */ + iwe.u.bitrate.fixed = iwe.u.bitrate.disabled = 0; + + for (i = 0; i < bss->supp_rates_len; i++) { + iwe.u.bitrate.value = ((bss->supp_rates[i] & + 0x7f) * 500000); + p = iwe_stream_add_value(info, current_ev, p, + end_buf, &iwe, IW_EV_PARAM_LEN); + } + current_ev = p; + } + + buf = kmalloc(30, GFP_ATOMIC); + if (buf) { + memset(&iwe, 0, sizeof(iwe)); + iwe.cmd = IWEVCUSTOM; + sprintf(buf, "tsf=%016llx", (unsigned long long)(bss->timestamp)); + iwe.u.data.length = strlen(buf); + current_ev = iwe_stream_add_point(info, current_ev, end_buf, + &iwe, buf); + memset(&iwe, 0, sizeof(iwe)); + iwe.cmd = IWEVCUSTOM; + sprintf(buf, " Last beacon: %dms ago", + jiffies_to_msecs(jiffies - bss->last_update)); + iwe.u.data.length = strlen(buf); + current_ev = iwe_stream_add_point(info, current_ev, + end_buf, &iwe, buf); + kfree(buf); + } + + if (bss_mesh_cfg(bss)) { + u8 *cfg = bss_mesh_cfg(bss); + buf = kmalloc(50, GFP_ATOMIC); + if (buf) { + memset(&iwe, 0, sizeof(iwe)); + iwe.cmd = IWEVCUSTOM; + sprintf(buf, "Mesh network (version %d)", cfg[0]); + iwe.u.data.length = strlen(buf); + current_ev = iwe_stream_add_point(info, current_ev, + end_buf, + &iwe, buf); + sprintf(buf, "Path Selection Protocol ID: " + "0x%02X%02X%02X%02X", cfg[1], cfg[2], cfg[3], + cfg[4]); + iwe.u.data.length = strlen(buf); + current_ev = iwe_stream_add_point(info, current_ev, + end_buf, + &iwe, buf); + sprintf(buf, "Path Selection Metric ID: " + "0x%02X%02X%02X%02X", cfg[5], cfg[6], cfg[7], + cfg[8]); + iwe.u.data.length = strlen(buf); + current_ev = iwe_stream_add_point(info, current_ev, + end_buf, + &iwe, buf); + sprintf(buf, "Congestion Control Mode ID: " + "0x%02X%02X%02X%02X", cfg[9], cfg[10], + cfg[11], cfg[12]); + iwe.u.data.length = strlen(buf); + current_ev = iwe_stream_add_point(info, current_ev, + end_buf, + &iwe, buf); + sprintf(buf, "Channel Precedence: " + "0x%02X%02X%02X%02X", cfg[13], cfg[14], + cfg[15], cfg[16]); + iwe.u.data.length = strlen(buf); + current_ev = iwe_stream_add_point(info, current_ev, + end_buf, + &iwe, buf); + kfree(buf); + } + } + + return current_ev; +} + + +int ieee80211_scan_results(struct ieee80211_local *local, + struct iw_request_info *info, + char *buf, size_t len) +{ + char *current_ev = buf; + char *end_buf = buf + len; + struct ieee80211_bss *bss; + + spin_lock_bh(&local->bss_lock); + list_for_each_entry(bss, &local->bss_list, list) { + if (buf + len - current_ev <= IW_EV_ADDR_LEN) { + spin_unlock_bh(&local->bss_lock); + return -E2BIG; + } + current_ev = ieee80211_scan_result(local, info, bss, + current_ev, end_buf); + } + spin_unlock_bh(&local->bss_lock); + return current_ev - buf; +} diff --git a/net/mac80211/spectmgmt.c b/net/mac80211/spectmgmt.c new file mode 100644 index 000000000000..f72bad636d8e --- /dev/null +++ b/net/mac80211/spectmgmt.c @@ -0,0 +1,86 @@ +/* + * spectrum management + * + * Copyright 2003, Jouni Malinen <jkmaline@cc.hut.fi> + * Copyright 2002-2005, Instant802 Networks, Inc. + * Copyright 2005-2006, Devicescape Software, Inc. + * Copyright 2006-2007 Jiri Benc <jbenc@suse.cz> + * Copyright 2007, Michael Wu <flamingice@sourmilk.net> + * Copyright 2007-2008, Intel Corporation + * Copyright 2008, Johannes Berg <johannes@sipsolutions.net> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +#include <linux/ieee80211.h> +#include <net/wireless.h> +#include <net/mac80211.h> +#include "ieee80211_i.h" +#include "sta_info.h" +#include "wme.h" + +static void ieee80211_send_refuse_measurement_request(struct ieee80211_sub_if_data *sdata, + struct ieee80211_msrment_ie *request_ie, + const u8 *da, const u8 *bssid, + u8 dialog_token) +{ + struct ieee80211_local *local = sdata->local; + struct sk_buff *skb; + struct ieee80211_mgmt *msr_report; + + skb = dev_alloc_skb(sizeof(*msr_report) + local->hw.extra_tx_headroom + + sizeof(struct ieee80211_msrment_ie)); + + if (!skb) { + printk(KERN_ERR "%s: failed to allocate buffer for " + "measurement report frame\n", sdata->dev->name); + return; + } + + skb_reserve(skb, local->hw.extra_tx_headroom); + msr_report = (struct ieee80211_mgmt *)skb_put(skb, 24); + memset(msr_report, 0, 24); + memcpy(msr_report->da, da, ETH_ALEN); + memcpy(msr_report->sa, sdata->dev->dev_addr, ETH_ALEN); + memcpy(msr_report->bssid, bssid, ETH_ALEN); + msr_report->frame_control = cpu_to_le16(IEEE80211_FTYPE_MGMT | + IEEE80211_STYPE_ACTION); + + skb_put(skb, 1 + sizeof(msr_report->u.action.u.measurement)); + msr_report->u.action.category = WLAN_CATEGORY_SPECTRUM_MGMT; + msr_report->u.action.u.measurement.action_code = + WLAN_ACTION_SPCT_MSR_RPRT; + msr_report->u.action.u.measurement.dialog_token = dialog_token; + + msr_report->u.action.u.measurement.element_id = WLAN_EID_MEASURE_REPORT; + msr_report->u.action.u.measurement.length = + sizeof(struct ieee80211_msrment_ie); + + memset(&msr_report->u.action.u.measurement.msr_elem, 0, + sizeof(struct ieee80211_msrment_ie)); + msr_report->u.action.u.measurement.msr_elem.token = request_ie->token; + msr_report->u.action.u.measurement.msr_elem.mode |= + IEEE80211_SPCT_MSR_RPRT_MODE_REFUSED; + msr_report->u.action.u.measurement.msr_elem.type = request_ie->type; + + ieee80211_tx_skb(sdata, skb, 0); +} + +void ieee80211_process_measurement_req(struct ieee80211_sub_if_data *sdata, + struct ieee80211_mgmt *mgmt, + size_t len) +{ + /* + * Ignoring measurement request is spec violation. + * Mandatory measurements must be reported optional + * measurements might be refused or reported incapable + * For now just refuse + * TODO: Answer basic measurement as unmeasured + */ + ieee80211_send_refuse_measurement_request(sdata, + &mgmt->u.action.u.measurement.msr_elem, + mgmt->sa, mgmt->bssid, + mgmt->u.action.u.measurement.dialog_token); +} diff --git a/net/mac80211/sta_info.c b/net/mac80211/sta_info.c index 7d4fe4a52929..9b72d15bc8dc 100644 --- a/net/mac80211/sta_info.c +++ b/net/mac80211/sta_info.c @@ -73,11 +73,11 @@ static int sta_info_hash_del(struct ieee80211_local *local, { struct sta_info *s; - s = local->sta_hash[STA_HASH(sta->addr)]; + s = local->sta_hash[STA_HASH(sta->sta.addr)]; if (!s) return -ENOENT; if (s == sta) { - rcu_assign_pointer(local->sta_hash[STA_HASH(sta->addr)], + rcu_assign_pointer(local->sta_hash[STA_HASH(sta->sta.addr)], s->hnext); return 0; } @@ -93,26 +93,19 @@ static int sta_info_hash_del(struct ieee80211_local *local, } /* protected by RCU */ -static struct sta_info *__sta_info_find(struct ieee80211_local *local, - u8 *addr) +struct sta_info *sta_info_get(struct ieee80211_local *local, const u8 *addr) { struct sta_info *sta; sta = rcu_dereference(local->sta_hash[STA_HASH(addr)]); while (sta) { - if (compare_ether_addr(sta->addr, addr) == 0) + if (compare_ether_addr(sta->sta.addr, addr) == 0) break; sta = rcu_dereference(sta->hnext); } return sta; } -struct sta_info *sta_info_get(struct ieee80211_local *local, u8 *addr) -{ - return __sta_info_find(local, addr); -} -EXPORT_SYMBOL(sta_info_get); - struct sta_info *sta_info_get_by_idx(struct ieee80211_local *local, int idx, struct net_device *dev) { @@ -135,6 +128,7 @@ struct sta_info *sta_info_get_by_idx(struct ieee80211_local *local, int idx, /** * __sta_info_free - internal STA free helper * + * @local: pointer to the global information * @sta: STA info to free * * This function must undo everything done by sta_info_alloc() @@ -145,12 +139,12 @@ static void __sta_info_free(struct ieee80211_local *local, { DECLARE_MAC_BUF(mbuf); - rate_control_free_sta(sta->rate_ctrl, sta->rate_ctrl_priv); + rate_control_free_sta(sta); rate_control_put(sta->rate_ctrl); #ifdef CONFIG_MAC80211_VERBOSE_DEBUG printk(KERN_DEBUG "%s: Destroyed STA %s\n", - wiphy_name(local->hw.wiphy), print_mac(mbuf, sta->addr)); + wiphy_name(local->hw.wiphy), print_mac(mbuf, sta->sta.addr)); #endif /* CONFIG_MAC80211_VERBOSE_DEBUG */ kfree(sta); @@ -202,14 +196,12 @@ void sta_info_destroy(struct sta_info *sta) dev_kfree_skb_any(skb); for (i = 0; i < STA_TID_NUM; i++) { - spin_lock_bh(&sta->ampdu_mlme.ampdu_rx); + spin_lock_bh(&sta->lock); if (sta->ampdu_mlme.tid_rx[i]) del_timer_sync(&sta->ampdu_mlme.tid_rx[i]->session_timer); - spin_unlock_bh(&sta->ampdu_mlme.ampdu_rx); - spin_lock_bh(&sta->ampdu_mlme.ampdu_tx); if (sta->ampdu_mlme.tid_tx[i]) del_timer_sync(&sta->ampdu_mlme.tid_tx[i]->addba_resp_timer); - spin_unlock_bh(&sta->ampdu_mlme.ampdu_tx); + spin_unlock_bh(&sta->lock); } __sta_info_free(local, sta); @@ -220,8 +212,8 @@ void sta_info_destroy(struct sta_info *sta) static void sta_info_hash_add(struct ieee80211_local *local, struct sta_info *sta) { - sta->hnext = local->sta_hash[STA_HASH(sta->addr)]; - rcu_assign_pointer(local->sta_hash[STA_HASH(sta->addr)], sta); + sta->hnext = local->sta_hash[STA_HASH(sta->sta.addr)]; + rcu_assign_pointer(local->sta_hash[STA_HASH(sta->sta.addr)], sta); } struct sta_info *sta_info_alloc(struct ieee80211_sub_if_data *sdata, @@ -232,32 +224,33 @@ struct sta_info *sta_info_alloc(struct ieee80211_sub_if_data *sdata, int i; DECLARE_MAC_BUF(mbuf); - sta = kzalloc(sizeof(*sta), gfp); + sta = kzalloc(sizeof(*sta) + local->hw.sta_data_size, gfp); if (!sta) return NULL; - memcpy(sta->addr, addr, ETH_ALEN); + spin_lock_init(&sta->lock); + spin_lock_init(&sta->flaglock); + + memcpy(sta->sta.addr, addr, ETH_ALEN); sta->local = local; sta->sdata = sdata; sta->rate_ctrl = rate_control_get(local->rate_ctrl); sta->rate_ctrl_priv = rate_control_alloc_sta(sta->rate_ctrl, - gfp); + &sta->sta, gfp); if (!sta->rate_ctrl_priv) { rate_control_put(sta->rate_ctrl); kfree(sta); return NULL; } - spin_lock_init(&sta->ampdu_mlme.ampdu_rx); - spin_lock_init(&sta->ampdu_mlme.ampdu_tx); for (i = 0; i < STA_TID_NUM; i++) { /* timer_to_tid must be initialized with identity mapping to * enable session_timer's data differentiation. refer to * sta_rx_agg_session_timer_expired for useage */ sta->timer_to_tid[i] = i; /* tid to tx queue: initialize according to HW (0 is valid) */ - sta->tid_to_tx_q[i] = local->hw.queues; + sta->tid_to_tx_q[i] = ieee80211_num_queues(&local->hw); /* rx */ sta->ampdu_mlme.tid_state_rx[i] = HT_AGG_STATE_IDLE; sta->ampdu_mlme.tid_rx[i] = NULL; @@ -271,12 +264,11 @@ struct sta_info *sta_info_alloc(struct ieee80211_sub_if_data *sdata, #ifdef CONFIG_MAC80211_VERBOSE_DEBUG printk(KERN_DEBUG "%s: Allocated STA %s\n", - wiphy_name(local->hw.wiphy), print_mac(mbuf, sta->addr)); + wiphy_name(local->hw.wiphy), print_mac(mbuf, sta->sta.addr)); #endif /* CONFIG_MAC80211_VERBOSE_DEBUG */ #ifdef CONFIG_MAC80211_MESH sta->plink_state = PLINK_LISTEN; - spin_lock_init(&sta->plink_lock); init_timer(&sta->plink_timer); #endif @@ -301,15 +293,15 @@ int sta_info_insert(struct sta_info *sta) goto out_free; } - if (WARN_ON(compare_ether_addr(sta->addr, sdata->dev->dev_addr) == 0 || - is_multicast_ether_addr(sta->addr))) { + if (WARN_ON(compare_ether_addr(sta->sta.addr, sdata->dev->dev_addr) == 0 || + is_multicast_ether_addr(sta->sta.addr))) { err = -EINVAL; goto out_free; } spin_lock_irqsave(&local->sta_lock, flags); /* check if STA exists already */ - if (__sta_info_find(local, sta->addr)) { + if (sta_info_get(local, sta->sta.addr)) { spin_unlock_irqrestore(&local->sta_lock, flags); err = -EEXIST; goto out_free; @@ -320,16 +312,18 @@ int sta_info_insert(struct sta_info *sta) /* notify driver */ if (local->ops->sta_notify) { - if (sdata->vif.type == IEEE80211_IF_TYPE_VLAN) - sdata = sdata->u.vlan.ap; + if (sdata->vif.type == NL80211_IFTYPE_AP_VLAN) + sdata = container_of(sdata->bss, + struct ieee80211_sub_if_data, + u.ap); local->ops->sta_notify(local_to_hw(local), &sdata->vif, - STA_NOTIFY_ADD, sta->addr); + STA_NOTIFY_ADD, &sta->sta); } #ifdef CONFIG_MAC80211_VERBOSE_DEBUG printk(KERN_DEBUG "%s: Inserted STA %s\n", - wiphy_name(local->hw.wiphy), print_mac(mac, sta->addr)); + wiphy_name(local->hw.wiphy), print_mac(mac, sta->sta.addr)); #endif /* CONFIG_MAC80211_VERBOSE_DEBUG */ spin_unlock_irqrestore(&local->sta_lock, flags); @@ -376,11 +370,14 @@ static inline void __bss_tim_clear(struct ieee80211_if_ap *bss, u16 aid) static void __sta_info_set_tim_bit(struct ieee80211_if_ap *bss, struct sta_info *sta) { - if (bss) - __bss_tim_set(bss, sta->aid); + BUG_ON(!bss); + + __bss_tim_set(bss, sta->sta.aid); + if (sta->local->ops->set_tim) { sta->local->tim_in_locked_section = true; - sta->local->ops->set_tim(local_to_hw(sta->local), sta->aid, 1); + sta->local->ops->set_tim(local_to_hw(sta->local), + &sta->sta, true); sta->local->tim_in_locked_section = false; } } @@ -389,6 +386,8 @@ void sta_info_set_tim_bit(struct sta_info *sta) { unsigned long flags; + BUG_ON(!sta->sdata->bss); + spin_lock_irqsave(&sta->local->sta_lock, flags); __sta_info_set_tim_bit(sta->sdata->bss, sta); spin_unlock_irqrestore(&sta->local->sta_lock, flags); @@ -397,11 +396,14 @@ void sta_info_set_tim_bit(struct sta_info *sta) static void __sta_info_clear_tim_bit(struct ieee80211_if_ap *bss, struct sta_info *sta) { - if (bss) - __bss_tim_clear(bss, sta->aid); + BUG_ON(!bss); + + __bss_tim_clear(bss, sta->sta.aid); + if (sta->local->ops->set_tim) { sta->local->tim_in_locked_section = true; - sta->local->ops->set_tim(local_to_hw(sta->local), sta->aid, 0); + sta->local->ops->set_tim(local_to_hw(sta->local), + &sta->sta, false); sta->local->tim_in_locked_section = false; } } @@ -410,12 +412,14 @@ void sta_info_clear_tim_bit(struct sta_info *sta) { unsigned long flags; + BUG_ON(!sta->sdata->bss); + spin_lock_irqsave(&sta->local->sta_lock, flags); __sta_info_clear_tim_bit(sta->sdata->bss, sta); spin_unlock_irqrestore(&sta->local->sta_lock, flags); } -void __sta_info_unlink(struct sta_info **sta) +static void __sta_info_unlink(struct sta_info **sta) { struct ieee80211_local *local = (*sta)->local; struct ieee80211_sub_if_data *sdata = (*sta)->sdata; @@ -437,21 +441,23 @@ void __sta_info_unlink(struct sta_info **sta) list_del(&(*sta)->list); - if ((*sta)->flags & WLAN_STA_PS) { - (*sta)->flags &= ~WLAN_STA_PS; - if (sdata->bss) - atomic_dec(&sdata->bss->num_sta_ps); + if (test_and_clear_sta_flags(*sta, WLAN_STA_PS)) { + BUG_ON(!sdata->bss); + + atomic_dec(&sdata->bss->num_sta_ps); __sta_info_clear_tim_bit(sdata->bss, *sta); } local->num_sta--; if (local->ops->sta_notify) { - if (sdata->vif.type == IEEE80211_IF_TYPE_VLAN) - sdata = sdata->u.vlan.ap; + if (sdata->vif.type == NL80211_IFTYPE_AP_VLAN) + sdata = container_of(sdata->bss, + struct ieee80211_sub_if_data, + u.ap); local->ops->sta_notify(local_to_hw(local), &sdata->vif, - STA_NOTIFY_REMOVE, (*sta)->addr); + STA_NOTIFY_REMOVE, &(*sta)->sta); } if (ieee80211_vif_is_mesh(&sdata->vif)) { @@ -463,7 +469,7 @@ void __sta_info_unlink(struct sta_info **sta) #ifdef CONFIG_MAC80211_VERBOSE_DEBUG printk(KERN_DEBUG "%s: Removed STA %s\n", - wiphy_name(local->hw.wiphy), print_mac(mbuf, (*sta)->addr)); + wiphy_name(local->hw.wiphy), print_mac(mbuf, (*sta)->sta.addr)); #endif /* CONFIG_MAC80211_VERBOSE_DEBUG */ /* @@ -515,20 +521,20 @@ static inline int sta_info_buffer_expired(struct ieee80211_local *local, struct sta_info *sta, struct sk_buff *skb) { - struct ieee80211_tx_packet_data *pkt_data; + struct ieee80211_tx_info *info; int timeout; if (!skb) return 0; - pkt_data = (struct ieee80211_tx_packet_data *) skb->cb; + info = IEEE80211_SKB_CB(skb); /* Timeout: (2 * listen_interval * beacon_int * 1024 / 1000000) sec */ timeout = (sta->listen_interval * local->hw.conf.beacon_int * 32 / 15625) * HZ; if (timeout < STA_TX_BUFFER_EXPIRE) timeout = STA_TX_BUFFER_EXPIRE; - return time_after(jiffies, pkt_data->jiffies + timeout); + return time_after(jiffies, info->control.jiffies + timeout); } @@ -557,8 +563,10 @@ static void sta_info_cleanup_expire_buffered(struct ieee80211_local *local, sdata = sta->sdata; local->total_ps_buffered--; +#ifdef CONFIG_MAC80211_VERBOSE_PS_DEBUG printk(KERN_DEBUG "Buffered frame expired (STA " - "%s)\n", print_mac(mac, sta->addr)); + "%s)\n", print_mac(mac, sta->sta.addr)); +#endif dev_kfree_skb(skb); if (skb_queue_empty(&sta->ps_tx_buf)) @@ -789,3 +797,40 @@ void sta_info_flush_delayed(struct ieee80211_sub_if_data *sdata) schedule_work(&local->sta_flush_work); spin_unlock_irqrestore(&local->sta_lock, flags); } + +void ieee80211_sta_expire(struct ieee80211_sub_if_data *sdata, + unsigned long exp_time) +{ + struct ieee80211_local *local = sdata->local; + struct sta_info *sta, *tmp; + LIST_HEAD(tmp_list); + DECLARE_MAC_BUF(mac); + unsigned long flags; + + spin_lock_irqsave(&local->sta_lock, flags); + list_for_each_entry_safe(sta, tmp, &local->sta_list, list) + if (time_after(jiffies, sta->last_rx + exp_time)) { +#ifdef CONFIG_MAC80211_IBSS_DEBUG + printk(KERN_DEBUG "%s: expiring inactive STA %s\n", + sdata->dev->name, print_mac(mac, sta->sta.addr)); +#endif + __sta_info_unlink(&sta); + if (sta) + list_add(&sta->list, &tmp_list); + } + spin_unlock_irqrestore(&local->sta_lock, flags); + + list_for_each_entry_safe(sta, tmp, &tmp_list, list) + sta_info_destroy(sta); +} + +struct ieee80211_sta *ieee80211_find_sta(struct ieee80211_hw *hw, + const u8 *addr) +{ + struct sta_info *sta = sta_info_get(hw_to_local(hw), addr); + + if (!sta) + return NULL; + return &sta->sta; +} +EXPORT_SYMBOL(ieee80211_find_sta); diff --git a/net/mac80211/sta_info.h b/net/mac80211/sta_info.h index f8c95bc9659c..a6b51862a89d 100644 --- a/net/mac80211/sta_info.h +++ b/net/mac80211/sta_info.h @@ -32,7 +32,7 @@ * @WLAN_STA_WDS: Station is one of our WDS peers. * @WLAN_STA_PSPOLL: Station has just PS-polled us. * @WLAN_STA_CLEAR_PS_FILT: Clear PS filter in hardware (using the - * IEEE80211_TXCTL_CLEAR_PS_FILT control flag) when the next + * IEEE80211_TX_CTL_CLEAR_PS_FILT control flag) when the next * frame to this station is transmitted. */ enum ieee80211_sta_info_flags { @@ -129,23 +129,19 @@ enum plink_state { * * @tid_state_rx: TID's state in Rx session state machine. * @tid_rx: aggregation info for Rx per TID - * @ampdu_rx: for locking sections in aggregation Rx flow * @tid_state_tx: TID's state in Tx session state machine. * @tid_tx: aggregation info for Tx per TID * @addba_req_num: number of times addBA request has been sent. - * @ampdu_tx: for locking sectionsi in aggregation Tx flow * @dialog_token_allocator: dialog token enumerator for each new session; */ struct sta_ampdu_mlme { /* rx */ u8 tid_state_rx[STA_TID_NUM]; struct tid_ampdu_rx *tid_rx[STA_TID_NUM]; - spinlock_t ampdu_rx; /* tx */ u8 tid_state_tx[STA_TID_NUM]; struct tid_ampdu_tx *tid_tx[STA_TID_NUM]; u8 addba_req_num[STA_TID_NUM]; - spinlock_t ampdu_tx; u8 dialog_token_allocator; }; @@ -164,9 +160,18 @@ struct sta_ampdu_mlme { * @list: global linked list entry * @hnext: hash table linked list pointer * @local: pointer to the global information + * @sdata: TBD + * @key: TBD + * @rate_ctrl: TBD + * @rate_ctrl_priv: TBD + * @lock: used for locking all fields that require locking, see comments + * in the header file. + * @flaglock: spinlock for flags accesses * @addr: MAC address of this STA * @aid: STA's unique AID (1..2007, 0 = not assigned yet), * only used in AP (and IBSS?) mode + * @listen_interval: TBD + * @pin_status: TBD * @flags: STA flags, see &enum ieee80211_sta_info_flags * @ps_tx_buf: buffer of frames to transmit to this station * when it leaves power saving state @@ -175,8 +180,37 @@ struct sta_ampdu_mlme { * power saving state * @rx_packets: Number of MSDUs received from this STA * @rx_bytes: Number of bytes received from this STA - * @supp_rates: Bitmap of supported rates (per band) - * @ht_info: HT capabilities of this STA + * @wep_weak_iv_count: TBD + * @last_rx: TBD + * @num_duplicates: number of duplicate frames received from this STA + * @rx_fragments: number of received MPDUs + * @rx_dropped: number of dropped MPDUs from this STA + * @last_signal: signal of last received frame from this STA + * @last_qual: qual of last received frame from this STA + * @last_noise: noise of last received frame from this STA + * @last_seq_ctrl: last received seq/frag number from this STA (per RX queue) + * @tx_filtered_count: TBD + * @tx_retry_failed: TBD + * @tx_retry_count: TBD + * @fail_avg: moving percentage of failed MSDUs + * @tx_packets: number of RX/TX MSDUs + * @tx_bytes: TBD + * @tx_fragments: number of transmitted MPDUs + * @last_txrate_idx: Index of the last used transmit rate + * @tid_seq: TBD + * @ampdu_mlme: TBD + * @timer_to_tid: identity mapping to ID timers + * @tid_to_tx_q: map tid to tx queue + * @llid: Local link ID + * @plid: Peer link ID + * @reason: Cancel reason on PLINK_HOLDING state + * @plink_retries: Retries in establishment + * @ignore_plink_timer: TBD + * @plink_state plink_state: TBD + * @plink_timeout: TBD + * @plink_timer: TBD + * @debugfs: debug filesystem info + * @sta: station information we share with the driver */ struct sta_info { /* General information, mostly static */ @@ -187,10 +221,9 @@ struct sta_info { struct ieee80211_key *key; struct rate_control_ref *rate_ctrl; void *rate_ctrl_priv; - struct ieee80211_ht_info ht_info; - u64 supp_rates[IEEE80211_NUM_BANDS]; - u8 addr[ETH_ALEN]; - u16 aid; + spinlock_t lock; + spinlock_t flaglock; + u16 listen_interval; /* @@ -199,7 +232,10 @@ struct sta_info { */ u8 pin_status; - /* frequently updated information, needs locking? */ + /* + * frequently updated, locked with own spinlock (flaglock), + * use the accessors defined below + */ u32 flags; /* @@ -213,66 +249,47 @@ struct sta_info { unsigned long rx_packets, rx_bytes; unsigned long wep_weak_iv_count; unsigned long last_rx; - unsigned long num_duplicates; /* number of duplicate frames received - * from this STA */ - unsigned long rx_fragments; /* number of received MPDUs */ - unsigned long rx_dropped; /* number of dropped MPDUs from this STA */ - int last_rssi; /* RSSI of last received frame from this STA */ - int last_signal; /* signal of last received frame from this STA */ - int last_noise; /* noise of last received frame from this STA */ - /* last received seq/frag number from this STA (per RX queue) */ + unsigned long num_duplicates; + unsigned long rx_fragments; + unsigned long rx_dropped; + int last_signal; + int last_qual; + int last_noise; __le16 last_seq_ctrl[NUM_RX_DATA_QUEUES]; -#ifdef CONFIG_MAC80211_DEBUG_COUNTERS - unsigned int wme_rx_queue[NUM_RX_DATA_QUEUES]; -#endif /* Updated from TX status path only, no locking requirements */ unsigned long tx_filtered_count; unsigned long tx_retry_failed, tx_retry_count; - /* TODO: update in generic code not rate control? */ - u32 tx_num_consecutive_failures; - u32 tx_num_mpdu_ok; - u32 tx_num_mpdu_fail; /* moving percentage of failed MSDUs */ unsigned int fail_avg; /* Updated from TX path only, no locking requirements */ - unsigned long tx_packets; /* number of RX/TX MSDUs */ + unsigned long tx_packets; unsigned long tx_bytes; - unsigned long tx_fragments; /* number of transmitted MPDUs */ - int txrate_idx; - int last_txrate_idx; -#ifdef CONFIG_MAC80211_DEBUG_COUNTERS - unsigned int wme_tx_queue[NUM_RX_DATA_QUEUES]; -#endif - - /* Debug counters, no locking doesn't matter */ - int channel_use; - int channel_use_raw; + unsigned long tx_fragments; + unsigned int last_txrate_idx; + u16 tid_seq[IEEE80211_QOS_CTL_TID_MASK + 1]; /* - * Aggregation information, comes with own locking. + * Aggregation information, locked with lock. */ struct sta_ampdu_mlme ampdu_mlme; - u8 timer_to_tid[STA_TID_NUM]; /* identity mapping to ID timers */ - u8 tid_to_tx_q[STA_TID_NUM]; /* map tid to tx queue */ + u8 timer_to_tid[STA_TID_NUM]; + u8 tid_to_tx_q[STA_TID_NUM]; #ifdef CONFIG_MAC80211_MESH /* * Mesh peer link attributes * TODO: move to a sub-structure that is referenced with pointer? */ - __le16 llid; /* Local link ID */ - __le16 plid; /* Peer link ID */ - __le16 reason; /* Cancel reason on PLINK_HOLDING state */ - u8 plink_retries; /* Retries in establishment */ + __le16 llid; + __le16 plid; + __le16 reason; + u8 plink_retries; bool ignore_plink_timer; enum plink_state plink_state; u32 plink_timeout; struct timer_list plink_timer; - spinlock_t plink_lock; /* For peer_state reads / updates and other - updates in the structure. Ensures robust - transitions for the peerlink FSM */ #endif #ifdef CONFIG_MAC80211_DEBUGFS @@ -282,13 +299,12 @@ struct sta_info { struct dentry *num_ps_buf_frames; struct dentry *inactive_ms; struct dentry *last_seq_ctrl; -#ifdef CONFIG_MAC80211_DEBUG_COUNTERS - struct dentry *wme_rx_queue; - struct dentry *wme_tx_queue; -#endif struct dentry *agg_status; } debugfs; #endif + + /* keep last! */ + struct ieee80211_sta sta; }; static inline enum plink_state sta_plink_state(struct sta_info *sta) @@ -299,6 +315,73 @@ static inline enum plink_state sta_plink_state(struct sta_info *sta) return PLINK_LISTEN; } +static inline void set_sta_flags(struct sta_info *sta, const u32 flags) +{ + unsigned long irqfl; + + spin_lock_irqsave(&sta->flaglock, irqfl); + sta->flags |= flags; + spin_unlock_irqrestore(&sta->flaglock, irqfl); +} + +static inline void clear_sta_flags(struct sta_info *sta, const u32 flags) +{ + unsigned long irqfl; + + spin_lock_irqsave(&sta->flaglock, irqfl); + sta->flags &= ~flags; + spin_unlock_irqrestore(&sta->flaglock, irqfl); +} + +static inline void set_and_clear_sta_flags(struct sta_info *sta, + const u32 set, const u32 clear) +{ + unsigned long irqfl; + + spin_lock_irqsave(&sta->flaglock, irqfl); + sta->flags |= set; + sta->flags &= ~clear; + spin_unlock_irqrestore(&sta->flaglock, irqfl); +} + +static inline u32 test_sta_flags(struct sta_info *sta, const u32 flags) +{ + u32 ret; + unsigned long irqfl; + + spin_lock_irqsave(&sta->flaglock, irqfl); + ret = sta->flags & flags; + spin_unlock_irqrestore(&sta->flaglock, irqfl); + + return ret; +} + +static inline u32 test_and_clear_sta_flags(struct sta_info *sta, + const u32 flags) +{ + u32 ret; + unsigned long irqfl; + + spin_lock_irqsave(&sta->flaglock, irqfl); + ret = sta->flags & flags; + sta->flags &= ~flags; + spin_unlock_irqrestore(&sta->flaglock, irqfl); + + return ret; +} + +static inline u32 get_sta_flags(struct sta_info *sta) +{ + u32 ret; + unsigned long irqfl; + + spin_lock_irqsave(&sta->flaglock, irqfl); + ret = sta->flags; + spin_unlock_irqrestore(&sta->flaglock, irqfl); + + return ret; +} + /* Maximum number of concurrently registered stations */ #define MAX_STA_COUNT 2007 @@ -321,7 +404,7 @@ static inline enum plink_state sta_plink_state(struct sta_info *sta) /* * Get a STA info, must have be under RCU read lock. */ -struct sta_info *sta_info_get(struct ieee80211_local *local, u8 *addr); +struct sta_info *sta_info_get(struct ieee80211_local *local, const u8 *addr); /* * Get STA info by index, BROKEN! */ @@ -347,7 +430,6 @@ int sta_info_insert(struct sta_info *sta); * has already unlinked it. */ void sta_info_unlink(struct sta_info **sta); -void __sta_info_unlink(struct sta_info **sta); void sta_info_destroy(struct sta_info *sta); void sta_info_set_tim_bit(struct sta_info *sta); @@ -359,5 +441,7 @@ void sta_info_stop(struct ieee80211_local *local); int sta_info_flush(struct ieee80211_local *local, struct ieee80211_sub_if_data *sdata); void sta_info_flush_delayed(struct ieee80211_sub_if_data *sdata); +void ieee80211_sta_expire(struct ieee80211_sub_if_data *sdata, + unsigned long exp_time); #endif /* STA_INFO_H */ diff --git a/net/mac80211/tkip.c b/net/mac80211/tkip.c index 09093da24af6..34b32bc8f609 100644 --- a/net/mac80211/tkip.c +++ b/net/mac80211/tkip.c @@ -6,25 +6,23 @@ * it under the terms of the GNU General Public License version 2 as * published by the Free Software Foundation. */ - #include <linux/kernel.h> +#include <linux/bitops.h> #include <linux/types.h> #include <linux/netdevice.h> +#include <asm/unaligned.h> #include <net/mac80211.h> #include "key.h" #include "tkip.h" #include "wep.h" - -/* TKIP key mixing functions */ - - #define PHASE1_LOOP_COUNT 8 - -/* 2-byte by 2-byte subset of the full AES S-box table; second part of this - * table is identical to first part but byte-swapped */ +/* + * 2-byte by 2-byte subset of the full AES S-box table; second part of this + * table is identical to first part but byte-swapped + */ static const u16 tkip_sbox[256] = { 0xC6A5, 0xF884, 0xEE99, 0xF68D, 0xFF0D, 0xD6BD, 0xDEB1, 0x9154, @@ -61,84 +59,54 @@ static const u16 tkip_sbox[256] = 0x82C3, 0x29B0, 0x5A77, 0x1E11, 0x7BCB, 0xA8FC, 0x6DD6, 0x2C3A, }; - -static inline u16 Mk16(u8 x, u8 y) +static u16 tkipS(u16 val) { - return ((u16) x << 8) | (u16) y; + return tkip_sbox[val & 0xff] ^ swab16(tkip_sbox[val >> 8]); } - -static inline u8 Hi8(u16 v) -{ - return v >> 8; -} - - -static inline u8 Lo8(u16 v) -{ - return v & 0xff; -} - - -static inline u16 Hi16(u32 v) -{ - return v >> 16; -} - - -static inline u16 Lo16(u32 v) -{ - return v & 0xffff; -} - - -static inline u16 RotR1(u16 v) -{ - return (v >> 1) | ((v & 0x0001) << 15); -} - - -static inline u16 tkip_S(u16 val) +static u8 *write_tkip_iv(u8 *pos, u16 iv16) { - u16 a = tkip_sbox[Hi8(val)]; - - return tkip_sbox[Lo8(val)] ^ Hi8(a) ^ (Lo8(a) << 8); + *pos++ = iv16 >> 8; + *pos++ = ((iv16 >> 8) | 0x20) & 0x7f; + *pos++ = iv16 & 0xFF; + return pos; } - - -/* P1K := Phase1(TA, TK, TSC) +/* + * P1K := Phase1(TA, TK, TSC) * TA = transmitter address (48 bits) * TK = dot11DefaultKeyValue or dot11KeyMappingValue (128 bits) * TSC = TKIP sequence counter (48 bits, only 32 msb bits used) * P1K: 80 bits */ -static void tkip_mixing_phase1(const u8 *ta, const u8 *tk, u32 tsc_IV32, - u16 *p1k) +static void tkip_mixing_phase1(const u8 *tk, struct tkip_ctx *ctx, + const u8 *ta, u32 tsc_IV32) { int i, j; + u16 *p1k = ctx->p1k; - p1k[0] = Lo16(tsc_IV32); - p1k[1] = Hi16(tsc_IV32); - p1k[2] = Mk16(ta[1], ta[0]); - p1k[3] = Mk16(ta[3], ta[2]); - p1k[4] = Mk16(ta[5], ta[4]); + p1k[0] = tsc_IV32 & 0xFFFF; + p1k[1] = tsc_IV32 >> 16; + p1k[2] = get_unaligned_le16(ta + 0); + p1k[3] = get_unaligned_le16(ta + 2); + p1k[4] = get_unaligned_le16(ta + 4); for (i = 0; i < PHASE1_LOOP_COUNT; i++) { j = 2 * (i & 1); - p1k[0] += tkip_S(p1k[4] ^ Mk16(tk[ 1 + j], tk[ 0 + j])); - p1k[1] += tkip_S(p1k[0] ^ Mk16(tk[ 5 + j], tk[ 4 + j])); - p1k[2] += tkip_S(p1k[1] ^ Mk16(tk[ 9 + j], tk[ 8 + j])); - p1k[3] += tkip_S(p1k[2] ^ Mk16(tk[13 + j], tk[12 + j])); - p1k[4] += tkip_S(p1k[3] ^ Mk16(tk[ 1 + j], tk[ 0 + j])) + i; + p1k[0] += tkipS(p1k[4] ^ get_unaligned_le16(tk + 0 + j)); + p1k[1] += tkipS(p1k[0] ^ get_unaligned_le16(tk + 4 + j)); + p1k[2] += tkipS(p1k[1] ^ get_unaligned_le16(tk + 8 + j)); + p1k[3] += tkipS(p1k[2] ^ get_unaligned_le16(tk + 12 + j)); + p1k[4] += tkipS(p1k[3] ^ get_unaligned_le16(tk + 0 + j)) + i; } + ctx->initialized = 1; } - -static void tkip_mixing_phase2(const u16 *p1k, const u8 *tk, u16 tsc_IV16, - u8 *rc4key) +static void tkip_mixing_phase2(const u8 *tk, struct tkip_ctx *ctx, + u16 tsc_IV16, u8 *rc4key) { u16 ppk[6]; + const u16 *p1k = ctx->p1k; int i; ppk[0] = p1k[0]; @@ -148,70 +116,35 @@ static void tkip_mixing_phase2(const u16 *p1k, const u8 *tk, u16 tsc_IV16, ppk[4] = p1k[4]; ppk[5] = p1k[4] + tsc_IV16; - ppk[0] += tkip_S(ppk[5] ^ Mk16(tk[ 1], tk[ 0])); - ppk[1] += tkip_S(ppk[0] ^ Mk16(tk[ 3], tk[ 2])); - ppk[2] += tkip_S(ppk[1] ^ Mk16(tk[ 5], tk[ 4])); - ppk[3] += tkip_S(ppk[2] ^ Mk16(tk[ 7], tk[ 6])); - ppk[4] += tkip_S(ppk[3] ^ Mk16(tk[ 9], tk[ 8])); - ppk[5] += tkip_S(ppk[4] ^ Mk16(tk[11], tk[10])); - ppk[0] += RotR1(ppk[5] ^ Mk16(tk[13], tk[12])); - ppk[1] += RotR1(ppk[0] ^ Mk16(tk[15], tk[14])); - ppk[2] += RotR1(ppk[1]); - ppk[3] += RotR1(ppk[2]); - ppk[4] += RotR1(ppk[3]); - ppk[5] += RotR1(ppk[4]); - - rc4key[0] = Hi8(tsc_IV16); - rc4key[1] = (Hi8(tsc_IV16) | 0x20) & 0x7f; - rc4key[2] = Lo8(tsc_IV16); - rc4key[3] = Lo8((ppk[5] ^ Mk16(tk[1], tk[0])) >> 1); - - for (i = 0; i < 6; i++) { - rc4key[4 + 2 * i] = Lo8(ppk[i]); - rc4key[5 + 2 * i] = Hi8(ppk[i]); - } + ppk[0] += tkipS(ppk[5] ^ get_unaligned_le16(tk + 0)); + ppk[1] += tkipS(ppk[0] ^ get_unaligned_le16(tk + 2)); + ppk[2] += tkipS(ppk[1] ^ get_unaligned_le16(tk + 4)); + ppk[3] += tkipS(ppk[2] ^ get_unaligned_le16(tk + 6)); + ppk[4] += tkipS(ppk[3] ^ get_unaligned_le16(tk + 8)); + ppk[5] += tkipS(ppk[4] ^ get_unaligned_le16(tk + 10)); + ppk[0] += ror16(ppk[5] ^ get_unaligned_le16(tk + 12), 1); + ppk[1] += ror16(ppk[0] ^ get_unaligned_le16(tk + 14), 1); + ppk[2] += ror16(ppk[1], 1); + ppk[3] += ror16(ppk[2], 1); + ppk[4] += ror16(ppk[3], 1); + ppk[5] += ror16(ppk[4], 1); + + rc4key = write_tkip_iv(rc4key, tsc_IV16); + *rc4key++ = ((ppk[5] ^ get_unaligned_le16(tk)) >> 1) & 0xFF; + + for (i = 0; i < 6; i++) + put_unaligned_le16(ppk[i], rc4key + 2 * i); } - /* Add TKIP IV and Ext. IV at @pos. @iv0, @iv1, and @iv2 are the first octets * of the IV. Returns pointer to the octet following IVs (i.e., beginning of * the packet payload). */ -u8 * ieee80211_tkip_add_iv(u8 *pos, struct ieee80211_key *key, - u8 iv0, u8 iv1, u8 iv2) +u8 *ieee80211_tkip_add_iv(u8 *pos, struct ieee80211_key *key, u16 iv16) { - *pos++ = iv0; - *pos++ = iv1; - *pos++ = iv2; + pos = write_tkip_iv(pos, iv16); *pos++ = (key->conf.keyidx << 6) | (1 << 5) /* Ext IV */; - *pos++ = key->u.tkip.iv32 & 0xff; - *pos++ = (key->u.tkip.iv32 >> 8) & 0xff; - *pos++ = (key->u.tkip.iv32 >> 16) & 0xff; - *pos++ = (key->u.tkip.iv32 >> 24) & 0xff; - return pos; -} - - -void ieee80211_tkip_gen_phase1key(struct ieee80211_key *key, u8 *ta, - u16 *phase1key) -{ - tkip_mixing_phase1(ta, &key->conf.key[ALG_TKIP_TEMP_ENCR_KEY], - key->u.tkip.iv32, phase1key); -} - -void ieee80211_tkip_gen_rc4key(struct ieee80211_key *key, u8 *ta, - u8 *rc4key) -{ - /* Calculate per-packet key */ - if (key->u.tkip.iv16 == 0 || !key->u.tkip.tx_initialized) { - /* IV16 wrapped around - perform TKIP phase 1 */ - tkip_mixing_phase1(ta, &key->conf.key[ALG_TKIP_TEMP_ENCR_KEY], - key->u.tkip.iv32, key->u.tkip.p1k); - key->u.tkip.tx_initialized = 1; - } - - tkip_mixing_phase2(key->u.tkip.p1k, - &key->conf.key[ALG_TKIP_TEMP_ENCR_KEY], - key->u.tkip.iv16, rc4key); + put_unaligned_le32(key->u.tkip.tx.iv32, pos); + return pos + 4; } void ieee80211_get_tkip_key(struct ieee80211_key_conf *keyconf, @@ -220,48 +153,44 @@ void ieee80211_get_tkip_key(struct ieee80211_key_conf *keyconf, { struct ieee80211_key *key = (struct ieee80211_key *) container_of(keyconf, struct ieee80211_key, conf); - struct ieee80211_hdr *hdr = (struct ieee80211_hdr *) skb->data; - u8 *data = (u8 *) hdr; - u16 fc = le16_to_cpu(hdr->frame_control); - int hdr_len = ieee80211_get_hdrlen(fc); - u8 *ta = hdr->addr2; + struct ieee80211_hdr *hdr = (struct ieee80211_hdr *)skb->data; + u8 *data; + const u8 *tk; + struct tkip_ctx *ctx; u16 iv16; u32 iv32; - iv16 = data[hdr_len] << 8; - iv16 += data[hdr_len + 2]; - iv32 = data[hdr_len + 4] | (data[hdr_len + 5] << 8) | - (data[hdr_len + 6] << 16) | (data[hdr_len + 7] << 24); + data = (u8 *)hdr + ieee80211_hdrlen(hdr->frame_control); + iv16 = data[2] | (data[0] << 8); + iv32 = get_unaligned_le32(&data[4]); + + tk = &key->conf.key[NL80211_TKIP_DATA_OFFSET_ENCR_KEY]; + ctx = &key->u.tkip.tx; -#ifdef CONFIG_TKIP_DEBUG +#ifdef CONFIG_MAC80211_TKIP_DEBUG printk(KERN_DEBUG "TKIP encrypt: iv16 = 0x%04x, iv32 = 0x%08x\n", iv16, iv32); - if (iv32 != key->u.tkip.iv32) { + if (iv32 != ctx->iv32) { printk(KERN_DEBUG "skb: iv32 = 0x%08x key: iv32 = 0x%08x\n", - iv32, key->u.tkip.iv32); + iv32, ctx->iv32); printk(KERN_DEBUG "Wrap around of iv16 in the middle of a " "fragmented packet\n"); } -#endif /* CONFIG_TKIP_DEBUG */ +#endif /* Update the p1k only when the iv16 in the packet wraps around, this * might occur after the wrap around of iv16 in the key in case of * fragmented packets. */ - if (iv16 == 0 || !key->u.tkip.tx_initialized) { - /* IV16 wrapped around - perform TKIP phase 1 */ - tkip_mixing_phase1(ta, &key->conf.key[ALG_TKIP_TEMP_ENCR_KEY], - iv32, key->u.tkip.p1k); - key->u.tkip.tx_initialized = 1; - } + if (iv16 == 0 || !ctx->initialized) + tkip_mixing_phase1(tk, ctx, hdr->addr2, iv32); if (type == IEEE80211_TKIP_P1_KEY) { - memcpy(outkey, key->u.tkip.p1k, sizeof(u16) * 5); + memcpy(outkey, ctx->p1k, sizeof(u16) * 5); return; } - tkip_mixing_phase2(key->u.tkip.p1k, - &key->conf.key[ALG_TKIP_TEMP_ENCR_KEY], iv16, outkey); + tkip_mixing_phase2(tk, ctx, iv16, outkey); } EXPORT_SYMBOL(ieee80211_get_tkip_key); @@ -275,13 +204,19 @@ void ieee80211_tkip_encrypt_data(struct crypto_blkcipher *tfm, u8 *pos, size_t payload_len, u8 *ta) { u8 rc4key[16]; + struct tkip_ctx *ctx = &key->u.tkip.tx; + const u8 *tk = &key->conf.key[NL80211_TKIP_DATA_OFFSET_ENCR_KEY]; + + /* Calculate per-packet key */ + if (ctx->iv16 == 0 || !ctx->initialized) + tkip_mixing_phase1(tk, ctx, ta, ctx->iv32); + + tkip_mixing_phase2(tk, ctx, ctx->iv16, rc4key); - ieee80211_tkip_gen_rc4key(key, ta, rc4key); - pos = ieee80211_tkip_add_iv(pos, key, rc4key[0], rc4key[1], rc4key[2]); + pos = ieee80211_tkip_add_iv(pos, key, key->u.tkip.tx.iv16); ieee80211_wep_encrypt_data(tfm, rc4key, 16, pos, payload_len); } - /* Decrypt packet payload with TKIP using @key. @pos is a pointer to the * beginning of the buffer containing IEEE 802.11 header payload, i.e., * including IV, Ext. IV, real data, Michael MIC, ICV. @payload_len is the @@ -296,15 +231,16 @@ int ieee80211_tkip_decrypt_data(struct crypto_blkcipher *tfm, u32 iv16; u8 rc4key[16], keyid, *pos = payload; int res; + const u8 *tk = &key->conf.key[NL80211_TKIP_DATA_OFFSET_ENCR_KEY]; if (payload_len < 12) return -1; iv16 = (pos[0] << 8) | pos[2]; keyid = pos[3]; - iv32 = pos[4] | (pos[5] << 8) | (pos[6] << 16) | (pos[7] << 24); + iv32 = get_unaligned_le32(pos + 4); pos += 8; -#ifdef CONFIG_TKIP_DEBUG +#ifdef CONFIG_MAC80211_TKIP_DEBUG { int i; printk(KERN_DEBUG "TKIP decrypt: data(len=%zd)", payload_len); @@ -314,7 +250,7 @@ int ieee80211_tkip_decrypt_data(struct crypto_blkcipher *tfm, printk(KERN_DEBUG "TKIP decrypt: iv16=%04x iv32=%08x\n", iv16, iv32); } -#endif /* CONFIG_TKIP_DEBUG */ +#endif if (!(keyid & (1 << 5))) return TKIP_DECRYPT_NO_EXT_IV; @@ -322,69 +258,65 @@ int ieee80211_tkip_decrypt_data(struct crypto_blkcipher *tfm, if ((keyid >> 6) != key->conf.keyidx) return TKIP_DECRYPT_INVALID_KEYIDX; - if (key->u.tkip.rx_initialized[queue] && - (iv32 < key->u.tkip.iv32_rx[queue] || - (iv32 == key->u.tkip.iv32_rx[queue] && - iv16 <= key->u.tkip.iv16_rx[queue]))) { -#ifdef CONFIG_TKIP_DEBUG + if (key->u.tkip.rx[queue].initialized && + (iv32 < key->u.tkip.rx[queue].iv32 || + (iv32 == key->u.tkip.rx[queue].iv32 && + iv16 <= key->u.tkip.rx[queue].iv16))) { +#ifdef CONFIG_MAC80211_TKIP_DEBUG DECLARE_MAC_BUF(mac); printk(KERN_DEBUG "TKIP replay detected for RX frame from " "%s (RX IV (%04x,%02x) <= prev. IV (%04x,%02x)\n", print_mac(mac, ta), - iv32, iv16, key->u.tkip.iv32_rx[queue], - key->u.tkip.iv16_rx[queue]); -#endif /* CONFIG_TKIP_DEBUG */ + iv32, iv16, key->u.tkip.rx[queue].iv32, + key->u.tkip.rx[queue].iv16); +#endif return TKIP_DECRYPT_REPLAY; } if (only_iv) { res = TKIP_DECRYPT_OK; - key->u.tkip.rx_initialized[queue] = 1; + key->u.tkip.rx[queue].initialized = 1; goto done; } - if (!key->u.tkip.rx_initialized[queue] || - key->u.tkip.iv32_rx[queue] != iv32) { - key->u.tkip.rx_initialized[queue] = 1; + if (!key->u.tkip.rx[queue].initialized || + key->u.tkip.rx[queue].iv32 != iv32) { /* IV16 wrapped around - perform TKIP phase 1 */ - tkip_mixing_phase1(ta, &key->conf.key[ALG_TKIP_TEMP_ENCR_KEY], - iv32, key->u.tkip.p1k_rx[queue]); -#ifdef CONFIG_TKIP_DEBUG + tkip_mixing_phase1(tk, &key->u.tkip.rx[queue], ta, iv32); +#ifdef CONFIG_MAC80211_TKIP_DEBUG { int i; + u8 key_offset = NL80211_TKIP_DATA_OFFSET_ENCR_KEY; DECLARE_MAC_BUF(mac); printk(KERN_DEBUG "TKIP decrypt: Phase1 TA=%s" " TK=", print_mac(mac, ta)); for (i = 0; i < 16; i++) printk("%02x ", - key->conf.key[ - ALG_TKIP_TEMP_ENCR_KEY + i]); + key->conf.key[key_offset + i]); printk("\n"); printk(KERN_DEBUG "TKIP decrypt: P1K="); for (i = 0; i < 5; i++) - printk("%04x ", key->u.tkip.p1k_rx[queue][i]); + printk("%04x ", key->u.tkip.rx[queue].p1k[i]); printk("\n"); } -#endif /* CONFIG_TKIP_DEBUG */ +#endif if (key->local->ops->update_tkip_key && key->flags & KEY_FLAG_UPLOADED_TO_HARDWARE) { u8 bcast[ETH_ALEN] = {0xff, 0xff, 0xff, 0xff, 0xff, 0xff}; - u8 *sta_addr = key->sta->addr; + u8 *sta_addr = key->sta->sta.addr; if (is_multicast_ether_addr(ra)) sta_addr = bcast; key->local->ops->update_tkip_key( local_to_hw(key->local), &key->conf, - sta_addr, iv32, key->u.tkip.p1k_rx[queue]); + sta_addr, iv32, key->u.tkip.rx[queue].p1k); } } - tkip_mixing_phase2(key->u.tkip.p1k_rx[queue], - &key->conf.key[ALG_TKIP_TEMP_ENCR_KEY], - iv16, rc4key); -#ifdef CONFIG_TKIP_DEBUG + tkip_mixing_phase2(tk, &key->u.tkip.rx[queue], iv16, rc4key); +#ifdef CONFIG_MAC80211_TKIP_DEBUG { int i; printk(KERN_DEBUG "TKIP decrypt: Phase2 rc4key="); @@ -392,7 +324,7 @@ int ieee80211_tkip_decrypt_data(struct crypto_blkcipher *tfm, printk("%02x ", rc4key[i]); printk("\n"); } -#endif /* CONFIG_TKIP_DEBUG */ +#endif res = ieee80211_wep_decrypt_data(tfm, rc4key, 16, pos, payload_len - 12); done: @@ -409,5 +341,3 @@ int ieee80211_tkip_decrypt_data(struct crypto_blkcipher *tfm, return res; } - - diff --git a/net/mac80211/tkip.h b/net/mac80211/tkip.h index b7c2ee763d9d..d4714383f5fc 100644 --- a/net/mac80211/tkip.h +++ b/net/mac80211/tkip.h @@ -13,12 +13,8 @@ #include <linux/crypto.h> #include "key.h" -u8 * ieee80211_tkip_add_iv(u8 *pos, struct ieee80211_key *key, - u8 iv0, u8 iv1, u8 iv2); -void ieee80211_tkip_gen_phase1key(struct ieee80211_key *key, u8 *ta, - u16 *phase1key); -void ieee80211_tkip_gen_rc4key(struct ieee80211_key *key, u8 *ta, - u8 *rc4key); +u8 *ieee80211_tkip_add_iv(u8 *pos, struct ieee80211_key *key, u16 iv16); + void ieee80211_tkip_encrypt_data(struct crypto_blkcipher *tfm, struct ieee80211_key *key, u8 *pos, size_t payload_len, u8 *ta); diff --git a/net/mac80211/tx.c b/net/mac80211/tx.c index c80d5899f279..1460537faf33 100644 --- a/net/mac80211/tx.c +++ b/net/mac80211/tx.c @@ -38,64 +38,17 @@ /* misc utils */ -static inline void ieee80211_include_sequence(struct ieee80211_sub_if_data *sdata, - struct ieee80211_hdr *hdr) -{ - /* Set the sequence number for this frame. */ - hdr->seq_ctrl = cpu_to_le16(sdata->sequence); - - /* Increase the sequence number. */ - sdata->sequence = (sdata->sequence + 0x10) & IEEE80211_SCTL_SEQ; -} - -#ifdef CONFIG_MAC80211_LOWTX_FRAME_DUMP -static void ieee80211_dump_frame(const char *ifname, const char *title, - const struct sk_buff *skb) -{ - const struct ieee80211_hdr *hdr = (struct ieee80211_hdr *) skb->data; - u16 fc; - int hdrlen; - DECLARE_MAC_BUF(mac); - - printk(KERN_DEBUG "%s: %s (len=%d)", ifname, title, skb->len); - if (skb->len < 4) { - printk("\n"); - return; - } - - fc = le16_to_cpu(hdr->frame_control); - hdrlen = ieee80211_get_hdrlen(fc); - if (hdrlen > skb->len) - hdrlen = skb->len; - if (hdrlen >= 4) - printk(" FC=0x%04x DUR=0x%04x", - fc, le16_to_cpu(hdr->duration_id)); - if (hdrlen >= 10) - printk(" A1=%s", print_mac(mac, hdr->addr1)); - if (hdrlen >= 16) - printk(" A2=%s", print_mac(mac, hdr->addr2)); - if (hdrlen >= 24) - printk(" A3=%s", print_mac(mac, hdr->addr3)); - if (hdrlen >= 30) - printk(" A4=%s", print_mac(mac, hdr->addr4)); - printk("\n"); -} -#else /* CONFIG_MAC80211_LOWTX_FRAME_DUMP */ -static inline void ieee80211_dump_frame(const char *ifname, const char *title, - struct sk_buff *skb) -{ -} -#endif /* CONFIG_MAC80211_LOWTX_FRAME_DUMP */ - -static u16 ieee80211_duration(struct ieee80211_tx_data *tx, int group_addr, - int next_frag_len) +static __le16 ieee80211_duration(struct ieee80211_tx_data *tx, int group_addr, + int next_frag_len) { int rate, mrate, erp, dur, i; - struct ieee80211_rate *txrate = tx->rate; + struct ieee80211_rate *txrate; struct ieee80211_local *local = tx->local; struct ieee80211_supported_band *sband; + struct ieee80211_hdr *hdr; - sband = local->hw.wiphy->bands[local->hw.conf.channel->band]; + sband = local->hw.wiphy->bands[tx->channel->band]; + txrate = &sband->bitrates[tx->rate_idx]; erp = 0; if (tx->sdata->flags & IEEE80211_SDATA_OPERATING_GMODE) @@ -118,10 +71,10 @@ static u16 ieee80211_duration(struct ieee80211_tx_data *tx, int group_addr, * at the highest possible rate belonging to the PHY rates in the * BSSBasicRateSet */ - - if ((tx->fc & IEEE80211_FCTL_FTYPE) == IEEE80211_FTYPE_CTL) { + hdr = (struct ieee80211_hdr *)tx->skb->data; + if (ieee80211_is_ctl(hdr->frame_control)) { /* TODO: These control frames are not currently sent by - * 80211.o, but should they be implemented, this function + * mac80211, but should they be implemented, this function * needs to be updated to support duration field calculation. * * RTS: time needed to transmit pending data/mgmt frame plus @@ -139,7 +92,7 @@ static u16 ieee80211_duration(struct ieee80211_tx_data *tx, int group_addr, /* data/mgmt */ if (0 /* FIX: data/mgmt during CFP */) - return 32768; + return cpu_to_le16(32768); if (group_addr) /* Group address as the destination - no ACK */ return 0; @@ -163,7 +116,7 @@ static u16 ieee80211_duration(struct ieee80211_tx_data *tx, int group_addr, if (r->bitrate > txrate->bitrate) break; - if (tx->sdata->basic_rates & BIT(i)) + if (tx->sdata->bss_conf.basic_rates & BIT(i)) rate = r->bitrate; switch (sband->band) { @@ -209,59 +162,44 @@ static u16 ieee80211_duration(struct ieee80211_tx_data *tx, int group_addr, tx->sdata->bss_conf.use_short_preamble); } - return dur; + return cpu_to_le16(dur); } -static inline int __ieee80211_queue_stopped(const struct ieee80211_local *local, - int queue) +static int inline is_ieee80211_device(struct ieee80211_local *local, + struct net_device *dev) { - return test_bit(IEEE80211_LINK_STATE_XOFF, &local->state[queue]); -} - -static inline int __ieee80211_queue_pending(const struct ieee80211_local *local, - int queue) -{ - return test_bit(IEEE80211_LINK_STATE_PENDING, &local->state[queue]); -} - -static int inline is_ieee80211_device(struct net_device *dev, - struct net_device *master) -{ - return (wdev_priv(dev->ieee80211_ptr) == - wdev_priv(master->ieee80211_ptr)); + return local == wdev_priv(dev->ieee80211_ptr); } /* tx handlers */ -static ieee80211_tx_result +static ieee80211_tx_result debug_noinline ieee80211_tx_h_check_assoc(struct ieee80211_tx_data *tx) { -#ifdef CONFIG_MAC80211_VERBOSE_DEBUG - struct sk_buff *skb = tx->skb; - struct ieee80211_hdr *hdr = (struct ieee80211_hdr *) skb->data; -#endif /* CONFIG_MAC80211_VERBOSE_DEBUG */ + + struct ieee80211_hdr *hdr = (struct ieee80211_hdr *)tx->skb->data; + struct ieee80211_tx_info *info = IEEE80211_SKB_CB(tx->skb); u32 sta_flags; - if (unlikely(tx->flags & IEEE80211_TX_INJECTED)) + if (unlikely(info->flags & IEEE80211_TX_CTL_INJECTED)) return TX_CONTINUE; - if (unlikely(tx->local->sta_sw_scanning) && - ((tx->fc & IEEE80211_FCTL_FTYPE) != IEEE80211_FTYPE_MGMT || - (tx->fc & IEEE80211_FCTL_STYPE) != IEEE80211_STYPE_PROBE_REQ)) + if (unlikely(tx->local->sw_scanning) && + !ieee80211_is_probe_req(hdr->frame_control)) return TX_DROP; - if (tx->sdata->vif.type == IEEE80211_IF_TYPE_MESH_POINT) + if (tx->sdata->vif.type == NL80211_IFTYPE_MESH_POINT) return TX_CONTINUE; if (tx->flags & IEEE80211_TX_PS_BUFFERED) return TX_CONTINUE; - sta_flags = tx->sta ? tx->sta->flags : 0; + sta_flags = tx->sta ? get_sta_flags(tx->sta) : 0; if (likely(tx->flags & IEEE80211_TX_UNICAST)) { if (unlikely(!(sta_flags & WLAN_STA_ASSOC) && - tx->sdata->vif.type != IEEE80211_IF_TYPE_IBSS && - (tx->fc & IEEE80211_FCTL_FTYPE) == IEEE80211_FTYPE_DATA)) { + tx->sdata->vif.type != NL80211_IFTYPE_ADHOC && + ieee80211_is_data(hdr->frame_control))) { #ifdef CONFIG_MAC80211_VERBOSE_DEBUG DECLARE_MAC_BUF(mac); printk(KERN_DEBUG "%s: dropped data frame to not " @@ -272,9 +210,9 @@ ieee80211_tx_h_check_assoc(struct ieee80211_tx_data *tx) return TX_DROP; } } else { - if (unlikely((tx->fc & IEEE80211_FCTL_FTYPE) == IEEE80211_FTYPE_DATA && + if (unlikely(ieee80211_is_data(hdr->frame_control) && tx->local->num_sta == 0 && - tx->sdata->vif.type != IEEE80211_IF_TYPE_IBSS)) { + tx->sdata->vif.type != NL80211_IFTYPE_ADHOC)) { /* * No associated STAs - no need to send multicast * frames. @@ -287,17 +225,6 @@ ieee80211_tx_h_check_assoc(struct ieee80211_tx_data *tx) return TX_CONTINUE; } -static ieee80211_tx_result -ieee80211_tx_h_sequence(struct ieee80211_tx_data *tx) -{ - struct ieee80211_hdr *hdr = (struct ieee80211_hdr *)tx->skb->data; - - if (ieee80211_get_hdrlen(le16_to_cpu(hdr->frame_control)) >= 24) - ieee80211_include_sequence(tx->sdata, hdr); - - return TX_CONTINUE; -} - /* This function is called whenever the AP is about to exceed the maximum limit * of buffered frames for power saving STAs. This situation should not really * happen often during normal operation, so dropping the oldest buffered packet @@ -316,8 +243,7 @@ static void purge_old_ps_buffers(struct ieee80211_local *local) list_for_each_entry_rcu(sdata, &local->interfaces, list) { struct ieee80211_if_ap *ap; - if (sdata->dev == local->mdev || - sdata->vif.type != IEEE80211_IF_TYPE_AP) + if (sdata->vif.type != NL80211_IFTYPE_AP) continue; ap = &sdata->u.ap; skb = skb_dequeue(&ap->ps_bc_buf); @@ -340,13 +266,18 @@ static void purge_old_ps_buffers(struct ieee80211_local *local) rcu_read_unlock(); local->total_ps_buffered = total; +#ifdef CONFIG_MAC80211_VERBOSE_PS_DEBUG printk(KERN_DEBUG "%s: PS buffers full - purged %d frames\n", wiphy_name(local->hw.wiphy), purged); +#endif } static ieee80211_tx_result ieee80211_tx_h_multicast_ps_buf(struct ieee80211_tx_data *tx) { + struct ieee80211_tx_info *info = IEEE80211_SKB_CB(tx->skb); + struct ieee80211_hdr *hdr = (struct ieee80211_hdr *)tx->skb->data; + /* * broadcast/multicast frame * @@ -355,8 +286,12 @@ ieee80211_tx_h_multicast_ps_buf(struct ieee80211_tx_data *tx) * This is done either by the hardware or us. */ - /* not AP/IBSS or ordered frame */ - if (!tx->sdata->bss || (tx->fc & IEEE80211_FCTL_ORDER)) + /* powersaving STAs only in AP/VLAN mode */ + if (!tx->sdata->bss) + return TX_CONTINUE; + + /* no buffering for ordered frames */ + if (ieee80211_has_order(hdr->frame_control)) return TX_CONTINUE; /* no stations in PS mode */ @@ -369,11 +304,13 @@ ieee80211_tx_h_multicast_ps_buf(struct ieee80211_tx_data *tx) purge_old_ps_buffers(tx->local); if (skb_queue_len(&tx->sdata->bss->ps_bc_buf) >= AP_MAX_BC_BUFFER) { +#ifdef CONFIG_MAC80211_VERBOSE_PS_DEBUG if (net_ratelimit()) { printk(KERN_DEBUG "%s: BC TX buffer full - " "dropping the oldest frame\n", tx->dev->name); } +#endif dev_kfree_skb(skb_dequeue(&tx->sdata->bss->ps_bc_buf)); } else tx->local->total_ps_buffered++; @@ -382,7 +319,7 @@ ieee80211_tx_h_multicast_ps_buf(struct ieee80211_tx_data *tx) } /* buffered in hardware */ - tx->control->flags |= IEEE80211_TXCTL_SEND_AFTER_DTIM; + info->flags |= IEEE80211_TX_CTL_SEND_AFTER_DTIM; return TX_CONTINUE; } @@ -391,31 +328,35 @@ static ieee80211_tx_result ieee80211_tx_h_unicast_ps_buf(struct ieee80211_tx_data *tx) { struct sta_info *sta = tx->sta; + struct ieee80211_tx_info *info = IEEE80211_SKB_CB(tx->skb); + struct ieee80211_hdr *hdr = (struct ieee80211_hdr *)tx->skb->data; + u32 staflags; DECLARE_MAC_BUF(mac); - if (unlikely(!sta || - ((tx->fc & IEEE80211_FCTL_FTYPE) == IEEE80211_FTYPE_MGMT && - (tx->fc & IEEE80211_FCTL_STYPE) == IEEE80211_STYPE_PROBE_RESP))) + if (unlikely(!sta || ieee80211_is_probe_resp(hdr->frame_control))) return TX_CONTINUE; - if (unlikely((sta->flags & WLAN_STA_PS) && - !(sta->flags & WLAN_STA_PSPOLL))) { - struct ieee80211_tx_packet_data *pkt_data; + staflags = get_sta_flags(sta); + + if (unlikely((staflags & WLAN_STA_PS) && + !(staflags & WLAN_STA_PSPOLL))) { #ifdef CONFIG_MAC80211_VERBOSE_PS_DEBUG printk(KERN_DEBUG "STA %s aid %d: PS buffer (entries " "before %d)\n", - print_mac(mac, sta->addr), sta->aid, + print_mac(mac, sta->sta.addr), sta->sta.aid, skb_queue_len(&sta->ps_tx_buf)); #endif /* CONFIG_MAC80211_VERBOSE_PS_DEBUG */ if (tx->local->total_ps_buffered >= TOTAL_MAX_TX_BUFFER) purge_old_ps_buffers(tx->local); if (skb_queue_len(&sta->ps_tx_buf) >= STA_MAX_TX_BUFFER) { struct sk_buff *old = skb_dequeue(&sta->ps_tx_buf); +#ifdef CONFIG_MAC80211_VERBOSE_PS_DEBUG if (net_ratelimit()) { printk(KERN_DEBUG "%s: STA %s TX " "buffer full - dropping oldest frame\n", - tx->dev->name, print_mac(mac, sta->addr)); + tx->dev->name, print_mac(mac, sta->sta.addr)); } +#endif dev_kfree_skb(old); } else tx->local->total_ps_buffered++; @@ -424,24 +365,23 @@ ieee80211_tx_h_unicast_ps_buf(struct ieee80211_tx_data *tx) if (skb_queue_empty(&sta->ps_tx_buf)) sta_info_set_tim_bit(sta); - pkt_data = (struct ieee80211_tx_packet_data *)tx->skb->cb; - pkt_data->jiffies = jiffies; + info->control.jiffies = jiffies; skb_queue_tail(&sta->ps_tx_buf, tx->skb); return TX_QUEUED; } #ifdef CONFIG_MAC80211_VERBOSE_PS_DEBUG - else if (unlikely(sta->flags & WLAN_STA_PS)) { + else if (unlikely(test_sta_flags(sta, WLAN_STA_PS))) { printk(KERN_DEBUG "%s: STA %s in PS mode, but pspoll " "set -> send frame\n", tx->dev->name, - print_mac(mac, sta->addr)); + print_mac(mac, sta->sta.addr)); } #endif /* CONFIG_MAC80211_VERBOSE_PS_DEBUG */ - sta->flags &= ~WLAN_STA_PSPOLL; + clear_sta_flags(sta, WLAN_STA_PSPOLL); return TX_CONTINUE; } -static ieee80211_tx_result +static ieee80211_tx_result debug_noinline ieee80211_tx_h_ps_buf(struct ieee80211_tx_data *tx) { if (unlikely(tx->flags & IEEE80211_TX_PS_BUFFERED)) @@ -453,58 +393,246 @@ ieee80211_tx_h_ps_buf(struct ieee80211_tx_data *tx) return ieee80211_tx_h_multicast_ps_buf(tx); } -static ieee80211_tx_result +static ieee80211_tx_result debug_noinline ieee80211_tx_h_select_key(struct ieee80211_tx_data *tx) { struct ieee80211_key *key; - u16 fc = tx->fc; + struct ieee80211_tx_info *info = IEEE80211_SKB_CB(tx->skb); + struct ieee80211_hdr *hdr = (struct ieee80211_hdr *)tx->skb->data; - if (unlikely(tx->control->flags & IEEE80211_TXCTL_DO_NOT_ENCRYPT)) + if (unlikely(tx->skb->do_not_encrypt)) tx->key = NULL; else if (tx->sta && (key = rcu_dereference(tx->sta->key))) tx->key = key; else if ((key = rcu_dereference(tx->sdata->default_key))) tx->key = key; else if (tx->sdata->drop_unencrypted && - !(tx->control->flags & IEEE80211_TXCTL_EAPOL_FRAME) && - !(tx->flags & IEEE80211_TX_INJECTED)) { + (tx->skb->protocol != cpu_to_be16(ETH_P_PAE)) && + !(info->flags & IEEE80211_TX_CTL_INJECTED)) { I802_DEBUG_INC(tx->local->tx_handlers_drop_unencrypted); return TX_DROP; } else tx->key = NULL; if (tx->key) { - u16 ftype, stype; - tx->key->tx_rx_count++; /* TODO: add threshold stuff again */ switch (tx->key->conf.alg) { case ALG_WEP: - ftype = fc & IEEE80211_FCTL_FTYPE; - stype = fc & IEEE80211_FCTL_STYPE; - - if (ftype == IEEE80211_FTYPE_MGMT && - stype == IEEE80211_STYPE_AUTH) + if (ieee80211_is_auth(hdr->frame_control)) break; case ALG_TKIP: case ALG_CCMP: - if (!WLAN_FC_DATA_PRESENT(fc)) + if (!ieee80211_is_data_present(hdr->frame_control)) tx->key = NULL; break; } } if (!tx->key || !(tx->key->flags & KEY_FLAG_UPLOADED_TO_HARDWARE)) - tx->control->flags |= IEEE80211_TXCTL_DO_NOT_ENCRYPT; + tx->skb->do_not_encrypt = 1; return TX_CONTINUE; } -static ieee80211_tx_result +static ieee80211_tx_result debug_noinline +ieee80211_tx_h_rate_ctrl(struct ieee80211_tx_data *tx) +{ + struct rate_selection rsel; + struct ieee80211_supported_band *sband; + struct ieee80211_tx_info *info = IEEE80211_SKB_CB(tx->skb); + + sband = tx->local->hw.wiphy->bands[tx->channel->band]; + + if (likely(tx->rate_idx < 0)) { + rate_control_get_rate(tx->sdata, sband, tx->sta, + tx->skb, &rsel); + if (tx->sta) + tx->sta->last_txrate_idx = rsel.rate_idx; + tx->rate_idx = rsel.rate_idx; + if (unlikely(rsel.probe_idx >= 0)) { + info->flags |= IEEE80211_TX_CTL_RATE_CTRL_PROBE; + tx->flags |= IEEE80211_TX_PROBE_LAST_FRAG; + info->control.retries[0].rate_idx = tx->rate_idx; + info->control.retries[0].limit = tx->local->hw.max_altrate_tries; + tx->rate_idx = rsel.probe_idx; + } else if (info->control.retries[0].limit == 0) + info->control.retries[0].rate_idx = -1; + + if (unlikely(tx->rate_idx < 0)) + return TX_DROP; + } else + info->control.retries[0].rate_idx = -1; + + if (tx->sdata->bss_conf.use_cts_prot && + (tx->flags & IEEE80211_TX_FRAGMENTED) && (rsel.nonerp_idx >= 0)) { + tx->last_frag_rate_idx = tx->rate_idx; + if (rsel.probe_idx >= 0) + tx->flags &= ~IEEE80211_TX_PROBE_LAST_FRAG; + else + tx->flags |= IEEE80211_TX_PROBE_LAST_FRAG; + tx->rate_idx = rsel.nonerp_idx; + info->tx_rate_idx = rsel.nonerp_idx; + info->flags &= ~IEEE80211_TX_CTL_RATE_CTRL_PROBE; + } else { + tx->last_frag_rate_idx = tx->rate_idx; + info->tx_rate_idx = tx->rate_idx; + } + info->tx_rate_idx = tx->rate_idx; + + return TX_CONTINUE; +} + +static ieee80211_tx_result debug_noinline +ieee80211_tx_h_misc(struct ieee80211_tx_data *tx) +{ + struct ieee80211_hdr *hdr = (struct ieee80211_hdr *)tx->skb->data; + struct ieee80211_tx_info *info = IEEE80211_SKB_CB(tx->skb); + struct ieee80211_supported_band *sband; + + sband = tx->local->hw.wiphy->bands[tx->channel->band]; + + if (tx->sta) + info->control.sta = &tx->sta->sta; + + if (!info->control.retry_limit) { + if (!is_multicast_ether_addr(hdr->addr1)) { + int len = min_t(int, tx->skb->len + FCS_LEN, + tx->local->fragmentation_threshold); + if (len > tx->local->rts_threshold + && tx->local->rts_threshold < + IEEE80211_MAX_RTS_THRESHOLD) { + info->flags |= IEEE80211_TX_CTL_USE_RTS_CTS; + info->flags |= + IEEE80211_TX_CTL_LONG_RETRY_LIMIT; + info->control.retry_limit = + tx->local->long_retry_limit; + } else { + info->control.retry_limit = + tx->local->short_retry_limit; + } + } else { + info->control.retry_limit = 1; + } + } + + if (tx->flags & IEEE80211_TX_FRAGMENTED) { + /* Do not use multiple retry rates when sending fragmented + * frames. + * TODO: The last fragment could still use multiple retry + * rates. */ + info->control.retries[0].rate_idx = -1; + } + + /* Use CTS protection for unicast frames sent using extended rates if + * there are associated non-ERP stations and RTS/CTS is not configured + * for the frame. */ + if ((tx->sdata->flags & IEEE80211_SDATA_OPERATING_GMODE) && + (sband->bitrates[tx->rate_idx].flags & IEEE80211_RATE_ERP_G) && + (tx->flags & IEEE80211_TX_UNICAST) && + tx->sdata->bss_conf.use_cts_prot && + !(info->flags & IEEE80211_TX_CTL_USE_RTS_CTS)) + info->flags |= IEEE80211_TX_CTL_USE_CTS_PROTECT; + + /* Transmit data frames using short preambles if the driver supports + * short preambles at the selected rate and short preambles are + * available on the network at the current point in time. */ + if (ieee80211_is_data(hdr->frame_control) && + (sband->bitrates[tx->rate_idx].flags & IEEE80211_RATE_SHORT_PREAMBLE) && + tx->sdata->bss_conf.use_short_preamble && + (!tx->sta || test_sta_flags(tx->sta, WLAN_STA_SHORT_PREAMBLE))) { + info->flags |= IEEE80211_TX_CTL_SHORT_PREAMBLE; + } + + if ((info->flags & IEEE80211_TX_CTL_USE_RTS_CTS) || + (info->flags & IEEE80211_TX_CTL_USE_CTS_PROTECT)) { + struct ieee80211_rate *rate; + s8 baserate = -1; + int idx; + + /* Do not use multiple retry rates when using RTS/CTS */ + info->control.retries[0].rate_idx = -1; + + /* Use min(data rate, max base rate) as CTS/RTS rate */ + rate = &sband->bitrates[tx->rate_idx]; + + for (idx = 0; idx < sband->n_bitrates; idx++) { + if (sband->bitrates[idx].bitrate > rate->bitrate) + continue; + if (tx->sdata->bss_conf.basic_rates & BIT(idx) && + (baserate < 0 || + (sband->bitrates[baserate].bitrate + < sband->bitrates[idx].bitrate))) + baserate = idx; + } + + if (baserate >= 0) + info->control.rts_cts_rate_idx = baserate; + else + info->control.rts_cts_rate_idx = 0; + } + + if (tx->sta) + info->control.sta = &tx->sta->sta; + + return TX_CONTINUE; +} + +static ieee80211_tx_result debug_noinline +ieee80211_tx_h_sequence(struct ieee80211_tx_data *tx) +{ + struct ieee80211_tx_info *info = IEEE80211_SKB_CB(tx->skb); + struct ieee80211_hdr *hdr = (struct ieee80211_hdr *)tx->skb->data; + u16 *seq; + u8 *qc; + int tid; + + /* + * Packet injection may want to control the sequence + * number, if we have no matching interface then we + * neither assign one ourselves nor ask the driver to. + */ + if (unlikely(!info->control.vif)) + return TX_CONTINUE; + + if (unlikely(ieee80211_is_ctl(hdr->frame_control))) + return TX_CONTINUE; + + if (ieee80211_hdrlen(hdr->frame_control) < 24) + return TX_CONTINUE; + + if (!ieee80211_is_data_qos(hdr->frame_control)) { + info->flags |= IEEE80211_TX_CTL_ASSIGN_SEQ; + return TX_CONTINUE; + } + + /* + * This should be true for injected/management frames only, for + * management frames we have set the IEEE80211_TX_CTL_ASSIGN_SEQ + * above since they are not QoS-data frames. + */ + if (!tx->sta) + return TX_CONTINUE; + + /* include per-STA, per-TID sequence counter */ + + qc = ieee80211_get_qos_ctl(hdr); + tid = *qc & IEEE80211_QOS_CTL_TID_MASK; + seq = &tx->sta->tid_seq[tid]; + + hdr->seq_ctrl = cpu_to_le16(*seq); + + /* Increase the sequence number. */ + *seq = (*seq + 0x10) & IEEE80211_SCTL_SEQ; + + return TX_CONTINUE; +} + +static ieee80211_tx_result debug_noinline ieee80211_tx_h_fragment(struct ieee80211_tx_data *tx) { - struct ieee80211_hdr *hdr = (struct ieee80211_hdr *) tx->skb->data; + struct ieee80211_hdr *hdr = (struct ieee80211_hdr *)tx->skb->data; size_t hdrlen, per_fragm, num_fragm, payload_len, left; struct sk_buff **frags, *first, *frag; int i; @@ -515,9 +643,19 @@ ieee80211_tx_h_fragment(struct ieee80211_tx_data *tx) if (!(tx->flags & IEEE80211_TX_FRAGMENTED)) return TX_CONTINUE; + /* + * Warn when submitting a fragmented A-MPDU frame and drop it. + * This scenario is handled in __ieee80211_tx_prepare but extra + * caution taken here as fragmented ampdu may cause Tx stop. + */ + if (WARN_ON(tx->flags & IEEE80211_TX_CTL_AMPDU || + skb_get_queue_mapping(tx->skb) >= + ieee80211_num_regular_queues(&tx->local->hw))) + return TX_DROP; + first = tx->skb; - hdrlen = ieee80211_get_hdrlen(tx->fc); + hdrlen = ieee80211_hdrlen(hdr->frame_control); payload_len = first->len - hdrlen; per_fragm = frag_threshold - hdrlen - FCS_LEN; num_fragm = DIV_ROUND_UP(payload_len, per_fragm); @@ -558,6 +696,9 @@ ieee80211_tx_h_fragment(struct ieee80211_tx_data *tx) fhdr->seq_ctrl = cpu_to_le16(seq | ((i + 1) & IEEE80211_SCTL_FRAG)); copylen = left > per_fragm ? per_fragm : left; memcpy(skb_put(frag, copylen), pos, copylen); + memcpy(frag->cb, first->cb, sizeof(frag->cb)); + skb_copy_queue_mapping(frag, first); + frag->do_not_encrypt = first->do_not_encrypt; pos += copylen; left -= copylen; @@ -570,7 +711,6 @@ ieee80211_tx_h_fragment(struct ieee80211_tx_data *tx) return TX_CONTINUE; fail: - printk(KERN_DEBUG "%s: failed to fragment frame\n", tx->dev->name); if (frags) { for (i = 0; i < num_fragm - 1; i++) if (frags[i]) @@ -581,7 +721,7 @@ ieee80211_tx_h_fragment(struct ieee80211_tx_data *tx) return TX_DROP; } -static ieee80211_tx_result +static ieee80211_tx_result debug_noinline ieee80211_tx_h_encrypt(struct ieee80211_tx_data *tx) { if (!tx->key) @@ -601,236 +741,57 @@ ieee80211_tx_h_encrypt(struct ieee80211_tx_data *tx) return TX_DROP; } -static ieee80211_tx_result -ieee80211_tx_h_rate_ctrl(struct ieee80211_tx_data *tx) +static ieee80211_tx_result debug_noinline +ieee80211_tx_h_calculate_duration(struct ieee80211_tx_data *tx) { - struct rate_selection rsel; - struct ieee80211_supported_band *sband; - - sband = tx->local->hw.wiphy->bands[tx->local->hw.conf.channel->band]; - - if (likely(!tx->rate)) { - rate_control_get_rate(tx->dev, sband, tx->skb, &rsel); - tx->rate = rsel.rate; - if (unlikely(rsel.probe)) { - tx->control->flags |= - IEEE80211_TXCTL_RATE_CTRL_PROBE; - tx->flags |= IEEE80211_TX_PROBE_LAST_FRAG; - tx->control->alt_retry_rate = tx->rate; - tx->rate = rsel.probe; - } else - tx->control->alt_retry_rate = NULL; - - if (!tx->rate) - return TX_DROP; - } else - tx->control->alt_retry_rate = NULL; + struct ieee80211_hdr *hdr = (struct ieee80211_hdr *)tx->skb->data; + int next_len, i; + int group_addr = is_multicast_ether_addr(hdr->addr1); - if (tx->sdata->bss_conf.use_cts_prot && - (tx->flags & IEEE80211_TX_FRAGMENTED) && rsel.nonerp) { - tx->last_frag_rate = tx->rate; - if (rsel.probe) - tx->flags &= ~IEEE80211_TX_PROBE_LAST_FRAG; - else - tx->flags |= IEEE80211_TX_PROBE_LAST_FRAG; - tx->rate = rsel.nonerp; - tx->control->tx_rate = rsel.nonerp; - tx->control->flags &= ~IEEE80211_TXCTL_RATE_CTRL_PROBE; - } else { - tx->last_frag_rate = tx->rate; - tx->control->tx_rate = tx->rate; + if (!(tx->flags & IEEE80211_TX_FRAGMENTED)) { + hdr->duration_id = ieee80211_duration(tx, group_addr, 0); + return TX_CONTINUE; } - tx->control->tx_rate = tx->rate; - - return TX_CONTINUE; -} -static ieee80211_tx_result -ieee80211_tx_h_misc(struct ieee80211_tx_data *tx) -{ - struct ieee80211_hdr *hdr = (struct ieee80211_hdr *) tx->skb->data; - u16 fc = le16_to_cpu(hdr->frame_control); - u16 dur; - struct ieee80211_tx_control *control = tx->control; + hdr->duration_id = ieee80211_duration(tx, group_addr, + tx->extra_frag[0]->len); - if (!control->retry_limit) { - if (!is_multicast_ether_addr(hdr->addr1)) { - if (tx->skb->len + FCS_LEN > tx->local->rts_threshold - && tx->local->rts_threshold < - IEEE80211_MAX_RTS_THRESHOLD) { - control->flags |= - IEEE80211_TXCTL_USE_RTS_CTS; - control->flags |= - IEEE80211_TXCTL_LONG_RETRY_LIMIT; - control->retry_limit = - tx->local->long_retry_limit; - } else { - control->retry_limit = - tx->local->short_retry_limit; - } + for (i = 0; i < tx->num_extra_frag; i++) { + if (i + 1 < tx->num_extra_frag) { + next_len = tx->extra_frag[i + 1]->len; } else { - control->retry_limit = 1; - } - } - - if (tx->flags & IEEE80211_TX_FRAGMENTED) { - /* Do not use multiple retry rates when sending fragmented - * frames. - * TODO: The last fragment could still use multiple retry - * rates. */ - control->alt_retry_rate = NULL; - } - - /* Use CTS protection for unicast frames sent using extended rates if - * there are associated non-ERP stations and RTS/CTS is not configured - * for the frame. */ - if ((tx->sdata->flags & IEEE80211_SDATA_OPERATING_GMODE) && - (tx->rate->flags & IEEE80211_RATE_ERP_G) && - (tx->flags & IEEE80211_TX_UNICAST) && - tx->sdata->bss_conf.use_cts_prot && - !(control->flags & IEEE80211_TXCTL_USE_RTS_CTS)) - control->flags |= IEEE80211_TXCTL_USE_CTS_PROTECT; - - /* Transmit data frames using short preambles if the driver supports - * short preambles at the selected rate and short preambles are - * available on the network at the current point in time. */ - if (((fc & IEEE80211_FCTL_FTYPE) == IEEE80211_FTYPE_DATA) && - (tx->rate->flags & IEEE80211_RATE_SHORT_PREAMBLE) && - tx->sdata->bss_conf.use_short_preamble && - (!tx->sta || (tx->sta->flags & WLAN_STA_SHORT_PREAMBLE))) { - tx->control->flags |= IEEE80211_TXCTL_SHORT_PREAMBLE; - } - - /* Setup duration field for the first fragment of the frame. Duration - * for remaining fragments will be updated when they are being sent - * to low-level driver in ieee80211_tx(). */ - dur = ieee80211_duration(tx, is_multicast_ether_addr(hdr->addr1), - (tx->flags & IEEE80211_TX_FRAGMENTED) ? - tx->extra_frag[0]->len : 0); - hdr->duration_id = cpu_to_le16(dur); - - if ((control->flags & IEEE80211_TXCTL_USE_RTS_CTS) || - (control->flags & IEEE80211_TXCTL_USE_CTS_PROTECT)) { - struct ieee80211_supported_band *sband; - struct ieee80211_rate *rate, *baserate; - int idx; - - sband = tx->local->hw.wiphy->bands[ - tx->local->hw.conf.channel->band]; - - /* Do not use multiple retry rates when using RTS/CTS */ - control->alt_retry_rate = NULL; - - /* Use min(data rate, max base rate) as CTS/RTS rate */ - rate = tx->rate; - baserate = NULL; - - for (idx = 0; idx < sband->n_bitrates; idx++) { - if (sband->bitrates[idx].bitrate > rate->bitrate) - continue; - if (tx->sdata->basic_rates & BIT(idx) && - (!baserate || - (baserate->bitrate < sband->bitrates[idx].bitrate))) - baserate = &sband->bitrates[idx]; + next_len = 0; + tx->rate_idx = tx->last_frag_rate_idx; } - if (baserate) - control->rts_cts_rate = baserate; - else - control->rts_cts_rate = &sband->bitrates[0]; - } - - if (tx->sta) { - control->aid = tx->sta->aid; - tx->sta->tx_packets++; - tx->sta->tx_fragments++; - tx->sta->tx_bytes += tx->skb->len; - if (tx->extra_frag) { - int i; - tx->sta->tx_fragments += tx->num_extra_frag; - for (i = 0; i < tx->num_extra_frag; i++) { - tx->sta->tx_bytes += - tx->extra_frag[i]->len; - } - } + hdr = (struct ieee80211_hdr *)tx->extra_frag[i]->data; + hdr->duration_id = ieee80211_duration(tx, 0, next_len); } return TX_CONTINUE; } -static ieee80211_tx_result -ieee80211_tx_h_load_stats(struct ieee80211_tx_data *tx) +static ieee80211_tx_result debug_noinline +ieee80211_tx_h_stats(struct ieee80211_tx_data *tx) { - struct ieee80211_local *local = tx->local; - struct sk_buff *skb = tx->skb; - struct ieee80211_hdr *hdr = (struct ieee80211_hdr *) skb->data; - u32 load = 0, hdrtime; - struct ieee80211_rate *rate = tx->rate; - - /* TODO: this could be part of tx_status handling, so that the number - * of retries would be known; TX rate should in that case be stored - * somewhere with the packet */ - - /* Estimate total channel use caused by this frame */ - - /* 1 bit at 1 Mbit/s takes 1 usec; in channel_use values, - * 1 usec = 1/8 * (1080 / 10) = 13.5 */ - - if (tx->channel->band == IEEE80211_BAND_5GHZ || - (tx->channel->band == IEEE80211_BAND_2GHZ && - rate->flags & IEEE80211_RATE_ERP_G)) - hdrtime = CHAN_UTIL_HDR_SHORT; - else - hdrtime = CHAN_UTIL_HDR_LONG; - - load = hdrtime; - if (!is_multicast_ether_addr(hdr->addr1)) - load += hdrtime; - - if (tx->control->flags & IEEE80211_TXCTL_USE_RTS_CTS) - load += 2 * hdrtime; - else if (tx->control->flags & IEEE80211_TXCTL_USE_CTS_PROTECT) - load += hdrtime; + int i; - /* TODO: optimise again */ - load += skb->len * CHAN_UTIL_RATE_LCM / rate->bitrate; + if (!tx->sta) + return TX_CONTINUE; + tx->sta->tx_packets++; + tx->sta->tx_fragments++; + tx->sta->tx_bytes += tx->skb->len; if (tx->extra_frag) { - int i; - for (i = 0; i < tx->num_extra_frag; i++) { - load += 2 * hdrtime; - load += tx->extra_frag[i]->len * - tx->rate->bitrate; - } + tx->sta->tx_fragments += tx->num_extra_frag; + for (i = 0; i < tx->num_extra_frag; i++) + tx->sta->tx_bytes += tx->extra_frag[i]->len; } - /* Divide channel_use by 8 to avoid wrapping around the counter */ - load >>= CHAN_UTIL_SHIFT; - local->channel_use_raw += load; - if (tx->sta) - tx->sta->channel_use_raw += load; - tx->sdata->channel_use_raw += load; - return TX_CONTINUE; } -typedef ieee80211_tx_result (*ieee80211_tx_handler)(struct ieee80211_tx_data *); -static ieee80211_tx_handler ieee80211_tx_handlers[] = -{ - ieee80211_tx_h_check_assoc, - ieee80211_tx_h_sequence, - ieee80211_tx_h_ps_buf, - ieee80211_tx_h_select_key, - ieee80211_tx_h_michael_mic_add, - ieee80211_tx_h_fragment, - ieee80211_tx_h_encrypt, - ieee80211_tx_h_rate_ctrl, - ieee80211_tx_h_misc, - ieee80211_tx_h_load_stats, - NULL -}; - /* actual transmit path */ /* @@ -854,12 +815,11 @@ __ieee80211_parse_tx_radiotap(struct ieee80211_tx_data *tx, (struct ieee80211_radiotap_header *) skb->data; struct ieee80211_supported_band *sband; int ret = ieee80211_radiotap_iterator_init(&iterator, rthdr, skb->len); - struct ieee80211_tx_control *control = tx->control; + struct ieee80211_tx_info *info = IEEE80211_SKB_CB(skb); - sband = tx->local->hw.wiphy->bands[tx->local->hw.conf.channel->band]; + sband = tx->local->hw.wiphy->bands[tx->channel->band]; - control->flags |= IEEE80211_TXCTL_DO_NOT_ENCRYPT; - tx->flags |= IEEE80211_TX_INJECTED; + skb->do_not_encrypt = 1; tx->flags &= ~IEEE80211_TX_FRAGMENTED; /* @@ -896,7 +856,7 @@ __ieee80211_parse_tx_radiotap(struct ieee80211_tx_data *tx, r = &sband->bitrates[i]; if (r->bitrate == target_rate) { - tx->rate = r; + tx->rate_idx = i; break; } } @@ -907,7 +867,7 @@ __ieee80211_parse_tx_radiotap(struct ieee80211_tx_data *tx, * radiotap uses 0 for 1st ant, mac80211 is 1 for * 1st ant */ - control->antenna_sel_tx = (*iterator.this_arg) + 1; + info->antenna_sel_tx = (*iterator.this_arg) + 1; break; #if 0 @@ -931,8 +891,7 @@ __ieee80211_parse_tx_radiotap(struct ieee80211_tx_data *tx, skb_trim(skb, skb->len - FCS_LEN); } if (*iterator.this_arg & IEEE80211_RADIOTAP_F_WEP) - control->flags &= - ~IEEE80211_TXCTL_DO_NOT_ENCRYPT; + tx->skb->do_not_encrypt = 0; if (*iterator.this_arg & IEEE80211_RADIOTAP_F_FRAG) tx->flags |= IEEE80211_TX_FRAGMENTED; break; @@ -967,12 +926,12 @@ __ieee80211_parse_tx_radiotap(struct ieee80211_tx_data *tx, static ieee80211_tx_result __ieee80211_tx_prepare(struct ieee80211_tx_data *tx, struct sk_buff *skb, - struct net_device *dev, - struct ieee80211_tx_control *control) + struct net_device *dev) { struct ieee80211_local *local = wdev_priv(dev->ieee80211_ptr); struct ieee80211_hdr *hdr; struct ieee80211_sub_if_data *sdata; + struct ieee80211_tx_info *info = IEEE80211_SKB_CB(skb); int hdrlen; @@ -981,7 +940,9 @@ __ieee80211_tx_prepare(struct ieee80211_tx_data *tx, tx->dev = dev; /* use original interface */ tx->local = local; tx->sdata = IEEE80211_DEV_TO_SUB_IF(dev); - tx->control = control; + tx->channel = local->hw.conf.channel; + tx->rate_idx = -1; + tx->last_frag_rate_idx = -1; /* * Set this flag (used below to indicate "automatic fragmentation"), * it will be cleared/left by radiotap as desired. @@ -990,7 +951,7 @@ __ieee80211_tx_prepare(struct ieee80211_tx_data *tx, /* process and remove the injection radiotap header */ sdata = IEEE80211_DEV_TO_SUB_IF(dev); - if (unlikely(sdata->vif.type == IEEE80211_IF_TYPE_MNTR)) { + if (unlikely(info->flags & IEEE80211_TX_CTL_INJECTED)) { if (__ieee80211_parse_tx_radiotap(tx, skb) == TX_DROP) return TX_DROP; @@ -1004,38 +965,36 @@ __ieee80211_tx_prepare(struct ieee80211_tx_data *tx, hdr = (struct ieee80211_hdr *) skb->data; tx->sta = sta_info_get(local, hdr->addr1); - tx->fc = le16_to_cpu(hdr->frame_control); if (is_multicast_ether_addr(hdr->addr1)) { tx->flags &= ~IEEE80211_TX_UNICAST; - control->flags |= IEEE80211_TXCTL_NO_ACK; + info->flags |= IEEE80211_TX_CTL_NO_ACK; } else { tx->flags |= IEEE80211_TX_UNICAST; - control->flags &= ~IEEE80211_TXCTL_NO_ACK; + info->flags &= ~IEEE80211_TX_CTL_NO_ACK; } if (tx->flags & IEEE80211_TX_FRAGMENTED) { if ((tx->flags & IEEE80211_TX_UNICAST) && skb->len + FCS_LEN > local->fragmentation_threshold && - !local->ops->set_frag_threshold) + !local->ops->set_frag_threshold && + !(info->flags & IEEE80211_TX_CTL_AMPDU)) tx->flags |= IEEE80211_TX_FRAGMENTED; else tx->flags &= ~IEEE80211_TX_FRAGMENTED; } if (!tx->sta) - control->flags |= IEEE80211_TXCTL_CLEAR_PS_FILT; - else if (tx->sta->flags & WLAN_STA_CLEAR_PS_FILT) { - control->flags |= IEEE80211_TXCTL_CLEAR_PS_FILT; - tx->sta->flags &= ~WLAN_STA_CLEAR_PS_FILT; - } + info->flags |= IEEE80211_TX_CTL_CLEAR_PS_FILT; + else if (test_and_clear_sta_flags(tx->sta, WLAN_STA_CLEAR_PS_FILT)) + info->flags |= IEEE80211_TX_CTL_CLEAR_PS_FILT; - hdrlen = ieee80211_get_hdrlen(tx->fc); + hdrlen = ieee80211_hdrlen(hdr->frame_control); if (skb->len > hdrlen + sizeof(rfc1042_header) + 2) { u8 *pos = &skb->data[hdrlen + sizeof(rfc1042_header)]; tx->ethertype = (pos[0] << 8) | pos[1]; } - control->flags |= IEEE80211_TXCTL_FIRST_FRAGMENT; + info->flags |= IEEE80211_TX_CTL_FIRST_FRAGMENT; return TX_CONTINUE; } @@ -1043,24 +1002,21 @@ __ieee80211_tx_prepare(struct ieee80211_tx_data *tx, /* * NB: @tx is uninitialised when passed in here */ -static int ieee80211_tx_prepare(struct ieee80211_tx_data *tx, - struct sk_buff *skb, - struct net_device *mdev, - struct ieee80211_tx_control *control) +static int ieee80211_tx_prepare(struct ieee80211_local *local, + struct ieee80211_tx_data *tx, + struct sk_buff *skb) { - struct ieee80211_tx_packet_data *pkt_data; struct net_device *dev; - pkt_data = (struct ieee80211_tx_packet_data *)skb->cb; - dev = dev_get_by_index(&init_net, pkt_data->ifindex); - if (unlikely(dev && !is_ieee80211_device(dev, mdev))) { + dev = dev_get_by_index(&init_net, skb->iif); + if (unlikely(dev && !is_ieee80211_device(local, dev))) { dev_put(dev); dev = NULL; } if (unlikely(!dev)) return -ENODEV; /* initialises tx with control */ - __ieee80211_tx_prepare(tx, skb, dev, control); + __ieee80211_tx_prepare(tx, skb, dev); dev_put(dev); return 0; } @@ -1068,50 +1024,45 @@ static int ieee80211_tx_prepare(struct ieee80211_tx_data *tx, static int __ieee80211_tx(struct ieee80211_local *local, struct sk_buff *skb, struct ieee80211_tx_data *tx) { - struct ieee80211_tx_control *control = tx->control; + struct ieee80211_tx_info *info; int ret, i; - if (!ieee80211_qdisc_installed(local->mdev) && - __ieee80211_queue_stopped(local, 0)) { - netif_stop_queue(local->mdev); - return IEEE80211_TX_AGAIN; - } if (skb) { - ieee80211_dump_frame(wiphy_name(local->hw.wiphy), - "TX to low-level driver", skb); - ret = local->ops->tx(local_to_hw(local), skb, control); + if (netif_subqueue_stopped(local->mdev, skb)) + return IEEE80211_TX_AGAIN; + info = IEEE80211_SKB_CB(skb); + + ret = local->ops->tx(local_to_hw(local), skb); if (ret) return IEEE80211_TX_AGAIN; local->mdev->trans_start = jiffies; ieee80211_led_tx(local, 1); } if (tx->extra_frag) { - control->flags &= ~(IEEE80211_TXCTL_USE_RTS_CTS | - IEEE80211_TXCTL_USE_CTS_PROTECT | - IEEE80211_TXCTL_CLEAR_PS_FILT | - IEEE80211_TXCTL_FIRST_FRAGMENT); for (i = 0; i < tx->num_extra_frag; i++) { if (!tx->extra_frag[i]) continue; - if (__ieee80211_queue_stopped(local, control->queue)) + info = IEEE80211_SKB_CB(tx->extra_frag[i]); + info->flags &= ~(IEEE80211_TX_CTL_USE_RTS_CTS | + IEEE80211_TX_CTL_USE_CTS_PROTECT | + IEEE80211_TX_CTL_CLEAR_PS_FILT | + IEEE80211_TX_CTL_FIRST_FRAGMENT); + if (netif_subqueue_stopped(local->mdev, + tx->extra_frag[i])) return IEEE80211_TX_FRAG_AGAIN; if (i == tx->num_extra_frag) { - control->tx_rate = tx->last_frag_rate; + info->tx_rate_idx = tx->last_frag_rate_idx; if (tx->flags & IEEE80211_TX_PROBE_LAST_FRAG) - control->flags |= - IEEE80211_TXCTL_RATE_CTRL_PROBE; + info->flags |= + IEEE80211_TX_CTL_RATE_CTRL_PROBE; else - control->flags &= - ~IEEE80211_TXCTL_RATE_CTRL_PROBE; + info->flags &= + ~IEEE80211_TX_CTL_RATE_CTRL_PROBE; } - ieee80211_dump_frame(wiphy_name(local->hw.wiphy), - "TX to low-level driver", - tx->extra_frag[i]); ret = local->ops->tx(local_to_hw(local), - tx->extra_frag[i], - control); + tx->extra_frag[i]); if (ret) return IEEE80211_TX_FRAG_AGAIN; local->mdev->trans_start = jiffies; @@ -1124,17 +1075,65 @@ static int __ieee80211_tx(struct ieee80211_local *local, struct sk_buff *skb, return IEEE80211_TX_OK; } -static int ieee80211_tx(struct net_device *dev, struct sk_buff *skb, - struct ieee80211_tx_control *control) +/* + * Invoke TX handlers, return 0 on success and non-zero if the + * frame was dropped or queued. + */ +static int invoke_tx_handlers(struct ieee80211_tx_data *tx) +{ + struct sk_buff *skb = tx->skb; + ieee80211_tx_result res = TX_DROP; + int i; + +#define CALL_TXH(txh) \ + res = txh(tx); \ + if (res != TX_CONTINUE) \ + goto txh_done; + + CALL_TXH(ieee80211_tx_h_check_assoc) + CALL_TXH(ieee80211_tx_h_ps_buf) + CALL_TXH(ieee80211_tx_h_select_key) + CALL_TXH(ieee80211_tx_h_michael_mic_add) + CALL_TXH(ieee80211_tx_h_rate_ctrl) + CALL_TXH(ieee80211_tx_h_misc) + CALL_TXH(ieee80211_tx_h_sequence) + CALL_TXH(ieee80211_tx_h_fragment) + /* handlers after fragment must be aware of tx info fragmentation! */ + CALL_TXH(ieee80211_tx_h_encrypt) + CALL_TXH(ieee80211_tx_h_calculate_duration) + CALL_TXH(ieee80211_tx_h_stats) +#undef CALL_TXH + + txh_done: + if (unlikely(res == TX_DROP)) { + I802_DEBUG_INC(tx->local->tx_handlers_drop); + dev_kfree_skb(skb); + for (i = 0; i < tx->num_extra_frag; i++) + if (tx->extra_frag[i]) + dev_kfree_skb(tx->extra_frag[i]); + kfree(tx->extra_frag); + return -1; + } else if (unlikely(res == TX_QUEUED)) { + I802_DEBUG_INC(tx->local->tx_handlers_queued); + return -1; + } + + return 0; +} + +static int ieee80211_tx(struct net_device *dev, struct sk_buff *skb) { struct ieee80211_local *local = wdev_priv(dev->ieee80211_ptr); struct sta_info *sta; - ieee80211_tx_handler *handler; struct ieee80211_tx_data tx; - ieee80211_tx_result res = TX_DROP, res_prepare; - int ret, i, retries = 0; + ieee80211_tx_result res_prepare; + struct ieee80211_tx_info *info = IEEE80211_SKB_CB(skb); + int ret, i; + u16 queue; - WARN_ON(__ieee80211_queue_pending(local, control->queue)); + queue = skb_get_queue_mapping(skb); + + WARN_ON(test_bit(queue, local->queues_pending)); if (unlikely(skb->len < 10)) { dev_kfree_skb(skb); @@ -1144,7 +1143,7 @@ static int ieee80211_tx(struct net_device *dev, struct sk_buff *skb, rcu_read_lock(); /* initialises tx */ - res_prepare = __ieee80211_tx_prepare(&tx, skb, dev, control); + res_prepare = __ieee80211_tx_prepare(&tx, skb, dev); if (res_prepare == TX_DROP) { dev_kfree_skb(skb); @@ -1154,86 +1153,54 @@ static int ieee80211_tx(struct net_device *dev, struct sk_buff *skb, sta = tx.sta; tx.channel = local->hw.conf.channel; + info->band = tx.channel->band; - for (handler = ieee80211_tx_handlers; *handler != NULL; - handler++) { - res = (*handler)(&tx); - if (res != TX_CONTINUE) - break; - } - - skb = tx.skb; /* handlers are allowed to change skb */ - - if (unlikely(res == TX_DROP)) { - I802_DEBUG_INC(local->tx_handlers_drop); - goto drop; - } - - if (unlikely(res == TX_QUEUED)) { - I802_DEBUG_INC(local->tx_handlers_queued); - rcu_read_unlock(); - return 0; - } - - if (tx.extra_frag) { - for (i = 0; i < tx.num_extra_frag; i++) { - int next_len, dur; - struct ieee80211_hdr *hdr = - (struct ieee80211_hdr *) - tx.extra_frag[i]->data; - - if (i + 1 < tx.num_extra_frag) { - next_len = tx.extra_frag[i + 1]->len; - } else { - next_len = 0; - tx.rate = tx.last_frag_rate; - } - dur = ieee80211_duration(&tx, 0, next_len); - hdr->duration_id = cpu_to_le16(dur); - } - } + if (invoke_tx_handlers(&tx)) + goto out; retry: ret = __ieee80211_tx(local, skb, &tx); if (ret) { - struct ieee80211_tx_stored_packet *store = - &local->pending_packet[control->queue]; + struct ieee80211_tx_stored_packet *store; + + /* + * Since there are no fragmented frames on A-MPDU + * queues, there's no reason for a driver to reject + * a frame there, warn and drop it. + */ + if (WARN_ON(queue >= ieee80211_num_regular_queues(&local->hw))) + goto drop; + + store = &local->pending_packet[queue]; if (ret == IEEE80211_TX_FRAG_AGAIN) skb = NULL; - set_bit(IEEE80211_LINK_STATE_PENDING, - &local->state[control->queue]); + + set_bit(queue, local->queues_pending); smp_mb(); - /* When the driver gets out of buffers during sending of - * fragments and calls ieee80211_stop_queue, there is - * a small window between IEEE80211_LINK_STATE_XOFF and - * IEEE80211_LINK_STATE_PENDING flags are set. If a buffer + /* + * When the driver gets out of buffers during sending of + * fragments and calls ieee80211_stop_queue, the netif + * subqueue is stopped. There is, however, a small window + * in which the PENDING bit is not yet set. If a buffer * gets available in that window (i.e. driver calls * ieee80211_wake_queue), we would end up with ieee80211_tx - * called with IEEE80211_LINK_STATE_PENDING. Prevent this by + * called with the PENDING bit still set. Prevent this by * continuing transmitting here when that situation is - * possible to have happened. */ - if (!__ieee80211_queue_stopped(local, control->queue)) { - clear_bit(IEEE80211_LINK_STATE_PENDING, - &local->state[control->queue]); - retries++; - /* - * Driver bug, it's rejecting packets but - * not stopping queues. - */ - if (WARN_ON_ONCE(retries > 5)) - goto drop; + * possible to have happened. + */ + if (!__netif_subqueue_stopped(local->mdev, queue)) { + clear_bit(queue, local->queues_pending); goto retry; } - memcpy(&store->control, control, - sizeof(struct ieee80211_tx_control)); store->skb = skb; store->extra_frag = tx.extra_frag; store->num_extra_frag = tx.num_extra_frag; - store->last_frag_rate = tx.last_frag_rate; + store->last_frag_rate_idx = tx.last_frag_rate_idx; store->last_frag_rate_ctrl_probe = !!(tx.flags & IEEE80211_TX_PROBE_LAST_FRAG); } + out: rcu_read_unlock(); return 0; @@ -1250,25 +1217,65 @@ retry: /* device xmit handlers */ -int ieee80211_master_start_xmit(struct sk_buff *skb, - struct net_device *dev) +static int ieee80211_skb_resize(struct ieee80211_local *local, + struct sk_buff *skb, + int head_need, bool may_encrypt) +{ + int tail_need = 0; + + /* + * This could be optimised, devices that do full hardware + * crypto (including TKIP MMIC) need no tailroom... But we + * have no drivers for such devices currently. + */ + if (may_encrypt) { + tail_need = IEEE80211_ENCRYPT_TAILROOM; + tail_need -= skb_tailroom(skb); + tail_need = max_t(int, tail_need, 0); + } + + if (head_need || tail_need) { + /* Sorry. Can't account for this any more */ + skb_orphan(skb); + } + + if (skb_header_cloned(skb)) + I802_DEBUG_INC(local->tx_expand_skb_head_cloned); + else + I802_DEBUG_INC(local->tx_expand_skb_head); + + if (pskb_expand_head(skb, head_need, tail_need, GFP_ATOMIC)) { + printk(KERN_DEBUG "%s: failed to reallocate TX buffer\n", + wiphy_name(local->hw.wiphy)); + return -ENOMEM; + } + + /* update truesize too */ + skb->truesize += head_need + tail_need; + + return 0; +} + +int ieee80211_master_start_xmit(struct sk_buff *skb, struct net_device *dev) { - struct ieee80211_tx_control control; - struct ieee80211_tx_packet_data *pkt_data; + struct ieee80211_master_priv *mpriv = netdev_priv(dev); + struct ieee80211_local *local = mpriv->local; + struct ieee80211_tx_info *info = IEEE80211_SKB_CB(skb); + struct ieee80211_hdr *hdr = (struct ieee80211_hdr *) skb->data; struct net_device *odev = NULL; struct ieee80211_sub_if_data *osdata; int headroom; + bool may_encrypt; + enum { + NOT_MONITOR, + FOUND_SDATA, + UNKNOWN_ADDRESS, + } monitor_iface = NOT_MONITOR; int ret; - /* - * copy control out of the skb so other people can use skb->cb - */ - pkt_data = (struct ieee80211_tx_packet_data *)skb->cb; - memset(&control, 0, sizeof(struct ieee80211_tx_control)); - - if (pkt_data->ifindex) - odev = dev_get_by_index(&init_net, pkt_data->ifindex); - if (unlikely(odev && !is_ieee80211_device(odev, dev))) { + if (skb->iif) + odev = dev_get_by_index(&init_net, skb->iif); + if (unlikely(odev && !is_ieee80211_device(local, odev))) { dev_put(odev); odev = NULL; } @@ -1280,32 +1287,88 @@ int ieee80211_master_start_xmit(struct sk_buff *skb, dev_kfree_skb(skb); return 0; } + + memset(info, 0, sizeof(*info)); + + info->flags |= IEEE80211_TX_CTL_REQ_TX_STATUS; + osdata = IEEE80211_DEV_TO_SUB_IF(odev); - headroom = osdata->local->tx_headroom + IEEE80211_ENCRYPT_HEADROOM; - if (skb_headroom(skb) < headroom) { - if (pskb_expand_head(skb, headroom, 0, GFP_ATOMIC)) { - dev_kfree_skb(skb); - dev_put(odev); - return 0; + if (ieee80211_vif_is_mesh(&osdata->vif) && + ieee80211_is_data(hdr->frame_control)) { + if (is_multicast_ether_addr(hdr->addr3)) + memcpy(hdr->addr1, hdr->addr3, ETH_ALEN); + else + if (mesh_nexthop_lookup(skb, osdata)) + return 0; + if (memcmp(odev->dev_addr, hdr->addr4, ETH_ALEN) != 0) + IEEE80211_IFSTA_MESH_CTR_INC(&osdata->u.mesh, + fwded_frames); + } else if (unlikely(osdata->vif.type == NL80211_IFTYPE_MONITOR)) { + struct ieee80211_sub_if_data *sdata; + int hdrlen; + u16 len_rthdr; + + info->flags |= IEEE80211_TX_CTL_INJECTED; + monitor_iface = UNKNOWN_ADDRESS; + + len_rthdr = ieee80211_get_radiotap_len(skb->data); + hdr = (struct ieee80211_hdr *)skb->data + len_rthdr; + hdrlen = ieee80211_hdrlen(hdr->frame_control); + + /* check the header is complete in the frame */ + if (likely(skb->len >= len_rthdr + hdrlen)) { + /* + * We process outgoing injected frames that have a + * local address we handle as though they are our + * own frames. + * This code here isn't entirely correct, the local + * MAC address is not necessarily enough to find + * the interface to use; for that proper VLAN/WDS + * support we will need a different mechanism. + */ + + rcu_read_lock(); + list_for_each_entry_rcu(sdata, &local->interfaces, + list) { + if (!netif_running(sdata->dev)) + continue; + if (compare_ether_addr(sdata->dev->dev_addr, + hdr->addr2)) { + dev_hold(sdata->dev); + dev_put(odev); + osdata = sdata; + odev = osdata->dev; + skb->iif = sdata->dev->ifindex; + monitor_iface = FOUND_SDATA; + break; + } + } + rcu_read_unlock(); } } - control.vif = &osdata->vif; - control.type = osdata->vif.type; - if (pkt_data->flags & IEEE80211_TXPD_REQ_TX_STATUS) - control.flags |= IEEE80211_TXCTL_REQ_TX_STATUS; - if (pkt_data->flags & IEEE80211_TXPD_DO_NOT_ENCRYPT) - control.flags |= IEEE80211_TXCTL_DO_NOT_ENCRYPT; - if (pkt_data->flags & IEEE80211_TXPD_REQUEUE) - control.flags |= IEEE80211_TXCTL_REQUEUE; - if (pkt_data->flags & IEEE80211_TXPD_EAPOL_FRAME) - control.flags |= IEEE80211_TXCTL_EAPOL_FRAME; - if (pkt_data->flags & IEEE80211_TXPD_AMPDU) - control.flags |= IEEE80211_TXCTL_AMPDU; - control.queue = pkt_data->queue; - - ret = ieee80211_tx(odev, skb, &control); + may_encrypt = !skb->do_not_encrypt; + + headroom = osdata->local->tx_headroom; + if (may_encrypt) + headroom += IEEE80211_ENCRYPT_HEADROOM; + headroom -= skb_headroom(skb); + headroom = max_t(int, 0, headroom); + + if (ieee80211_skb_resize(osdata->local, skb, headroom, may_encrypt)) { + dev_kfree_skb(skb); + dev_put(odev); + return 0; + } + + if (osdata->vif.type == NL80211_IFTYPE_AP_VLAN) + osdata = container_of(osdata->bss, + struct ieee80211_sub_if_data, + u.ap); + if (likely(monitor_iface != UNKNOWN_ADDRESS)) + info->control.vif = &osdata->vif; + ret = ieee80211_tx(odev, skb); dev_put(odev); return ret; @@ -1315,7 +1378,6 @@ int ieee80211_monitor_start_xmit(struct sk_buff *skb, struct net_device *dev) { struct ieee80211_local *local = wdev_priv(dev->ieee80211_ptr); - struct ieee80211_tx_packet_data *pkt_data; struct ieee80211_radiotap_header *prthdr = (struct ieee80211_radiotap_header *)skb->data; u16 len_rthdr; @@ -1337,12 +1399,12 @@ int ieee80211_monitor_start_xmit(struct sk_buff *skb, skb->dev = local->mdev; - pkt_data = (struct ieee80211_tx_packet_data *)skb->cb; - memset(pkt_data, 0, sizeof(*pkt_data)); /* needed because we set skb device to master */ - pkt_data->ifindex = dev->ifindex; + skb->iif = dev->ifindex; - pkt_data->flags |= IEEE80211_TXPD_DO_NOT_ENCRYPT; + /* sometimes we do encrypt injected frames, will be fixed + * up in radiotap parser if not wanted */ + skb->do_not_encrypt = 0; /* * fix up the pointers accounting for the radiotap @@ -1385,11 +1447,11 @@ fail: int ieee80211_subif_start_xmit(struct sk_buff *skb, struct net_device *dev) { - struct ieee80211_local *local = wdev_priv(dev->ieee80211_ptr); - struct ieee80211_tx_packet_data *pkt_data; - struct ieee80211_sub_if_data *sdata; + struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev); + struct ieee80211_local *local = sdata->local; int ret = 1, head_need; - u16 ethertype, hdrlen, meshhdrlen = 0, fc; + u16 ethertype, hdrlen, meshhdrlen = 0; + __le16 fc; struct ieee80211_hdr hdr; struct ieee80211s_hdr mesh_hdr; const u8 *encaps_data; @@ -1398,10 +1460,7 @@ int ieee80211_subif_start_xmit(struct sk_buff *skb, struct sta_info *sta; u32 sta_flags = 0; - sdata = IEEE80211_DEV_TO_SUB_IF(dev); if (unlikely(skb->len < ETH_HLEN)) { - printk(KERN_DEBUG "%s: short skb (len=%d)\n", - dev->name, skb->len); ret = 0; goto fail; } @@ -1412,20 +1471,20 @@ int ieee80211_subif_start_xmit(struct sk_buff *skb, /* convert Ethernet header to proper 802.11 header (based on * operation mode) */ ethertype = (skb->data[12] << 8) | skb->data[13]; - fc = IEEE80211_FTYPE_DATA | IEEE80211_STYPE_DATA; + fc = cpu_to_le16(IEEE80211_FTYPE_DATA | IEEE80211_STYPE_DATA); switch (sdata->vif.type) { - case IEEE80211_IF_TYPE_AP: - case IEEE80211_IF_TYPE_VLAN: - fc |= IEEE80211_FCTL_FROMDS; + case NL80211_IFTYPE_AP: + case NL80211_IFTYPE_AP_VLAN: + fc |= cpu_to_le16(IEEE80211_FCTL_FROMDS); /* DA BSSID SA */ memcpy(hdr.addr1, skb->data, ETH_ALEN); memcpy(hdr.addr2, dev->dev_addr, ETH_ALEN); memcpy(hdr.addr3, skb->data + ETH_ALEN, ETH_ALEN); hdrlen = 24; break; - case IEEE80211_IF_TYPE_WDS: - fc |= IEEE80211_FCTL_FROMDS | IEEE80211_FCTL_TODS; + case NL80211_IFTYPE_WDS: + fc |= cpu_to_le16(IEEE80211_FCTL_FROMDS | IEEE80211_FCTL_TODS); /* RA TA DA SA */ memcpy(hdr.addr1, sdata->u.wds.remote_addr, ETH_ALEN); memcpy(hdr.addr2, dev->dev_addr, ETH_ALEN); @@ -1434,45 +1493,64 @@ int ieee80211_subif_start_xmit(struct sk_buff *skb, hdrlen = 30; break; #ifdef CONFIG_MAC80211_MESH - case IEEE80211_IF_TYPE_MESH_POINT: - fc |= IEEE80211_FCTL_FROMDS | IEEE80211_FCTL_TODS; - /* RA TA DA SA */ - if (is_multicast_ether_addr(skb->data)) - memcpy(hdr.addr1, skb->data, ETH_ALEN); - else if (mesh_nexthop_lookup(hdr.addr1, skb, dev)) - return 0; - memcpy(hdr.addr2, dev->dev_addr, ETH_ALEN); - memcpy(hdr.addr3, skb->data, ETH_ALEN); - memcpy(hdr.addr4, skb->data + ETH_ALEN, ETH_ALEN); - if (skb->pkt_type == PACKET_OTHERHOST) { - /* Forwarded frame, keep mesh ttl and seqnum */ - struct ieee80211s_hdr *prev_meshhdr; - prev_meshhdr = ((struct ieee80211s_hdr *)skb->cb); - meshhdrlen = ieee80211_get_mesh_hdrlen(prev_meshhdr); - memcpy(&mesh_hdr, prev_meshhdr, meshhdrlen); - sdata->u.sta.mshstats.fwded_frames++; + case NL80211_IFTYPE_MESH_POINT: + fc |= cpu_to_le16(IEEE80211_FCTL_FROMDS | IEEE80211_FCTL_TODS); + if (!sdata->u.mesh.mshcfg.dot11MeshTTL) { + /* Do not send frames with mesh_ttl == 0 */ + sdata->u.mesh.mshstats.dropped_frames_ttl++; + ret = 0; + goto fail; + } + memset(&mesh_hdr, 0, sizeof(mesh_hdr)); + + if (compare_ether_addr(dev->dev_addr, + skb->data + ETH_ALEN) == 0) { + /* RA TA DA SA */ + memset(hdr.addr1, 0, ETH_ALEN); + memcpy(hdr.addr2, dev->dev_addr, ETH_ALEN); + memcpy(hdr.addr3, skb->data, ETH_ALEN); + memcpy(hdr.addr4, skb->data + ETH_ALEN, ETH_ALEN); + meshhdrlen = ieee80211_new_mesh_header(&mesh_hdr, sdata); } else { - if (!sdata->u.sta.mshcfg.dot11MeshTTL) { - /* Do not send frames with mesh_ttl == 0 */ - sdata->u.sta.mshstats.dropped_frames_ttl++; - ret = 0; - goto fail; + /* packet from other interface */ + struct mesh_path *mppath; + + memset(hdr.addr1, 0, ETH_ALEN); + memcpy(hdr.addr2, dev->dev_addr, ETH_ALEN); + memcpy(hdr.addr4, dev->dev_addr, ETH_ALEN); + + if (is_multicast_ether_addr(skb->data)) + memcpy(hdr.addr3, skb->data, ETH_ALEN); + else { + rcu_read_lock(); + mppath = mpp_path_lookup(skb->data, sdata); + if (mppath) + memcpy(hdr.addr3, mppath->mpp, ETH_ALEN); + else + memset(hdr.addr3, 0xff, ETH_ALEN); + rcu_read_unlock(); } - meshhdrlen = ieee80211_new_mesh_header(&mesh_hdr, - sdata); + + mesh_hdr.flags |= MESH_FLAGS_AE_A5_A6; + mesh_hdr.ttl = sdata->u.mesh.mshcfg.dot11MeshTTL; + put_unaligned(cpu_to_le32(sdata->u.mesh.mesh_seqnum), &mesh_hdr.seqnum); + memcpy(mesh_hdr.eaddr1, skb->data, ETH_ALEN); + memcpy(mesh_hdr.eaddr2, skb->data + ETH_ALEN, ETH_ALEN); + sdata->u.mesh.mesh_seqnum++; + meshhdrlen = 18; } hdrlen = 30; break; #endif - case IEEE80211_IF_TYPE_STA: - fc |= IEEE80211_FCTL_TODS; + case NL80211_IFTYPE_STATION: + fc |= cpu_to_le16(IEEE80211_FCTL_TODS); /* BSSID SA DA */ memcpy(hdr.addr1, sdata->u.sta.bssid, ETH_ALEN); memcpy(hdr.addr2, skb->data + ETH_ALEN, ETH_ALEN); memcpy(hdr.addr3, skb->data, ETH_ALEN); hdrlen = 24; break; - case IEEE80211_IF_TYPE_IBSS: + case NL80211_IFTYPE_ADHOC: /* DA SA BSSID */ memcpy(hdr.addr1, skb->data, ETH_ALEN); memcpy(hdr.addr2, skb->data + ETH_ALEN, ETH_ALEN); @@ -1493,13 +1571,14 @@ int ieee80211_subif_start_xmit(struct sk_buff *skb, rcu_read_lock(); sta = sta_info_get(local, hdr.addr1); if (sta) - sta_flags = sta->flags; + sta_flags = get_sta_flags(sta); rcu_read_unlock(); } - /* receiver is QoS enabled, use a QoS type frame */ - if (sta_flags & WLAN_STA_WME) { - fc |= IEEE80211_STYPE_QOS_DATA; + /* receiver and we are QoS enabled, use a QoS type frame */ + if (sta_flags & WLAN_STA_WME && + ieee80211_num_regular_queues(&local->hw) >= 4) { + fc |= cpu_to_le16(IEEE80211_STYPE_QOS_DATA); hdrlen += 2; } @@ -1507,7 +1586,8 @@ int ieee80211_subif_start_xmit(struct sk_buff *skb, * Drop unicast frames to unauthorised stations unless they are * EAPOL frames from the local station. */ - if (unlikely(!is_multicast_ether_addr(hdr.addr1) && + if (!ieee80211_vif_is_mesh(&sdata->vif) && + unlikely(!is_multicast_ether_addr(hdr.addr1) && !(sta_flags & WLAN_STA_AUTHORIZED) && !(ethertype == ETH_P_PAE && compare_ether_addr(dev->dev_addr, @@ -1527,7 +1607,7 @@ int ieee80211_subif_start_xmit(struct sk_buff *skb, goto fail; } - hdr.frame_control = cpu_to_le16(fc); + hdr.frame_control = fc; hdr.duration_id = 0; hdr.seq_ctrl = 0; @@ -1549,45 +1629,26 @@ int ieee80211_subif_start_xmit(struct sk_buff *skb, nh_pos -= skip_header_bytes; h_pos -= skip_header_bytes; - /* TODO: implement support for fragments so that there is no need to - * reallocate and copy payload; it might be enough to support one - * extra fragment that would be copied in the beginning of the frame - * data.. anyway, it would be nice to include this into skb structure - * somehow + head_need = hdrlen + encaps_len + meshhdrlen - skb_headroom(skb); + + /* + * So we need to modify the skb header and hence need a copy of + * that. The head_need variable above doesn't, so far, include + * the needed header space that we don't need right away. If we + * can, then we don't reallocate right now but only after the + * frame arrives at the master device (if it does...) * - * There are few options for this: - * use skb->cb as an extra space for 802.11 header - * allocate new buffer if not enough headroom - * make sure that there is enough headroom in every skb by increasing - * build in headroom in __dev_alloc_skb() (linux/skbuff.h) and - * alloc_skb() (net/core/skbuff.c) + * If we cannot, however, then we will reallocate to include all + * the ever needed space. Also, if we need to reallocate it anyway, + * make it big enough for everything we may ever need. */ - head_need = hdrlen + encaps_len + meshhdrlen + local->tx_headroom; - head_need -= skb_headroom(skb); - - /* We are going to modify skb data, so make a copy of it if happens to - * be cloned. This could happen, e.g., with Linux bridge code passing - * us broadcast frames. */ if (head_need > 0 || skb_cloned(skb)) { -#if 0 - printk(KERN_DEBUG "%s: need to reallocate buffer for %d bytes " - "of headroom\n", dev->name, head_need); -#endif - - if (skb_cloned(skb)) - I802_DEBUG_INC(local->tx_expand_skb_head_cloned); - else - I802_DEBUG_INC(local->tx_expand_skb_head); - /* Since we have to reallocate the buffer, make sure that there - * is enough room for possible WEP IV/ICV and TKIP (8 bytes - * before payload and 12 after). */ - if (pskb_expand_head(skb, (head_need > 0 ? head_need + 8 : 8), - 12, GFP_ATOMIC)) { - printk(KERN_DEBUG "%s: failed to reallocate TX buffer" - "\n", dev->name); + head_need += IEEE80211_ENCRYPT_HEADROOM; + head_need += local->tx_headroom; + head_need = max_t(int, 0, head_need); + if (ieee80211_skb_resize(local, skb, head_need, true)) goto fail; - } } if (encaps_data) { @@ -1602,7 +1663,7 @@ int ieee80211_subif_start_xmit(struct sk_buff *skb, h_pos += meshhdrlen; } - if (fc & IEEE80211_STYPE_QOS_DATA) { + if (ieee80211_is_data_qos(fc)) { __le16 *qos_control; qos_control = (__le16*) skb_push(skb, 2); @@ -1618,11 +1679,7 @@ int ieee80211_subif_start_xmit(struct sk_buff *skb, nh_pos += hdrlen; h_pos += hdrlen; - pkt_data = (struct ieee80211_tx_packet_data *)skb->cb; - memset(pkt_data, 0, sizeof(struct ieee80211_tx_packet_data)); - pkt_data->ifindex = dev->ifindex; - if (ethertype == ETH_P_PAE) - pkt_data->flags |= IEEE80211_TXPD_EAPOL_FRAME; + skb->iif = dev->ifindex; skb->dev = local->mdev; dev->stats.tx_packets++; @@ -1647,46 +1704,60 @@ int ieee80211_subif_start_xmit(struct sk_buff *skb, return ret; } -/* helper functions for pending packets for when queues are stopped */ +/* + * ieee80211_clear_tx_pending may not be called in a context where + * it is possible that it packets could come in again. + */ void ieee80211_clear_tx_pending(struct ieee80211_local *local) { int i, j; struct ieee80211_tx_stored_packet *store; - for (i = 0; i < local->hw.queues; i++) { - if (!__ieee80211_queue_pending(local, i)) + for (i = 0; i < ieee80211_num_regular_queues(&local->hw); i++) { + if (!test_bit(i, local->queues_pending)) continue; store = &local->pending_packet[i]; kfree_skb(store->skb); for (j = 0; j < store->num_extra_frag; j++) kfree_skb(store->extra_frag[j]); kfree(store->extra_frag); - clear_bit(IEEE80211_LINK_STATE_PENDING, &local->state[i]); + clear_bit(i, local->queues_pending); } } +/* + * Transmit all pending packets. Called from tasklet, locks master device + * TX lock so that no new packets can come in. + */ void ieee80211_tx_pending(unsigned long data) { struct ieee80211_local *local = (struct ieee80211_local *)data; struct net_device *dev = local->mdev; struct ieee80211_tx_stored_packet *store; struct ieee80211_tx_data tx; - int i, ret, reschedule = 0; + int i, ret; netif_tx_lock_bh(dev); - for (i = 0; i < local->hw.queues; i++) { - if (__ieee80211_queue_stopped(local, i)) + for (i = 0; i < ieee80211_num_regular_queues(&local->hw); i++) { + /* Check that this queue is ok */ + if (__netif_subqueue_stopped(local->mdev, i) && + !test_bit(i, local->queues_pending_run)) continue; - if (!__ieee80211_queue_pending(local, i)) { - reschedule = 1; + + if (!test_bit(i, local->queues_pending)) { + clear_bit(i, local->queues_pending_run); + ieee80211_wake_queue(&local->hw, i); continue; } + + clear_bit(i, local->queues_pending_run); + netif_start_subqueue(local->mdev, i); + store = &local->pending_packet[i]; - tx.control = &store->control; tx.extra_frag = store->extra_frag; tx.num_extra_frag = store->num_extra_frag; - tx.last_frag_rate = store->last_frag_rate; + tx.last_frag_rate_idx = store->last_frag_rate_idx; tx.flags = 0; if (store->last_frag_rate_ctrl_probe) tx.flags |= IEEE80211_TX_PROBE_LAST_FRAG; @@ -1695,19 +1766,11 @@ void ieee80211_tx_pending(unsigned long data) if (ret == IEEE80211_TX_FRAG_AGAIN) store->skb = NULL; } else { - clear_bit(IEEE80211_LINK_STATE_PENDING, - &local->state[i]); - reschedule = 1; + clear_bit(i, local->queues_pending); + ieee80211_wake_queue(&local->hw, i); } } netif_tx_unlock_bh(dev); - if (reschedule) { - if (!ieee80211_qdisc_installed(dev)) { - if (!__ieee80211_queue_stopped(local, 0)) - netif_wake_queue(dev); - } else - netif_schedule(dev); - } } /* functions for drivers to get certain frames */ @@ -1776,30 +1839,28 @@ static void ieee80211_beacon_add_tim(struct ieee80211_local *local, } struct sk_buff *ieee80211_beacon_get(struct ieee80211_hw *hw, - struct ieee80211_vif *vif, - struct ieee80211_tx_control *control) + struct ieee80211_vif *vif) { struct ieee80211_local *local = hw_to_local(hw); - struct sk_buff *skb; + struct sk_buff *skb = NULL; + struct ieee80211_tx_info *info; struct net_device *bdev; struct ieee80211_sub_if_data *sdata = NULL; struct ieee80211_if_ap *ap = NULL; + struct ieee80211_if_sta *ifsta = NULL; struct rate_selection rsel; struct beacon_data *beacon; struct ieee80211_supported_band *sband; - struct ieee80211_mgmt *mgmt; - int *num_beacons; - bool err = true; - u8 *pos; + enum ieee80211_band band = local->hw.conf.channel->band; - sband = local->hw.wiphy->bands[local->hw.conf.channel->band]; + sband = local->hw.wiphy->bands[band]; rcu_read_lock(); sdata = vif_to_sdata(vif); bdev = sdata->dev; - if (sdata->vif.type == IEEE80211_IF_TYPE_AP) { + if (sdata->vif.type == NL80211_IFTYPE_AP) { ap = &sdata->u.ap; beacon = rcu_dereference(ap->beacon); if (ap && beacon) { @@ -1817,9 +1878,6 @@ struct sk_buff *ieee80211_beacon_get(struct ieee80211_hw *hw, memcpy(skb_put(skb, beacon->head_len), beacon->head, beacon->head_len); - ieee80211_include_sequence(sdata, - (struct ieee80211_hdr *)skb->data); - /* * Not very nice, but we want to allow the driver to call * ieee80211_beacon_get() as a response to the set_tim() @@ -1840,12 +1898,27 @@ struct sk_buff *ieee80211_beacon_get(struct ieee80211_hw *hw, if (beacon->tail) memcpy(skb_put(skb, beacon->tail_len), beacon->tail, beacon->tail_len); + } else + goto out; + } else if (sdata->vif.type == NL80211_IFTYPE_ADHOC) { + struct ieee80211_hdr *hdr; + ifsta = &sdata->u.sta; - num_beacons = &ap->num_beacons; + if (!ifsta->probe_resp) + goto out; + + skb = skb_copy(ifsta->probe_resp, GFP_ATOMIC); + if (!skb) + goto out; + + hdr = (struct ieee80211_hdr *) skb->data; + hdr->frame_control = cpu_to_le16(IEEE80211_FTYPE_MGMT | + IEEE80211_STYPE_BEACON); - err = false; - } } else if (ieee80211_vif_is_mesh(&sdata->vif)) { + struct ieee80211_mgmt *mgmt; + u8 *pos; + /* headroom, head length, tail length and maximum TIM length */ skb = dev_alloc_skb(local->tx_headroom + 400); if (!skb) @@ -1855,8 +1928,8 @@ struct sk_buff *ieee80211_beacon_get(struct ieee80211_hw *hw, mgmt = (struct ieee80211_mgmt *) skb_put(skb, 24 + sizeof(mgmt->u.beacon)); memset(mgmt, 0, 24 + sizeof(mgmt->u.beacon)); - mgmt->frame_control = IEEE80211_FC(IEEE80211_FTYPE_MGMT, - IEEE80211_STYPE_BEACON); + mgmt->frame_control = + cpu_to_le16(IEEE80211_FTYPE_MGMT | IEEE80211_STYPE_BEACON); memset(mgmt->da, 0xff, ETH_ALEN); memcpy(mgmt->sa, sdata->dev->dev_addr, ETH_ALEN); /* BSSID is left zeroed, wildcard value */ @@ -1868,48 +1941,43 @@ struct sk_buff *ieee80211_beacon_get(struct ieee80211_hw *hw, *pos++ = WLAN_EID_SSID; *pos++ = 0x0; - mesh_mgmt_ies_add(skb, sdata->dev); + mesh_mgmt_ies_add(skb, sdata); + } else { + WARN_ON(1); + goto out; + } - num_beacons = &sdata->u.sta.num_beacons; + info = IEEE80211_SKB_CB(skb); - err = false; - } + skb->do_not_encrypt = 1; - if (err) { -#ifdef CONFIG_MAC80211_VERBOSE_DEBUG - if (net_ratelimit()) - printk(KERN_DEBUG "no beacon data avail for %s\n", - bdev->name); -#endif /* CONFIG_MAC80211_VERBOSE_DEBUG */ + info->band = band; + rate_control_get_rate(sdata, sband, NULL, skb, &rsel); + + if (unlikely(rsel.rate_idx < 0)) { + if (net_ratelimit()) { + printk(KERN_DEBUG "%s: ieee80211_beacon_get: " + "no rate found\n", + wiphy_name(local->hw.wiphy)); + } + dev_kfree_skb_any(skb); skb = NULL; goto out; } - if (control) { - rate_control_get_rate(local->mdev, sband, skb, &rsel); - if (!rsel.rate) { - if (net_ratelimit()) { - printk(KERN_DEBUG "%s: ieee80211_beacon_get: " - "no rate found\n", - wiphy_name(local->hw.wiphy)); - } - dev_kfree_skb(skb); - skb = NULL; - goto out; - } + info->control.vif = vif; + info->tx_rate_idx = rsel.rate_idx; + + info->flags |= IEEE80211_TX_CTL_NO_ACK; + info->flags |= IEEE80211_TX_CTL_CLEAR_PS_FILT; + info->flags |= IEEE80211_TX_CTL_ASSIGN_SEQ; + if (sdata->bss_conf.use_short_preamble && + sband->bitrates[rsel.rate_idx].flags & IEEE80211_RATE_SHORT_PREAMBLE) + info->flags |= IEEE80211_TX_CTL_SHORT_PREAMBLE; + + info->antenna_sel_tx = local->hw.conf.antenna_sel_tx; + info->control.retry_limit = 1; - control->vif = vif; - control->tx_rate = rsel.rate; - if (sdata->bss_conf.use_short_preamble && - rsel.rate->flags & IEEE80211_RATE_SHORT_PREAMBLE) - control->flags |= IEEE80211_TXCTL_SHORT_PREAMBLE; - control->antenna_sel_tx = local->hw.conf.antenna_sel_tx; - control->flags |= IEEE80211_TXCTL_NO_ACK; - control->flags |= IEEE80211_TXCTL_DO_NOT_ENCRYPT; - control->retry_limit = 1; - control->flags |= IEEE80211_TXCTL_CLEAR_PS_FILT; - } - (*num_beacons)++; out: rcu_read_unlock(); return skb; @@ -1918,14 +1986,13 @@ EXPORT_SYMBOL(ieee80211_beacon_get); void ieee80211_rts_get(struct ieee80211_hw *hw, struct ieee80211_vif *vif, const void *frame, size_t frame_len, - const struct ieee80211_tx_control *frame_txctl, + const struct ieee80211_tx_info *frame_txctl, struct ieee80211_rts *rts) { const struct ieee80211_hdr *hdr = frame; - u16 fctl; - fctl = IEEE80211_FTYPE_CTL | IEEE80211_STYPE_RTS; - rts->frame_control = cpu_to_le16(fctl); + rts->frame_control = + cpu_to_le16(IEEE80211_FTYPE_CTL | IEEE80211_STYPE_RTS); rts->duration = ieee80211_rts_duration(hw, vif, frame_len, frame_txctl); memcpy(rts->ra, hdr->addr1, sizeof(rts->ra)); @@ -1935,14 +2002,13 @@ EXPORT_SYMBOL(ieee80211_rts_get); void ieee80211_ctstoself_get(struct ieee80211_hw *hw, struct ieee80211_vif *vif, const void *frame, size_t frame_len, - const struct ieee80211_tx_control *frame_txctl, + const struct ieee80211_tx_info *frame_txctl, struct ieee80211_cts *cts) { const struct ieee80211_hdr *hdr = frame; - u16 fctl; - fctl = IEEE80211_FTYPE_CTL | IEEE80211_STYPE_CTS; - cts->frame_control = cpu_to_le16(fctl); + cts->frame_control = + cpu_to_le16(IEEE80211_FTYPE_CTL | IEEE80211_STYPE_CTS); cts->duration = ieee80211_ctstoself_duration(hw, vif, frame_len, frame_txctl); memcpy(cts->ra, hdr->addr1, sizeof(cts->ra)); @@ -1951,23 +2017,21 @@ EXPORT_SYMBOL(ieee80211_ctstoself_get); struct sk_buff * ieee80211_get_buffered_bc(struct ieee80211_hw *hw, - struct ieee80211_vif *vif, - struct ieee80211_tx_control *control) + struct ieee80211_vif *vif) { struct ieee80211_local *local = hw_to_local(hw); - struct sk_buff *skb; + struct sk_buff *skb = NULL; struct sta_info *sta; - ieee80211_tx_handler *handler; struct ieee80211_tx_data tx; - ieee80211_tx_result res = TX_DROP; struct net_device *bdev; struct ieee80211_sub_if_data *sdata; struct ieee80211_if_ap *bss = NULL; struct beacon_data *beacon; + struct ieee80211_tx_info *info; sdata = vif_to_sdata(vif); bdev = sdata->dev; - + bss = &sdata->u.ap; if (!bss) return NULL; @@ -1975,19 +2039,16 @@ ieee80211_get_buffered_bc(struct ieee80211_hw *hw, rcu_read_lock(); beacon = rcu_dereference(bss->beacon); - if (sdata->vif.type != IEEE80211_IF_TYPE_AP || !beacon || - !beacon->head) { - rcu_read_unlock(); - return NULL; - } + if (sdata->vif.type != NL80211_IFTYPE_AP || !beacon || !beacon->head) + goto out; if (bss->dtim_count != 0) - return NULL; /* send buffered bc/mc only after DTIM beacon */ - memset(control, 0, sizeof(*control)); + goto out; /* send buffered bc/mc only after DTIM beacon */ + while (1) { skb = skb_dequeue(&bss->ps_bc_buf); if (!skb) - return NULL; + goto out; local->total_ps_buffered--; if (!skb_queue_empty(&bss->ps_bc_buf) && skb->len >= 2) { @@ -2000,30 +2061,21 @@ ieee80211_get_buffered_bc(struct ieee80211_hw *hw, cpu_to_le16(IEEE80211_FCTL_MOREDATA); } - if (!ieee80211_tx_prepare(&tx, skb, local->mdev, control)) + if (!ieee80211_tx_prepare(local, &tx, skb)) break; dev_kfree_skb_any(skb); } + + info = IEEE80211_SKB_CB(skb); + sta = tx.sta; tx.flags |= IEEE80211_TX_PS_BUFFERED; tx.channel = local->hw.conf.channel; + info->band = tx.channel->band; - for (handler = ieee80211_tx_handlers; *handler != NULL; handler++) { - res = (*handler)(&tx); - if (res == TX_DROP || res == TX_QUEUED) - break; - } - skb = tx.skb; /* handlers are allowed to change skb */ - - if (res == TX_DROP) { - I802_DEBUG_INC(local->tx_handlers_drop); - dev_kfree_skb(skb); + if (invoke_tx_handlers(&tx)) skb = NULL; - } else if (res == TX_QUEUED) { - I802_DEBUG_INC(local->tx_handlers_queued); - skb = NULL; - } - + out: rcu_read_unlock(); return skb; diff --git a/net/mac80211/util.c b/net/mac80211/util.c index 4e97b266f907..f32561ec224c 100644 --- a/net/mac80211/util.c +++ b/net/mac80211/util.c @@ -43,76 +43,67 @@ const unsigned char bridge_tunnel_header[] __aligned(2) = u8 *ieee80211_get_bssid(struct ieee80211_hdr *hdr, size_t len, - enum ieee80211_if_types type) + enum nl80211_iftype type) { - u16 fc; + __le16 fc = hdr->frame_control; /* drop ACK/CTS frames and incorrect hdr len (ctrl) */ if (len < 16) return NULL; - fc = le16_to_cpu(hdr->frame_control); - - switch (fc & IEEE80211_FCTL_FTYPE) { - case IEEE80211_FTYPE_DATA: + if (ieee80211_is_data(fc)) { if (len < 24) /* drop incorrect hdr len (data) */ return NULL; - switch (fc & (IEEE80211_FCTL_TODS | IEEE80211_FCTL_FROMDS)) { - case IEEE80211_FCTL_TODS: - return hdr->addr1; - case (IEEE80211_FCTL_TODS | IEEE80211_FCTL_FROMDS): + + if (ieee80211_has_a4(fc)) return NULL; - case IEEE80211_FCTL_FROMDS: + if (ieee80211_has_tods(fc)) + return hdr->addr1; + if (ieee80211_has_fromds(fc)) return hdr->addr2; - case 0: - return hdr->addr3; - } - break; - case IEEE80211_FTYPE_MGMT: + + return hdr->addr3; + } + + if (ieee80211_is_mgmt(fc)) { if (len < 24) /* drop incorrect hdr len (mgmt) */ return NULL; return hdr->addr3; - case IEEE80211_FTYPE_CTL: - if ((fc & IEEE80211_FCTL_STYPE) == IEEE80211_STYPE_PSPOLL) + } + + if (ieee80211_is_ctl(fc)) { + if(ieee80211_is_pspoll(fc)) return hdr->addr1; - else if ((fc & IEEE80211_FCTL_STYPE) == - IEEE80211_STYPE_BACK_REQ) { + + if (ieee80211_is_back_req(fc)) { switch (type) { - case IEEE80211_IF_TYPE_STA: + case NL80211_IFTYPE_STATION: return hdr->addr2; - case IEEE80211_IF_TYPE_AP: - case IEEE80211_IF_TYPE_VLAN: + case NL80211_IFTYPE_AP: + case NL80211_IFTYPE_AP_VLAN: return hdr->addr1; default: - return NULL; + break; /* fall through to the return */ } } - else - return NULL; } return NULL; } -int ieee80211_get_hdrlen(u16 fc) +unsigned int ieee80211_hdrlen(__le16 fc) { - int hdrlen = 24; + unsigned int hdrlen = 24; + + if (ieee80211_is_data(fc)) { + if (ieee80211_has_a4(fc)) + hdrlen = 30; + if (ieee80211_is_data_qos(fc)) + hdrlen += IEEE80211_QOS_CTL_LEN; + goto out; + } - switch (fc & IEEE80211_FCTL_FTYPE) { - case IEEE80211_FTYPE_DATA: - if ((fc & IEEE80211_FCTL_FROMDS) && (fc & IEEE80211_FCTL_TODS)) - hdrlen = 30; /* Addr4 */ - /* - * The QoS Control field is two bytes and its presence is - * indicated by the IEEE80211_STYPE_QOS_DATA bit. Add 2 to - * hdrlen if that bit is set. - * This works by masking out the bit and shifting it to - * bit position 1 so the result has the value 0 or 2. - */ - hdrlen += (fc & IEEE80211_STYPE_QOS_DATA) - >> (ilog2(IEEE80211_STYPE_QOS_DATA)-1); - break; - case IEEE80211_FTYPE_CTL: + if (ieee80211_is_ctl(fc)) { /* * ACK and CTS are 10 bytes, all others 16. To see how * to get this condition consider @@ -122,25 +113,24 @@ int ieee80211_get_hdrlen(u16 fc) * bits that matter: ^^^ (0x00E0) * value of those: 0b0000000011000000 (0x00C0) */ - if ((fc & 0xE0) == 0xC0) + if ((fc & cpu_to_le16(0x00E0)) == cpu_to_le16(0x00C0)) hdrlen = 10; else hdrlen = 16; - break; } - +out: return hdrlen; } -EXPORT_SYMBOL(ieee80211_get_hdrlen); +EXPORT_SYMBOL(ieee80211_hdrlen); -int ieee80211_get_hdrlen_from_skb(const struct sk_buff *skb) +unsigned int ieee80211_get_hdrlen_from_skb(const struct sk_buff *skb) { - const struct ieee80211_hdr *hdr = (const struct ieee80211_hdr *) skb->data; - int hdrlen; + const struct ieee80211_hdr *hdr = (const struct ieee80211_hdr *)skb->data; + unsigned int hdrlen; if (unlikely(skb->len < 10)) return 0; - hdrlen = ieee80211_get_hdrlen(le16_to_cpu(hdr->frame_control)); + hdrlen = ieee80211_hdrlen(hdr->frame_control); if (unlikely(hdrlen > skb->len)) return 0; return hdrlen; @@ -241,16 +231,21 @@ __le16 ieee80211_generic_frame_duration(struct ieee80211_hw *hw, struct ieee80211_rate *rate) { struct ieee80211_local *local = hw_to_local(hw); - struct ieee80211_sub_if_data *sdata = vif_to_sdata(vif); + struct ieee80211_sub_if_data *sdata; u16 dur; int erp; + bool short_preamble = false; erp = 0; - if (sdata->flags & IEEE80211_SDATA_OPERATING_GMODE) - erp = rate->flags & IEEE80211_RATE_ERP_G; + if (vif) { + sdata = vif_to_sdata(vif); + short_preamble = sdata->bss_conf.use_short_preamble; + if (sdata->flags & IEEE80211_SDATA_OPERATING_GMODE) + erp = rate->flags & IEEE80211_RATE_ERP_G; + } dur = ieee80211_frame_duration(local, frame_len, rate->bitrate, erp, - sdata->bss_conf.use_short_preamble); + short_preamble); return cpu_to_le16(dur); } @@ -258,22 +253,29 @@ EXPORT_SYMBOL(ieee80211_generic_frame_duration); __le16 ieee80211_rts_duration(struct ieee80211_hw *hw, struct ieee80211_vif *vif, size_t frame_len, - const struct ieee80211_tx_control *frame_txctl) + const struct ieee80211_tx_info *frame_txctl) { struct ieee80211_local *local = hw_to_local(hw); struct ieee80211_rate *rate; - struct ieee80211_sub_if_data *sdata = vif_to_sdata(vif); + struct ieee80211_sub_if_data *sdata; bool short_preamble; int erp; u16 dur; + struct ieee80211_supported_band *sband; - short_preamble = sdata->bss_conf.use_short_preamble; + sband = local->hw.wiphy->bands[local->hw.conf.channel->band]; - rate = frame_txctl->rts_cts_rate; + short_preamble = false; + + rate = &sband->bitrates[frame_txctl->control.rts_cts_rate_idx]; erp = 0; - if (sdata->flags & IEEE80211_SDATA_OPERATING_GMODE) - erp = rate->flags & IEEE80211_RATE_ERP_G; + if (vif) { + sdata = vif_to_sdata(vif); + short_preamble = sdata->bss_conf.use_short_preamble; + if (sdata->flags & IEEE80211_SDATA_OPERATING_GMODE) + erp = rate->flags & IEEE80211_RATE_ERP_G; + } /* CTS duration */ dur = ieee80211_frame_duration(local, 10, rate->bitrate, @@ -292,26 +294,33 @@ EXPORT_SYMBOL(ieee80211_rts_duration); __le16 ieee80211_ctstoself_duration(struct ieee80211_hw *hw, struct ieee80211_vif *vif, size_t frame_len, - const struct ieee80211_tx_control *frame_txctl) + const struct ieee80211_tx_info *frame_txctl) { struct ieee80211_local *local = hw_to_local(hw); struct ieee80211_rate *rate; - struct ieee80211_sub_if_data *sdata = vif_to_sdata(vif); + struct ieee80211_sub_if_data *sdata; bool short_preamble; int erp; u16 dur; + struct ieee80211_supported_band *sband; + + sband = local->hw.wiphy->bands[local->hw.conf.channel->band]; - short_preamble = sdata->bss_conf.use_short_preamble; + short_preamble = false; - rate = frame_txctl->rts_cts_rate; + rate = &sband->bitrates[frame_txctl->control.rts_cts_rate_idx]; erp = 0; - if (sdata->flags & IEEE80211_SDATA_OPERATING_GMODE) - erp = rate->flags & IEEE80211_RATE_ERP_G; + if (vif) { + sdata = vif_to_sdata(vif); + short_preamble = sdata->bss_conf.use_short_preamble; + if (sdata->flags & IEEE80211_SDATA_OPERATING_GMODE) + erp = rate->flags & IEEE80211_RATE_ERP_G; + } /* Data frame duration */ dur = ieee80211_frame_duration(local, frame_len, rate->bitrate, erp, short_preamble); - if (!(frame_txctl->flags & IEEE80211_TXCTL_NO_ACK)) { + if (!(frame_txctl->flags & IEEE80211_TX_CTL_NO_ACK)) { /* ACK duration */ dur += ieee80211_frame_duration(local, 10, rate->bitrate, erp, short_preamble); @@ -325,17 +334,11 @@ void ieee80211_wake_queue(struct ieee80211_hw *hw, int queue) { struct ieee80211_local *local = hw_to_local(hw); - if (test_and_clear_bit(IEEE80211_LINK_STATE_XOFF, - &local->state[queue])) { - if (test_bit(IEEE80211_LINK_STATE_PENDING, - &local->state[queue])) - tasklet_schedule(&local->tx_pending_tasklet); - else - if (!ieee80211_qdisc_installed(local->mdev)) { - if (queue == 0) - netif_wake_queue(local->mdev); - } else - __netif_schedule(local->mdev); + if (test_bit(queue, local->queues_pending)) { + set_bit(queue, local->queues_pending_run); + tasklet_schedule(&local->tx_pending_tasklet); + } else { + netif_wake_subqueue(local->mdev, queue); } } EXPORT_SYMBOL(ieee80211_wake_queue); @@ -344,38 +347,31 @@ void ieee80211_stop_queue(struct ieee80211_hw *hw, int queue) { struct ieee80211_local *local = hw_to_local(hw); - if (!ieee80211_qdisc_installed(local->mdev) && queue == 0) - netif_stop_queue(local->mdev); - set_bit(IEEE80211_LINK_STATE_XOFF, &local->state[queue]); + netif_stop_subqueue(local->mdev, queue); } EXPORT_SYMBOL(ieee80211_stop_queue); -void ieee80211_start_queues(struct ieee80211_hw *hw) -{ - struct ieee80211_local *local = hw_to_local(hw); - int i; - - for (i = 0; i < local->hw.queues; i++) - clear_bit(IEEE80211_LINK_STATE_XOFF, &local->state[i]); - if (!ieee80211_qdisc_installed(local->mdev)) - netif_start_queue(local->mdev); -} -EXPORT_SYMBOL(ieee80211_start_queues); - void ieee80211_stop_queues(struct ieee80211_hw *hw) { int i; - for (i = 0; i < hw->queues; i++) + for (i = 0; i < ieee80211_num_queues(hw); i++) ieee80211_stop_queue(hw, i); } EXPORT_SYMBOL(ieee80211_stop_queues); +int ieee80211_queue_stopped(struct ieee80211_hw *hw, int queue) +{ + struct ieee80211_local *local = hw_to_local(hw); + return __netif_subqueue_stopped(local->mdev, queue); +} +EXPORT_SYMBOL(ieee80211_queue_stopped); + void ieee80211_wake_queues(struct ieee80211_hw *hw) { int i; - for (i = 0; i < hw->queues; i++) + for (i = 0; i < hw->queues + hw->ampdu_queues; i++) ieee80211_wake_queue(hw, i); } EXPORT_SYMBOL(ieee80211_wake_queues); @@ -393,19 +389,18 @@ void ieee80211_iterate_active_interfaces( list_for_each_entry(sdata, &local->interfaces, list) { switch (sdata->vif.type) { - case IEEE80211_IF_TYPE_INVALID: - case IEEE80211_IF_TYPE_MNTR: - case IEEE80211_IF_TYPE_VLAN: + case __NL80211_IFTYPE_AFTER_LAST: + case NL80211_IFTYPE_UNSPECIFIED: + case NL80211_IFTYPE_MONITOR: + case NL80211_IFTYPE_AP_VLAN: continue; - case IEEE80211_IF_TYPE_AP: - case IEEE80211_IF_TYPE_STA: - case IEEE80211_IF_TYPE_IBSS: - case IEEE80211_IF_TYPE_WDS: - case IEEE80211_IF_TYPE_MESH_POINT: + case NL80211_IFTYPE_AP: + case NL80211_IFTYPE_STATION: + case NL80211_IFTYPE_ADHOC: + case NL80211_IFTYPE_WDS: + case NL80211_IFTYPE_MESH_POINT: break; } - if (sdata->dev == local->mdev) - continue; if (netif_running(sdata->dev)) iterator(data, sdata->dev->dev_addr, &sdata->vif); @@ -428,19 +423,18 @@ void ieee80211_iterate_active_interfaces_atomic( list_for_each_entry_rcu(sdata, &local->interfaces, list) { switch (sdata->vif.type) { - case IEEE80211_IF_TYPE_INVALID: - case IEEE80211_IF_TYPE_MNTR: - case IEEE80211_IF_TYPE_VLAN: + case __NL80211_IFTYPE_AFTER_LAST: + case NL80211_IFTYPE_UNSPECIFIED: + case NL80211_IFTYPE_MONITOR: + case NL80211_IFTYPE_AP_VLAN: continue; - case IEEE80211_IF_TYPE_AP: - case IEEE80211_IF_TYPE_STA: - case IEEE80211_IF_TYPE_IBSS: - case IEEE80211_IF_TYPE_WDS: - case IEEE80211_IF_TYPE_MESH_POINT: + case NL80211_IFTYPE_AP: + case NL80211_IFTYPE_STATION: + case NL80211_IFTYPE_ADHOC: + case NL80211_IFTYPE_WDS: + case NL80211_IFTYPE_MESH_POINT: break; } - if (sdata->dev == local->mdev) - continue; if (netif_running(sdata->dev)) iterator(data, sdata->dev->dev_addr, &sdata->vif); @@ -449,3 +443,243 @@ void ieee80211_iterate_active_interfaces_atomic( rcu_read_unlock(); } EXPORT_SYMBOL_GPL(ieee80211_iterate_active_interfaces_atomic); + +void ieee802_11_parse_elems(u8 *start, size_t len, + struct ieee802_11_elems *elems) +{ + size_t left = len; + u8 *pos = start; + + memset(elems, 0, sizeof(*elems)); + elems->ie_start = start; + elems->total_len = len; + + while (left >= 2) { + u8 id, elen; + + id = *pos++; + elen = *pos++; + left -= 2; + + if (elen > left) + return; + + switch (id) { + case WLAN_EID_SSID: + elems->ssid = pos; + elems->ssid_len = elen; + break; + case WLAN_EID_SUPP_RATES: + elems->supp_rates = pos; + elems->supp_rates_len = elen; + break; + case WLAN_EID_FH_PARAMS: + elems->fh_params = pos; + elems->fh_params_len = elen; + break; + case WLAN_EID_DS_PARAMS: + elems->ds_params = pos; + elems->ds_params_len = elen; + break; + case WLAN_EID_CF_PARAMS: + elems->cf_params = pos; + elems->cf_params_len = elen; + break; + case WLAN_EID_TIM: + elems->tim = pos; + elems->tim_len = elen; + break; + case WLAN_EID_IBSS_PARAMS: + elems->ibss_params = pos; + elems->ibss_params_len = elen; + break; + case WLAN_EID_CHALLENGE: + elems->challenge = pos; + elems->challenge_len = elen; + break; + case WLAN_EID_WPA: + if (elen >= 4 && pos[0] == 0x00 && pos[1] == 0x50 && + pos[2] == 0xf2) { + /* Microsoft OUI (00:50:F2) */ + if (pos[3] == 1) { + /* OUI Type 1 - WPA IE */ + elems->wpa = pos; + elems->wpa_len = elen; + } else if (elen >= 5 && pos[3] == 2) { + if (pos[4] == 0) { + elems->wmm_info = pos; + elems->wmm_info_len = elen; + } else if (pos[4] == 1) { + elems->wmm_param = pos; + elems->wmm_param_len = elen; + } + } + } + break; + case WLAN_EID_RSN: + elems->rsn = pos; + elems->rsn_len = elen; + break; + case WLAN_EID_ERP_INFO: + elems->erp_info = pos; + elems->erp_info_len = elen; + break; + case WLAN_EID_EXT_SUPP_RATES: + elems->ext_supp_rates = pos; + elems->ext_supp_rates_len = elen; + break; + case WLAN_EID_HT_CAPABILITY: + elems->ht_cap_elem = pos; + elems->ht_cap_elem_len = elen; + break; + case WLAN_EID_HT_EXTRA_INFO: + elems->ht_info_elem = pos; + elems->ht_info_elem_len = elen; + break; + case WLAN_EID_MESH_ID: + elems->mesh_id = pos; + elems->mesh_id_len = elen; + break; + case WLAN_EID_MESH_CONFIG: + elems->mesh_config = pos; + elems->mesh_config_len = elen; + break; + case WLAN_EID_PEER_LINK: + elems->peer_link = pos; + elems->peer_link_len = elen; + break; + case WLAN_EID_PREQ: + elems->preq = pos; + elems->preq_len = elen; + break; + case WLAN_EID_PREP: + elems->prep = pos; + elems->prep_len = elen; + break; + case WLAN_EID_PERR: + elems->perr = pos; + elems->perr_len = elen; + break; + case WLAN_EID_CHANNEL_SWITCH: + elems->ch_switch_elem = pos; + elems->ch_switch_elem_len = elen; + break; + case WLAN_EID_QUIET: + if (!elems->quiet_elem) { + elems->quiet_elem = pos; + elems->quiet_elem_len = elen; + } + elems->num_of_quiet_elem++; + break; + case WLAN_EID_COUNTRY: + elems->country_elem = pos; + elems->country_elem_len = elen; + break; + case WLAN_EID_PWR_CONSTRAINT: + elems->pwr_constr_elem = pos; + elems->pwr_constr_elem_len = elen; + break; + default: + break; + } + + left -= elen; + pos += elen; + } +} + +void ieee80211_set_wmm_default(struct ieee80211_sub_if_data *sdata) +{ + struct ieee80211_local *local = sdata->local; + struct ieee80211_tx_queue_params qparam; + int i; + + if (!local->ops->conf_tx) + return; + + memset(&qparam, 0, sizeof(qparam)); + + qparam.aifs = 2; + + if (local->hw.conf.channel->band == IEEE80211_BAND_2GHZ && + !(sdata->flags & IEEE80211_SDATA_OPERATING_GMODE)) + qparam.cw_min = 31; + else + qparam.cw_min = 15; + + qparam.cw_max = 1023; + qparam.txop = 0; + + for (i = 0; i < local_to_hw(local)->queues; i++) + local->ops->conf_tx(local_to_hw(local), i, &qparam); +} + +void ieee80211_tx_skb(struct ieee80211_sub_if_data *sdata, struct sk_buff *skb, + int encrypt) +{ + skb->dev = sdata->local->mdev; + skb_set_mac_header(skb, 0); + skb_set_network_header(skb, 0); + skb_set_transport_header(skb, 0); + + skb->iif = sdata->dev->ifindex; + skb->do_not_encrypt = !encrypt; + + dev_queue_xmit(skb); +} + +int ieee80211_set_freq(struct ieee80211_sub_if_data *sdata, int freqMHz) +{ + int ret = -EINVAL; + struct ieee80211_channel *chan; + struct ieee80211_local *local = sdata->local; + + chan = ieee80211_get_channel(local->hw.wiphy, freqMHz); + + if (chan && !(chan->flags & IEEE80211_CHAN_DISABLED)) { + if (sdata->vif.type == NL80211_IFTYPE_ADHOC && + chan->flags & IEEE80211_CHAN_NO_IBSS) { + printk(KERN_DEBUG "%s: IBSS not allowed on frequency " + "%d MHz\n", sdata->dev->name, chan->center_freq); + return ret; + } + local->oper_channel = chan; + + if (local->sw_scanning || local->hw_scanning) + ret = 0; + else + ret = ieee80211_hw_config(local); + + rate_control_clear(local); + } + + return ret; +} + +u64 ieee80211_mandatory_rates(struct ieee80211_local *local, + enum ieee80211_band band) +{ + struct ieee80211_supported_band *sband; + struct ieee80211_rate *bitrates; + u64 mandatory_rates; + enum ieee80211_rate_flags mandatory_flag; + int i; + + sband = local->hw.wiphy->bands[band]; + if (!sband) { + WARN_ON(1); + sband = local->hw.wiphy->bands[local->hw.conf.channel->band]; + } + + if (band == IEEE80211_BAND_2GHZ) + mandatory_flag = IEEE80211_RATE_MANDATORY_B; + else + mandatory_flag = IEEE80211_RATE_MANDATORY_A; + + bitrates = sband->bitrates; + mandatory_rates = 0; + for (i = 0; i < sband->n_bitrates; i++) + if (bitrates[i].flags & mandatory_flag) + mandatory_rates |= BIT(i); + return mandatory_rates; +} diff --git a/net/mac80211/wep.c b/net/mac80211/wep.c index affcecd78c10..f0e2d3ecb5c4 100644 --- a/net/mac80211/wep.c +++ b/net/mac80211/wep.c @@ -31,13 +31,13 @@ int ieee80211_wep_init(struct ieee80211_local *local) local->wep_tx_tfm = crypto_alloc_blkcipher("ecb(arc4)", 0, CRYPTO_ALG_ASYNC); if (IS_ERR(local->wep_tx_tfm)) - return -ENOMEM; + return PTR_ERR(local->wep_tx_tfm); local->wep_rx_tfm = crypto_alloc_blkcipher("ecb(arc4)", 0, CRYPTO_ALG_ASYNC); if (IS_ERR(local->wep_rx_tfm)) { crypto_free_blkcipher(local->wep_tx_tfm); - return -ENOMEM; + return PTR_ERR(local->wep_rx_tfm); } return 0; @@ -84,24 +84,17 @@ static u8 *ieee80211_wep_add_iv(struct ieee80211_local *local, struct sk_buff *skb, struct ieee80211_key *key) { - struct ieee80211_hdr *hdr = (struct ieee80211_hdr *) skb->data; - u16 fc; - int hdrlen; + struct ieee80211_hdr *hdr = (struct ieee80211_hdr *)skb->data; + unsigned int hdrlen; u8 *newhdr; - fc = le16_to_cpu(hdr->frame_control); - fc |= IEEE80211_FCTL_PROTECTED; - hdr->frame_control = cpu_to_le16(fc); + hdr->frame_control |= cpu_to_le16(IEEE80211_FCTL_PROTECTED); - if ((skb_headroom(skb) < WEP_IV_LEN || - skb_tailroom(skb) < WEP_ICV_LEN)) { - I802_DEBUG_INC(local->tx_expand_skb_head); - if (unlikely(pskb_expand_head(skb, WEP_IV_LEN, WEP_ICV_LEN, - GFP_ATOMIC))) - return NULL; - } + if (WARN_ON(skb_tailroom(skb) < WEP_ICV_LEN || + skb_headroom(skb) < WEP_IV_LEN)) + return NULL; - hdrlen = ieee80211_get_hdrlen(fc); + hdrlen = ieee80211_hdrlen(hdr->frame_control); newhdr = skb_push(skb, WEP_IV_LEN); memmove(newhdr, newhdr + WEP_IV_LEN, hdrlen); ieee80211_wep_get_iv(local, key, newhdr + hdrlen); @@ -113,12 +106,10 @@ static void ieee80211_wep_remove_iv(struct ieee80211_local *local, struct sk_buff *skb, struct ieee80211_key *key) { - struct ieee80211_hdr *hdr = (struct ieee80211_hdr *) skb->data; - u16 fc; - int hdrlen; + struct ieee80211_hdr *hdr = (struct ieee80211_hdr *)skb->data; + unsigned int hdrlen; - fc = le16_to_cpu(hdr->frame_control); - hdrlen = ieee80211_get_hdrlen(fc); + hdrlen = ieee80211_hdrlen(hdr->frame_control); memmove(skb->data + WEP_IV_LEN, skb->data, hdrlen); skb_pull(skb, WEP_IV_LEN); } @@ -228,22 +219,19 @@ int ieee80211_wep_decrypt(struct ieee80211_local *local, struct sk_buff *skb, u32 klen; u8 *rc4key; u8 keyidx; - struct ieee80211_hdr *hdr = (struct ieee80211_hdr *) skb->data; - u16 fc; - int hdrlen; + struct ieee80211_hdr *hdr = (struct ieee80211_hdr *)skb->data; + unsigned int hdrlen; size_t len; int ret = 0; - fc = le16_to_cpu(hdr->frame_control); - if (!(fc & IEEE80211_FCTL_PROTECTED)) + if (!ieee80211_has_protected(hdr->frame_control)) return -1; - hdrlen = ieee80211_get_hdrlen(fc); - - if (skb->len < 8 + hdrlen) + hdrlen = ieee80211_hdrlen(hdr->frame_control); + if (skb->len < hdrlen + WEP_IV_LEN + WEP_ICV_LEN) return -1; - len = skb->len - hdrlen - 8; + len = skb->len - hdrlen - WEP_IV_LEN - WEP_ICV_LEN; keyidx = skb->data[hdrlen + 3] >> 6; @@ -264,11 +252,8 @@ int ieee80211_wep_decrypt(struct ieee80211_local *local, struct sk_buff *skb, if (ieee80211_wep_decrypt_data(local->wep_rx_tfm, rc4key, klen, skb->data + hdrlen + WEP_IV_LEN, - len)) { - if (net_ratelimit()) - printk(KERN_DEBUG "WEP decrypt failed (ICV)\n"); + len)) ret = -1; - } kfree(rc4key); @@ -285,17 +270,15 @@ int ieee80211_wep_decrypt(struct ieee80211_local *local, struct sk_buff *skb, u8 * ieee80211_wep_is_weak_iv(struct sk_buff *skb, struct ieee80211_key *key) { - struct ieee80211_hdr *hdr = (struct ieee80211_hdr *) skb->data; - u16 fc; - int hdrlen; + struct ieee80211_hdr *hdr = (struct ieee80211_hdr *)skb->data; + unsigned int hdrlen; u8 *ivpos; u32 iv; - fc = le16_to_cpu(hdr->frame_control); - if (!(fc & IEEE80211_FCTL_PROTECTED)) + if (!ieee80211_has_protected(hdr->frame_control)) return NULL; - hdrlen = ieee80211_get_hdrlen(fc); + hdrlen = ieee80211_hdrlen(hdr->frame_control); ivpos = skb->data + hdrlen; iv = (ivpos[0] << 16) | (ivpos[1] << 8) | ivpos[2]; @@ -308,24 +291,19 @@ u8 * ieee80211_wep_is_weak_iv(struct sk_buff *skb, struct ieee80211_key *key) ieee80211_rx_result ieee80211_crypto_wep_decrypt(struct ieee80211_rx_data *rx) { - if ((rx->fc & IEEE80211_FCTL_FTYPE) != IEEE80211_FTYPE_DATA && - ((rx->fc & IEEE80211_FCTL_FTYPE) != IEEE80211_FTYPE_MGMT || - (rx->fc & IEEE80211_FCTL_STYPE) != IEEE80211_STYPE_AUTH)) + struct ieee80211_hdr *hdr = (struct ieee80211_hdr *)rx->skb->data; + + if (!ieee80211_is_data(hdr->frame_control) && + !ieee80211_is_auth(hdr->frame_control)) return RX_CONTINUE; if (!(rx->status->flag & RX_FLAG_DECRYPTED)) { - if (ieee80211_wep_decrypt(rx->local, rx->skb, rx->key)) { -#ifdef CONFIG_MAC80211_DEBUG - if (net_ratelimit()) - printk(KERN_DEBUG "%s: RX WEP frame, decrypt " - "failed\n", rx->dev->name); -#endif /* CONFIG_MAC80211_DEBUG */ + if (ieee80211_wep_decrypt(rx->local, rx->skb, rx->key)) return RX_DROP_UNUSABLE; - } } else if (!(rx->status->flag & RX_FLAG_IV_STRIPPED)) { ieee80211_wep_remove_iv(rx->local, rx->skb, rx->key); /* remove ICV */ - skb_trim(rx->skb, rx->skb->len - 4); + skb_trim(rx->skb, rx->skb->len - WEP_ICV_LEN); } return RX_CONTINUE; @@ -333,11 +311,13 @@ ieee80211_crypto_wep_decrypt(struct ieee80211_rx_data *rx) static int wep_encrypt_skb(struct ieee80211_tx_data *tx, struct sk_buff *skb) { + struct ieee80211_tx_info *info = IEEE80211_SKB_CB(skb); + if (!(tx->key->flags & KEY_FLAG_UPLOADED_TO_HARDWARE)) { if (ieee80211_wep_encrypt(tx->local, skb, tx->key)) return -1; } else { - tx->control->key_idx = tx->key->conf.hw_key_idx; + info->control.hw_key = &tx->key->conf; if (tx->key->conf.flags & IEEE80211_KEY_FLAG_GENERATE_IV) { if (!ieee80211_wep_add_iv(tx->local, skb, tx->key)) return -1; @@ -349,8 +329,6 @@ static int wep_encrypt_skb(struct ieee80211_tx_data *tx, struct sk_buff *skb) ieee80211_tx_result ieee80211_crypto_wep_encrypt(struct ieee80211_tx_data *tx) { - tx->control->iv_len = WEP_IV_LEN; - tx->control->icv_len = WEP_ICV_LEN; ieee80211_tx_set_protected(tx); if (wep_encrypt_skb(tx, tx->skb) < 0) { diff --git a/net/mac80211/wep.h b/net/mac80211/wep.h index 363779c50658..e587172115b8 100644 --- a/net/mac80211/wep.h +++ b/net/mac80211/wep.h @@ -26,7 +26,7 @@ int ieee80211_wep_encrypt(struct ieee80211_local *local, struct sk_buff *skb, struct ieee80211_key *key); int ieee80211_wep_decrypt(struct ieee80211_local *local, struct sk_buff *skb, struct ieee80211_key *key); -u8 * ieee80211_wep_is_weak_iv(struct sk_buff *skb, struct ieee80211_key *key); +u8 *ieee80211_wep_is_weak_iv(struct sk_buff *skb, struct ieee80211_key *key); ieee80211_rx_result ieee80211_crypto_wep_decrypt(struct ieee80211_rx_data *rx); diff --git a/net/mac80211/wext.c b/net/mac80211/wext.c index 6106cb79060c..7e0d53abde24 100644 --- a/net/mac80211/wext.c +++ b/net/mac80211/wext.c @@ -27,22 +27,19 @@ #include "aes_ccm.h" -static int ieee80211_set_encryption(struct net_device *dev, u8 *sta_addr, +static int ieee80211_set_encryption(struct ieee80211_sub_if_data *sdata, u8 *sta_addr, int idx, int alg, int remove, int set_tx_key, const u8 *_key, size_t key_len) { - struct ieee80211_local *local = wdev_priv(dev->ieee80211_ptr); + struct ieee80211_local *local = sdata->local; struct sta_info *sta; struct ieee80211_key *key; - struct ieee80211_sub_if_data *sdata; int err; - sdata = IEEE80211_DEV_TO_SUB_IF(dev); - if (idx < 0 || idx >= NUM_DEFAULT_KEYS) { printk(KERN_DEBUG "%s: set_encrypt - invalid idx=%d\n", - dev->name, idx); + sdata->dev->name, idx); return -EINVAL; } @@ -95,6 +92,13 @@ static int ieee80211_set_encryption(struct net_device *dev, u8 *sta_addr, } } + if (alg == ALG_WEP && + key_len != LEN_WEP40 && key_len != LEN_WEP104) { + ieee80211_key_free(key); + err = -EINVAL; + goto out_unlock; + } + ieee80211_key_link(key, sdata, sta); if (set_tx_key || (!sta && !sdata->default_key && key)) @@ -118,13 +122,13 @@ static int ieee80211_ioctl_siwgenie(struct net_device *dev, if (sdata->flags & IEEE80211_SDATA_USERSPACE_MLME) return -EOPNOTSUPP; - if (sdata->vif.type == IEEE80211_IF_TYPE_STA || - sdata->vif.type == IEEE80211_IF_TYPE_IBSS) { - int ret = ieee80211_sta_set_extra_ie(dev, extra, data->length); + if (sdata->vif.type == NL80211_IFTYPE_STATION || + sdata->vif.type == NL80211_IFTYPE_ADHOC) { + int ret = ieee80211_sta_set_extra_ie(sdata, extra, data->length); if (ret) return ret; sdata->u.sta.flags &= ~IEEE80211_STA_AUTO_BSSID_SEL; - ieee80211_sta_req_auth(dev, &sdata->u.sta); + ieee80211_sta_req_auth(sdata, &sdata->u.sta); return 0; } @@ -135,7 +139,39 @@ static int ieee80211_ioctl_giwname(struct net_device *dev, struct iw_request_info *info, char *name, char *extra) { + struct ieee80211_local *local = wdev_priv(dev->ieee80211_ptr); + struct ieee80211_supported_band *sband; + u8 is_ht = 0, is_a = 0, is_b = 0, is_g = 0; + + + sband = local->hw.wiphy->bands[IEEE80211_BAND_5GHZ]; + if (sband) { + is_a = 1; + is_ht |= sband->ht_info.ht_supported; + } + + sband = local->hw.wiphy->bands[IEEE80211_BAND_2GHZ]; + if (sband) { + int i; + /* Check for mandatory rates */ + for (i = 0; i < sband->n_bitrates; i++) { + if (sband->bitrates[i].bitrate == 10) + is_b = 1; + if (sband->bitrates[i].bitrate == 60) + is_g = 1; + } + is_ht |= sband->ht_info.ht_supported; + } + strcpy(name, "IEEE 802.11"); + if (is_a) + strcat(name, "a"); + if (is_b) + strcat(name, "b"); + if (is_g) + strcat(name, "g"); + if (is_ht) + strcat(name, "n"); return 0; } @@ -169,14 +205,26 @@ static int ieee80211_ioctl_giwrange(struct net_device *dev, range->num_encoding_sizes = 2; range->max_encoding_tokens = NUM_DEFAULT_KEYS; - range->max_qual.qual = local->hw.max_signal; - range->max_qual.level = local->hw.max_rssi; - range->max_qual.noise = local->hw.max_noise; + if (local->hw.flags & IEEE80211_HW_SIGNAL_UNSPEC || + local->hw.flags & IEEE80211_HW_SIGNAL_DB) + range->max_qual.level = local->hw.max_signal; + else if (local->hw.flags & IEEE80211_HW_SIGNAL_DBM) + range->max_qual.level = -110; + else + range->max_qual.level = 0; + + if (local->hw.flags & IEEE80211_HW_NOISE_DBM) + range->max_qual.noise = -110; + else + range->max_qual.noise = 0; + + range->max_qual.qual = 100; range->max_qual.updated = local->wstats_flags; - range->avg_qual.qual = local->hw.max_signal/2; - range->avg_qual.level = 0; - range->avg_qual.noise = 0; + range->avg_qual.qual = 50; + /* not always true but better than nothing */ + range->avg_qual.level = range->max_qual.level / 2; + range->avg_qual.noise = range->max_qual.noise / 2; range->avg_qual.updated = local->wstats_flags; range->enc_capa = IW_ENC_CAPA_WPA | IW_ENC_CAPA_WPA2 | @@ -225,35 +273,27 @@ static int ieee80211_ioctl_siwmode(struct net_device *dev, struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev); int type; - if (sdata->vif.type == IEEE80211_IF_TYPE_VLAN) + if (sdata->vif.type == NL80211_IFTYPE_AP_VLAN) return -EOPNOTSUPP; switch (*mode) { case IW_MODE_INFRA: - type = IEEE80211_IF_TYPE_STA; + type = NL80211_IFTYPE_STATION; break; case IW_MODE_ADHOC: - type = IEEE80211_IF_TYPE_IBSS; + type = NL80211_IFTYPE_ADHOC; break; case IW_MODE_REPEAT: - type = IEEE80211_IF_TYPE_WDS; + type = NL80211_IFTYPE_WDS; break; case IW_MODE_MONITOR: - type = IEEE80211_IF_TYPE_MNTR; + type = NL80211_IFTYPE_MONITOR; break; default: return -EINVAL; } - if (type == sdata->vif.type) - return 0; - if (netif_running(dev)) - return -EBUSY; - - ieee80211_if_reinit(dev); - ieee80211_if_set_type(dev, type); - - return 0; + return ieee80211_if_change_type(sdata, type); } @@ -265,22 +305,22 @@ static int ieee80211_ioctl_giwmode(struct net_device *dev, sdata = IEEE80211_DEV_TO_SUB_IF(dev); switch (sdata->vif.type) { - case IEEE80211_IF_TYPE_AP: + case NL80211_IFTYPE_AP: *mode = IW_MODE_MASTER; break; - case IEEE80211_IF_TYPE_STA: + case NL80211_IFTYPE_STATION: *mode = IW_MODE_INFRA; break; - case IEEE80211_IF_TYPE_IBSS: + case NL80211_IFTYPE_ADHOC: *mode = IW_MODE_ADHOC; break; - case IEEE80211_IF_TYPE_MNTR: + case NL80211_IFTYPE_MONITOR: *mode = IW_MODE_MONITOR; break; - case IEEE80211_IF_TYPE_WDS: + case NL80211_IFTYPE_WDS: *mode = IW_MODE_REPEAT; break; - case IEEE80211_IF_TYPE_VLAN: + case NL80211_IFTYPE_AP_VLAN: *mode = IW_MODE_SECOND; /* FIXME */ break; default: @@ -290,60 +330,31 @@ static int ieee80211_ioctl_giwmode(struct net_device *dev, return 0; } -int ieee80211_set_freq(struct net_device *dev, int freqMHz) -{ - int ret = -EINVAL; - struct ieee80211_channel *chan; - struct ieee80211_local *local = wdev_priv(dev->ieee80211_ptr); - struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev); - - chan = ieee80211_get_channel(local->hw.wiphy, freqMHz); - - if (chan && !(chan->flags & IEEE80211_CHAN_DISABLED)) { - if (sdata->vif.type == IEEE80211_IF_TYPE_IBSS && - chan->flags & IEEE80211_CHAN_NO_IBSS) { - printk(KERN_DEBUG "%s: IBSS not allowed on frequency " - "%d MHz\n", dev->name, chan->center_freq); - return ret; - } - local->oper_channel = chan; - - if (local->sta_sw_scanning || local->sta_hw_scanning) - ret = 0; - else - ret = ieee80211_hw_config(local); - - rate_control_clear(local); - } - - return ret; -} - static int ieee80211_ioctl_siwfreq(struct net_device *dev, struct iw_request_info *info, struct iw_freq *freq, char *extra) { struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev); - if (sdata->vif.type == IEEE80211_IF_TYPE_STA) + if (sdata->vif.type == NL80211_IFTYPE_STATION) sdata->u.sta.flags &= ~IEEE80211_STA_AUTO_CHANNEL_SEL; /* freq->e == 0: freq->m = channel; otherwise freq = m * 10^e */ if (freq->e == 0) { if (freq->m < 0) { - if (sdata->vif.type == IEEE80211_IF_TYPE_STA) + if (sdata->vif.type == NL80211_IFTYPE_STATION) sdata->u.sta.flags |= IEEE80211_STA_AUTO_CHANNEL_SEL; return 0; } else - return ieee80211_set_freq(dev, + return ieee80211_set_freq(sdata, ieee80211_channel_to_frequency(freq->m)); } else { int i, div = 1000000; for (i = 0; i < freq->e; i++) div /= 10; if (div > 0) - return ieee80211_set_freq(dev, freq->m / div); + return ieee80211_set_freq(sdata, freq->m / div); else return -EINVAL; } @@ -375,8 +386,8 @@ static int ieee80211_ioctl_siwessid(struct net_device *dev, len--; sdata = IEEE80211_DEV_TO_SUB_IF(dev); - if (sdata->vif.type == IEEE80211_IF_TYPE_STA || - sdata->vif.type == IEEE80211_IF_TYPE_IBSS) { + if (sdata->vif.type == NL80211_IFTYPE_STATION || + sdata->vif.type == NL80211_IFTYPE_ADHOC) { int ret; if (sdata->flags & IEEE80211_SDATA_USERSPACE_MLME) { if (len > IEEE80211_MAX_SSID_LEN) @@ -389,19 +400,19 @@ static int ieee80211_ioctl_siwessid(struct net_device *dev, sdata->u.sta.flags &= ~IEEE80211_STA_AUTO_SSID_SEL; else sdata->u.sta.flags |= IEEE80211_STA_AUTO_SSID_SEL; - ret = ieee80211_sta_set_ssid(dev, ssid, len); + ret = ieee80211_sta_set_ssid(sdata, ssid, len); if (ret) return ret; - ieee80211_sta_req_auth(dev, &sdata->u.sta); + ieee80211_sta_req_auth(sdata, &sdata->u.sta); return 0; } - if (sdata->vif.type == IEEE80211_IF_TYPE_AP) { + if (sdata->vif.type == NL80211_IFTYPE_AP) { memcpy(sdata->u.ap.ssid, ssid, len); memset(sdata->u.ap.ssid + len, 0, IEEE80211_MAX_SSID_LEN - len); sdata->u.ap.ssid_len = len; - return ieee80211_if_config(dev); + return ieee80211_if_config(sdata, IEEE80211_IFCC_SSID); } return -EOPNOTSUPP; } @@ -415,9 +426,9 @@ static int ieee80211_ioctl_giwessid(struct net_device *dev, struct ieee80211_sub_if_data *sdata; sdata = IEEE80211_DEV_TO_SUB_IF(dev); - if (sdata->vif.type == IEEE80211_IF_TYPE_STA || - sdata->vif.type == IEEE80211_IF_TYPE_IBSS) { - int res = ieee80211_sta_get_ssid(dev, ssid, &len); + if (sdata->vif.type == NL80211_IFTYPE_STATION || + sdata->vif.type == NL80211_IFTYPE_ADHOC) { + int res = ieee80211_sta_get_ssid(sdata, ssid, &len); if (res == 0) { data->length = len; data->flags = 1; @@ -426,7 +437,7 @@ static int ieee80211_ioctl_giwessid(struct net_device *dev, return res; } - if (sdata->vif.type == IEEE80211_IF_TYPE_AP) { + if (sdata->vif.type == NL80211_IFTYPE_AP) { len = sdata->u.ap.ssid_len; if (len > IW_ESSID_MAX_SIZE) len = IW_ESSID_MAX_SIZE; @@ -446,8 +457,8 @@ static int ieee80211_ioctl_siwap(struct net_device *dev, struct ieee80211_sub_if_data *sdata; sdata = IEEE80211_DEV_TO_SUB_IF(dev); - if (sdata->vif.type == IEEE80211_IF_TYPE_STA || - sdata->vif.type == IEEE80211_IF_TYPE_IBSS) { + if (sdata->vif.type == NL80211_IFTYPE_STATION || + sdata->vif.type == NL80211_IFTYPE_ADHOC) { int ret; if (sdata->flags & IEEE80211_SDATA_USERSPACE_MLME) { memcpy(sdata->u.sta.bssid, (u8 *) &ap_addr->sa_data, @@ -461,12 +472,12 @@ static int ieee80211_ioctl_siwap(struct net_device *dev, sdata->u.sta.flags |= IEEE80211_STA_AUTO_BSSID_SEL; else sdata->u.sta.flags &= ~IEEE80211_STA_AUTO_BSSID_SEL; - ret = ieee80211_sta_set_bssid(dev, (u8 *) &ap_addr->sa_data); + ret = ieee80211_sta_set_bssid(sdata, (u8 *) &ap_addr->sa_data); if (ret) return ret; - ieee80211_sta_req_auth(dev, &sdata->u.sta); + ieee80211_sta_req_auth(sdata, &sdata->u.sta); return 0; - } else if (sdata->vif.type == IEEE80211_IF_TYPE_WDS) { + } else if (sdata->vif.type == NL80211_IFTYPE_WDS) { /* * If it is necessary to update the WDS peer address * while the interface is running, then we need to do @@ -494,10 +505,10 @@ static int ieee80211_ioctl_giwap(struct net_device *dev, struct ieee80211_sub_if_data *sdata; sdata = IEEE80211_DEV_TO_SUB_IF(dev); - if (sdata->vif.type == IEEE80211_IF_TYPE_STA || - sdata->vif.type == IEEE80211_IF_TYPE_IBSS) { - if (sdata->u.sta.state == IEEE80211_ASSOCIATED || - sdata->u.sta.state == IEEE80211_IBSS_JOINED) { + if (sdata->vif.type == NL80211_IFTYPE_STATION || + sdata->vif.type == NL80211_IFTYPE_ADHOC) { + if (sdata->u.sta.state == IEEE80211_STA_MLME_ASSOCIATED || + sdata->u.sta.state == IEEE80211_STA_MLME_IBSS_JOINED) { ap_addr->sa_family = ARPHRD_ETHER; memcpy(&ap_addr->sa_data, sdata->u.sta.bssid, ETH_ALEN); return 0; @@ -505,7 +516,7 @@ static int ieee80211_ioctl_giwap(struct net_device *dev, memset(&ap_addr->sa_data, 0, ETH_ALEN); return 0; } - } else if (sdata->vif.type == IEEE80211_IF_TYPE_WDS) { + } else if (sdata->vif.type == NL80211_IFTYPE_WDS) { ap_addr->sa_family = ARPHRD_ETHER; memcpy(&ap_addr->sa_data, sdata->u.wds.remote_addr, ETH_ALEN); return 0; @@ -527,10 +538,10 @@ static int ieee80211_ioctl_siwscan(struct net_device *dev, if (!netif_running(dev)) return -ENETDOWN; - if (sdata->vif.type != IEEE80211_IF_TYPE_STA && - sdata->vif.type != IEEE80211_IF_TYPE_IBSS && - sdata->vif.type != IEEE80211_IF_TYPE_MESH_POINT && - sdata->vif.type != IEEE80211_IF_TYPE_AP) + if (sdata->vif.type != NL80211_IFTYPE_STATION && + sdata->vif.type != NL80211_IFTYPE_ADHOC && + sdata->vif.type != NL80211_IFTYPE_MESH_POINT && + sdata->vif.type != NL80211_IFTYPE_AP) return -EOPNOTSUPP; /* if SSID was specified explicitly then use that */ @@ -541,7 +552,7 @@ static int ieee80211_ioctl_siwscan(struct net_device *dev, ssid_len = req->essid_len; } - return ieee80211_sta_req_scan(dev, ssid, ssid_len); + return ieee80211_request_scan(sdata, ssid, ssid_len); } @@ -551,11 +562,14 @@ static int ieee80211_ioctl_giwscan(struct net_device *dev, { int res; struct ieee80211_local *local = wdev_priv(dev->ieee80211_ptr); + struct ieee80211_sub_if_data *sdata; - if (local->sta_sw_scanning || local->sta_hw_scanning) + sdata = IEEE80211_DEV_TO_SUB_IF(dev); + + if (local->sw_scanning || local->hw_scanning) return -EAGAIN; - res = ieee80211_sta_scan_results(dev, extra, data->length); + res = ieee80211_scan_results(local, info, extra, data->length); if (res >= 0) { data->length = res; return 0; @@ -576,16 +590,14 @@ static int ieee80211_ioctl_siwrate(struct net_device *dev, struct ieee80211_supported_band *sband; sdata = IEEE80211_DEV_TO_SUB_IF(dev); - if (!sdata->bss) - return -ENODEV; sband = local->hw.wiphy->bands[local->hw.conf.channel->band]; /* target_rate = -1, rate->fixed = 0 means auto only, so use all rates * target_rate = X, rate->fixed = 1 means only rate X * target_rate = X, rate->fixed = 0 means all rates <= X */ - sdata->bss->max_ratectrl_rateidx = -1; - sdata->bss->force_unicast_rateidx = -1; + sdata->max_ratectrl_rateidx = -1; + sdata->force_unicast_rateidx = -1; if (rate->value < 0) return 0; @@ -594,9 +606,9 @@ static int ieee80211_ioctl_siwrate(struct net_device *dev, int this_rate = brate->bitrate; if (target_rate == this_rate) { - sdata->bss->max_ratectrl_rateidx = i; + sdata->max_ratectrl_rateidx = i; if (rate->fixed) - sdata->bss->force_unicast_rateidx = i; + sdata->force_unicast_rateidx = i; err = 0; break; } @@ -615,7 +627,7 @@ static int ieee80211_ioctl_giwrate(struct net_device *dev, sdata = IEEE80211_DEV_TO_SUB_IF(dev); - if (sdata->vif.type != IEEE80211_IF_TYPE_STA) + if (sdata->vif.type != NL80211_IFTYPE_STATION) return -EOPNOTSUPP; sband = local->hw.wiphy->bands[local->hw.conf.channel->band]; @@ -624,8 +636,8 @@ static int ieee80211_ioctl_giwrate(struct net_device *dev, sta = sta_info_get(local, sdata->u.sta.bssid); - if (sta && sta->txrate_idx < sband->n_bitrates) - rate->value = sband->bitrates[sta->txrate_idx].bitrate; + if (sta && sta->last_txrate_idx < sband->n_bitrates) + rate->value = sband->bitrates[sta->last_txrate_idx].bitrate; else rate->value = 0; @@ -709,6 +721,9 @@ static int ieee80211_ioctl_siwrts(struct net_device *dev, if (rts->disabled) local->rts_threshold = IEEE80211_MAX_RTS_THRESHOLD; + else if (!rts->fixed) + /* if the rts value is not fixed, then take default */ + local->rts_threshold = IEEE80211_MAX_RTS_THRESHOLD; else if (rts->value < 0 || rts->value > IEEE80211_MAX_RTS_THRESHOLD) return -EINVAL; else @@ -746,6 +761,8 @@ static int ieee80211_ioctl_siwfrag(struct net_device *dev, if (frag->disabled) local->fragmentation_threshold = IEEE80211_MAX_FRAG_THRESHOLD; + else if (!frag->fixed) + local->fragmentation_threshold = IEEE80211_MAX_FRAG_THRESHOLD; else if (frag->value < 256 || frag->value > IEEE80211_MAX_FRAG_THRESHOLD) return -EINVAL; @@ -841,17 +858,17 @@ static int ieee80211_ioctl_siwmlme(struct net_device *dev, struct iw_mlme *mlme = (struct iw_mlme *) extra; sdata = IEEE80211_DEV_TO_SUB_IF(dev); - if (sdata->vif.type != IEEE80211_IF_TYPE_STA && - sdata->vif.type != IEEE80211_IF_TYPE_IBSS) + if (sdata->vif.type != NL80211_IFTYPE_STATION && + sdata->vif.type != NL80211_IFTYPE_ADHOC) return -EINVAL; switch (mlme->cmd) { case IW_MLME_DEAUTH: /* TODO: mlme->addr.sa_data */ - return ieee80211_sta_deauthenticate(dev, mlme->reason_code); + return ieee80211_sta_deauthenticate(sdata, mlme->reason_code); case IW_MLME_DISASSOC: /* TODO: mlme->addr.sa_data */ - return ieee80211_sta_disassociate(dev, mlme->reason_code); + return ieee80211_sta_disassociate(sdata, mlme->reason_code); default: return -EOPNOTSUPP; } @@ -892,7 +909,7 @@ static int ieee80211_ioctl_siwencode(struct net_device *dev, } return ieee80211_set_encryption( - dev, bcaddr, + sdata, bcaddr, idx, alg, remove, !sdata->default_key, keybuf, erq->length); @@ -937,6 +954,58 @@ static int ieee80211_ioctl_giwencode(struct net_device *dev, erq->length = sdata->keys[idx]->conf.keylen; erq->flags |= IW_ENCODE_ENABLED; + if (sdata->vif.type == NL80211_IFTYPE_STATION) { + struct ieee80211_if_sta *ifsta = &sdata->u.sta; + switch (ifsta->auth_alg) { + case WLAN_AUTH_OPEN: + case WLAN_AUTH_LEAP: + erq->flags |= IW_ENCODE_OPEN; + break; + case WLAN_AUTH_SHARED_KEY: + erq->flags |= IW_ENCODE_RESTRICTED; + break; + } + } + + return 0; +} + +static int ieee80211_ioctl_siwpower(struct net_device *dev, + struct iw_request_info *info, + struct iw_param *wrq, + char *extra) +{ + struct ieee80211_local *local = wdev_priv(dev->ieee80211_ptr); + struct ieee80211_conf *conf = &local->hw.conf; + + if (wrq->disabled) { + conf->flags &= ~IEEE80211_CONF_PS; + return ieee80211_hw_config(local); + } + + switch (wrq->flags & IW_POWER_MODE) { + case IW_POWER_ON: /* If not specified */ + case IW_POWER_MODE: /* If set all mask */ + case IW_POWER_ALL_R: /* If explicitely state all */ + conf->flags |= IEEE80211_CONF_PS; + break; + default: /* Otherwise we don't support it */ + return -EINVAL; + } + + return ieee80211_hw_config(local); +} + +static int ieee80211_ioctl_giwpower(struct net_device *dev, + struct iw_request_info *info, + union iwreq_data *wrqu, + char *extra) +{ + struct ieee80211_local *local = wdev_priv(dev->ieee80211_ptr); + struct ieee80211_conf *conf = &local->hw.conf; + + wrqu->power.disabled = !(conf->flags & IEEE80211_CONF_PS); + return 0; } @@ -959,7 +1028,7 @@ static int ieee80211_ioctl_siwauth(struct net_device *dev, sdata->drop_unencrypted = !!data->value; break; case IW_AUTH_PRIVACY_INVOKED: - if (sdata->vif.type != IEEE80211_IF_TYPE_STA) + if (sdata->vif.type != NL80211_IFTYPE_STATION) ret = -EINVAL; else { sdata->u.sta.flags &= ~IEEE80211_STA_PRIVACY_INVOKED; @@ -974,8 +1043,8 @@ static int ieee80211_ioctl_siwauth(struct net_device *dev, } break; case IW_AUTH_80211_AUTH_ALG: - if (sdata->vif.type == IEEE80211_IF_TYPE_STA || - sdata->vif.type == IEEE80211_IF_TYPE_IBSS) + if (sdata->vif.type == NL80211_IFTYPE_STATION || + sdata->vif.type == NL80211_IFTYPE_ADHOC) sdata->u.sta.auth_algs = data->value; else ret = -EOPNOTSUPP; @@ -997,8 +1066,8 @@ static struct iw_statistics *ieee80211_get_wireless_stats(struct net_device *dev rcu_read_lock(); - if (sdata->vif.type == IEEE80211_IF_TYPE_STA || - sdata->vif.type == IEEE80211_IF_TYPE_IBSS) + if (sdata->vif.type == NL80211_IFTYPE_STATION || + sdata->vif.type == NL80211_IFTYPE_ADHOC) sta = sta_info_get(local, sdata->u.sta.bssid); if (!sta) { wstats->discard.fragment = 0; @@ -1008,8 +1077,8 @@ static struct iw_statistics *ieee80211_get_wireless_stats(struct net_device *dev wstats->qual.noise = 0; wstats->qual.updated = IW_QUAL_ALL_INVALID; } else { - wstats->qual.level = sta->last_rssi; - wstats->qual.qual = sta->last_signal; + wstats->qual.level = sta->last_signal; + wstats->qual.qual = sta->last_qual; wstats->qual.noise = sta->last_noise; wstats->qual.updated = local->wstats_flags; } @@ -1028,8 +1097,8 @@ static int ieee80211_ioctl_giwauth(struct net_device *dev, switch (data->flags & IW_AUTH_INDEX) { case IW_AUTH_80211_AUTH_ALG: - if (sdata->vif.type == IEEE80211_IF_TYPE_STA || - sdata->vif.type == IEEE80211_IF_TYPE_IBSS) + if (sdata->vif.type == NL80211_IFTYPE_STATION || + sdata->vif.type == NL80211_IFTYPE_ADHOC) data->value = sdata->u.sta.auth_algs; else ret = -EOPNOTSUPP; @@ -1086,7 +1155,7 @@ static int ieee80211_ioctl_siwencodeext(struct net_device *dev, } else idx--; - return ieee80211_set_encryption(dev, ext->addr.sa_data, idx, alg, + return ieee80211_set_encryption(sdata, ext->addr.sa_data, idx, alg, remove, ext->ext_flags & IW_ENCODE_EXT_SET_TX_KEY, @@ -1142,8 +1211,8 @@ static const iw_handler ieee80211_handler[] = (iw_handler) ieee80211_ioctl_giwretry, /* SIOCGIWRETRY */ (iw_handler) ieee80211_ioctl_siwencode, /* SIOCSIWENCODE */ (iw_handler) ieee80211_ioctl_giwencode, /* SIOCGIWENCODE */ - (iw_handler) NULL, /* SIOCSIWPOWER */ - (iw_handler) NULL, /* SIOCGIWPOWER */ + (iw_handler) ieee80211_ioctl_siwpower, /* SIOCSIWPOWER */ + (iw_handler) ieee80211_ioctl_giwpower, /* SIOCGIWPOWER */ (iw_handler) NULL, /* -- hole -- */ (iw_handler) NULL, /* -- hole -- */ (iw_handler) ieee80211_ioctl_siwgenie, /* SIOCSIWGENIE */ diff --git a/net/mac80211/wme.c b/net/mac80211/wme.c index 635b996c8c35..139b5f267b34 100644 --- a/net/mac80211/wme.c +++ b/net/mac80211/wme.c @@ -18,61 +18,40 @@ #include "ieee80211_i.h" #include "wme.h" -/* maximum number of hardware queues we support. */ -#define TC_80211_MAX_QUEUES 16 - +/* Default mapping in classifier to work with default + * queue setup. + */ const int ieee802_1d_to_ac[8] = { 2, 3, 3, 2, 1, 1, 0, 0 }; -struct ieee80211_sched_data -{ - unsigned long qdisc_pool[BITS_TO_LONGS(TC_80211_MAX_QUEUES)]; - struct tcf_proto *filter_list; - struct Qdisc *queues[TC_80211_MAX_QUEUES]; - struct sk_buff_head requeued[TC_80211_MAX_QUEUES]; -}; - static const char llc_ip_hdr[8] = {0xAA, 0xAA, 0x3, 0, 0, 0, 0x08, 0}; -/* given a data frame determine the 802.1p/1d tag to use */ -static inline unsigned classify_1d(struct sk_buff *skb, struct Qdisc *qd) +/* Given a data frame determine the 802.1p/1d tag to use. */ +static unsigned int classify_1d(struct sk_buff *skb) { - struct iphdr *ip; - int dscp; - int offset; - - struct ieee80211_sched_data *q = qdisc_priv(qd); - struct tcf_result res = { -1, 0 }; - - /* if there is a user set filter list, call out to that */ - if (q->filter_list) { - tc_classify(skb, q->filter_list, &res); - if (res.class != -1) - return res.class; - } + unsigned int dscp; /* skb->priority values from 256->263 are magic values to - * directly indicate a specific 802.1d priority. - * This is used to allow 802.1d priority to be passed directly in - * from VLAN tags, etc. */ + * directly indicate a specific 802.1d priority. This is used + * to allow 802.1d priority to be passed directly in from VLAN + * tags, etc. + */ if (skb->priority >= 256 && skb->priority <= 263) return skb->priority - 256; - /* check there is a valid IP header present */ - offset = ieee80211_get_hdrlen_from_skb(skb); - if (skb->len < offset + sizeof(llc_ip_hdr) + sizeof(*ip) || - memcmp(skb->data + offset, llc_ip_hdr, sizeof(llc_ip_hdr))) - return 0; + switch (skb->protocol) { + case htons(ETH_P_IP): + dscp = ip_hdr(skb)->tos & 0xfc; + break; - ip = (struct iphdr *) (skb->data + offset + sizeof(llc_ip_hdr)); - - dscp = ip->tos & 0xfc; - if (dscp & 0x1c) + default: return 0; + } + return dscp >> 5; } -static inline int wme_downgrade_ac(struct sk_buff *skb) +static int wme_downgrade_ac(struct sk_buff *skb) { switch (skb->priority) { case 6: @@ -93,43 +72,37 @@ static inline int wme_downgrade_ac(struct sk_buff *skb) } -/* positive return value indicates which queue to use - * negative return value indicates to drop the frame */ -static inline int classify80211(struct sk_buff *skb, struct Qdisc *qd) +/* Indicate which queue to use. */ +static u16 classify80211(struct ieee80211_local *local, struct sk_buff *skb) { - struct ieee80211_local *local = wdev_priv(qd->dev->ieee80211_ptr); struct ieee80211_hdr *hdr = (struct ieee80211_hdr *) skb->data; - unsigned short fc = le16_to_cpu(hdr->frame_control); - int qos; - /* see if frame is data or non data frame */ - if (unlikely((fc & IEEE80211_FCTL_FTYPE) != IEEE80211_FTYPE_DATA)) { + if (!ieee80211_is_data(hdr->frame_control)) { /* management frames go on AC_VO queue, but are sent * without QoS control fields */ - return IEEE80211_TX_QUEUE_DATA0; + return 0; } if (0 /* injected */) { /* use AC from radiotap */ } - /* is this a QoS frame? */ - qos = fc & IEEE80211_STYPE_QOS_DATA; - - if (!qos) { + if (!ieee80211_is_data_qos(hdr->frame_control)) { skb->priority = 0; /* required for correct WPA/11i MIC */ return ieee802_1d_to_ac[skb->priority]; } /* use the data classifier to determine what 802.1d tag the * data frame has */ - skb->priority = classify_1d(skb, qd); + skb->priority = classify_1d(skb); /* in case we are a client verify acm is not set for this ac */ while (unlikely(local->wmm_acm & BIT(skb->priority))) { if (wme_downgrade_ac(skb)) { - /* No AC with lower priority has acm=0, drop packet. */ - return -1; + /* The old code would drop the packet in this + * case. + */ + return 0; } } @@ -137,55 +110,53 @@ static inline int classify80211(struct sk_buff *skb, struct Qdisc *qd) return ieee802_1d_to_ac[skb->priority]; } - -static int wme_qdiscop_enqueue(struct sk_buff *skb, struct Qdisc* qd) +u16 ieee80211_select_queue(struct net_device *dev, struct sk_buff *skb) { - struct ieee80211_local *local = wdev_priv(qd->dev->ieee80211_ptr); - struct ieee80211_sched_data *q = qdisc_priv(qd); - struct ieee80211_tx_packet_data *pkt_data = - (struct ieee80211_tx_packet_data *) skb->cb; + struct ieee80211_master_priv *mpriv = netdev_priv(dev); + struct ieee80211_local *local = mpriv->local; struct ieee80211_hdr *hdr = (struct ieee80211_hdr *) skb->data; - unsigned short fc = le16_to_cpu(hdr->frame_control); - struct Qdisc *qdisc; - int err, queue; + struct ieee80211_tx_info *info = IEEE80211_SKB_CB(skb); struct sta_info *sta; + u16 queue; u8 tid; - if (pkt_data->flags & IEEE80211_TXPD_REQUEUE) { - queue = pkt_data->queue; + queue = classify80211(local, skb); + if (unlikely(queue >= local->hw.queues)) + queue = local->hw.queues - 1; + + if (info->flags & IEEE80211_TX_CTL_REQUEUE) { rcu_read_lock(); sta = sta_info_get(local, hdr->addr1); - tid = skb->priority & QOS_CONTROL_TAG1D_MASK; + tid = skb->priority & IEEE80211_QOS_CTL_TAG1D_MASK; if (sta) { + struct ieee80211_hw *hw = &local->hw; int ampdu_queue = sta->tid_to_tx_q[tid]; - if ((ampdu_queue < local->hw.queues) && - test_bit(ampdu_queue, q->qdisc_pool)) { + + if ((ampdu_queue < ieee80211_num_queues(hw)) && + test_bit(ampdu_queue, local->queue_pool)) { queue = ampdu_queue; - pkt_data->flags |= IEEE80211_TXPD_AMPDU; + info->flags |= IEEE80211_TX_CTL_AMPDU; } else { - pkt_data->flags &= ~IEEE80211_TXPD_AMPDU; + info->flags &= ~IEEE80211_TX_CTL_AMPDU; } } rcu_read_unlock(); - skb_queue_tail(&q->requeued[queue], skb); - qd->q.qlen++; - return 0; - } - queue = classify80211(skb, qd); + return queue; + } - /* now we know the 1d priority, fill in the QoS header if there is one + /* Now we know the 1d priority, fill in the QoS header if + * there is one. */ - if (WLAN_FC_IS_QOS_DATA(fc)) { - u8 *p = skb->data + ieee80211_get_hdrlen(fc) - 2; + if (ieee80211_is_data_qos(hdr->frame_control)) { + u8 *p = ieee80211_get_qos_ctl(hdr); u8 ack_policy = 0; - tid = skb->priority & QOS_CONTROL_TAG1D_MASK; + tid = skb->priority & IEEE80211_QOS_CTL_TAG1D_MASK; if (local->wifi_wme_noack_test) ack_policy |= QOS_CONTROL_ACK_POLICY_NOACK << QOS_CONTROL_ACK_POLICY_SHIFT; /* qos header is 2 bytes, second reserved */ - *p = ack_policy | tid; - p++; + *p++ = ack_policy | tid; *p = 0; rcu_read_lock(); @@ -193,476 +164,40 @@ static int wme_qdiscop_enqueue(struct sk_buff *skb, struct Qdisc* qd) sta = sta_info_get(local, hdr->addr1); if (sta) { int ampdu_queue = sta->tid_to_tx_q[tid]; - if ((ampdu_queue < local->hw.queues) && - test_bit(ampdu_queue, q->qdisc_pool)) { + struct ieee80211_hw *hw = &local->hw; + + if ((ampdu_queue < ieee80211_num_queues(hw)) && + test_bit(ampdu_queue, local->queue_pool)) { queue = ampdu_queue; - pkt_data->flags |= IEEE80211_TXPD_AMPDU; + info->flags |= IEEE80211_TX_CTL_AMPDU; } else { - pkt_data->flags &= ~IEEE80211_TXPD_AMPDU; + info->flags &= ~IEEE80211_TX_CTL_AMPDU; } } rcu_read_unlock(); } - if (unlikely(queue >= local->hw.queues)) { -#if 0 - if (net_ratelimit()) { - printk(KERN_DEBUG "%s - queue=%d (hw does not " - "support) -> %d\n", - __func__, queue, local->hw.queues - 1); - } -#endif - queue = local->hw.queues - 1; - } - - if (unlikely(queue < 0)) { - kfree_skb(skb); - err = NET_XMIT_DROP; - } else { - tid = skb->priority & QOS_CONTROL_TAG1D_MASK; - pkt_data->queue = (unsigned int) queue; - qdisc = q->queues[queue]; - err = qdisc->enqueue(skb, qdisc); - if (err == NET_XMIT_SUCCESS) { - qd->q.qlen++; - qd->bstats.bytes += skb->len; - qd->bstats.packets++; - return NET_XMIT_SUCCESS; - } - } - qd->qstats.drops++; - return err; -} - - -/* TODO: clean up the cases where master_hard_start_xmit - * returns non 0 - it shouldn't ever do that. Once done we - * can remove this function */ -static int wme_qdiscop_requeue(struct sk_buff *skb, struct Qdisc* qd) -{ - struct ieee80211_sched_data *q = qdisc_priv(qd); - struct ieee80211_tx_packet_data *pkt_data = - (struct ieee80211_tx_packet_data *) skb->cb; - struct Qdisc *qdisc; - int err; - - /* we recorded which queue to use earlier! */ - qdisc = q->queues[pkt_data->queue]; - - if ((err = qdisc->ops->requeue(skb, qdisc)) == 0) { - qd->q.qlen++; - return 0; - } - qd->qstats.drops++; - return err; -} - - -static struct sk_buff *wme_qdiscop_dequeue(struct Qdisc* qd) -{ - struct ieee80211_sched_data *q = qdisc_priv(qd); - struct net_device *dev = qd->dev; - struct ieee80211_local *local = wdev_priv(dev->ieee80211_ptr); - struct ieee80211_hw *hw = &local->hw; - struct sk_buff *skb; - struct Qdisc *qdisc; - int queue; - - /* check all the h/w queues in numeric/priority order */ - for (queue = 0; queue < hw->queues; queue++) { - /* see if there is room in this hardware queue */ - if ((test_bit(IEEE80211_LINK_STATE_XOFF, - &local->state[queue])) || - (test_bit(IEEE80211_LINK_STATE_PENDING, - &local->state[queue])) || - (!test_bit(queue, q->qdisc_pool))) - continue; - - /* there is space - try and get a frame */ - skb = skb_dequeue(&q->requeued[queue]); - if (skb) { - qd->q.qlen--; - return skb; - } - - qdisc = q->queues[queue]; - skb = qdisc->dequeue(qdisc); - if (skb) { - qd->q.qlen--; - return skb; - } - } - /* returning a NULL here when all the h/w queues are full means we - * never need to call netif_stop_queue in the driver */ - return NULL; -} - - -static void wme_qdiscop_reset(struct Qdisc* qd) -{ - struct ieee80211_sched_data *q = qdisc_priv(qd); - struct ieee80211_local *local = wdev_priv(qd->dev->ieee80211_ptr); - struct ieee80211_hw *hw = &local->hw; - int queue; - - /* QUESTION: should we have some hardware flush functionality here? */ - - for (queue = 0; queue < hw->queues; queue++) { - skb_queue_purge(&q->requeued[queue]); - qdisc_reset(q->queues[queue]); - } - qd->q.qlen = 0; -} - - -static void wme_qdiscop_destroy(struct Qdisc* qd) -{ - struct ieee80211_sched_data *q = qdisc_priv(qd); - struct ieee80211_local *local = wdev_priv(qd->dev->ieee80211_ptr); - struct ieee80211_hw *hw = &local->hw; - int queue; - - tcf_destroy_chain(q->filter_list); - q->filter_list = NULL; - - for (queue=0; queue < hw->queues; queue++) { - skb_queue_purge(&q->requeued[queue]); - qdisc_destroy(q->queues[queue]); - q->queues[queue] = &noop_qdisc; - } -} - - -/* called whenever parameters are updated on existing qdisc */ -static int wme_qdiscop_tune(struct Qdisc *qd, struct nlattr *opt) -{ -/* struct ieee80211_sched_data *q = qdisc_priv(qd); -*/ - /* check our options block is the right size */ - /* copy any options to our local structure */ -/* Ignore options block for now - always use static mapping - struct tc_ieee80211_qopt *qopt = nla_data(opt); - - if (opt->nla_len < nla_attr_size(sizeof(*qopt))) - return -EINVAL; - memcpy(q->tag2queue, qopt->tag2queue, sizeof(qopt->tag2queue)); -*/ - return 0; -} - - -/* called during initial creation of qdisc on device */ -static int wme_qdiscop_init(struct Qdisc *qd, struct nlattr *opt) -{ - struct ieee80211_sched_data *q = qdisc_priv(qd); - struct net_device *dev = qd->dev; - struct ieee80211_local *local; - int queues; - int err = 0, i; - - /* check that device is a mac80211 device */ - if (!dev->ieee80211_ptr || - dev->ieee80211_ptr->wiphy->privid != mac80211_wiphy_privid) - return -EINVAL; - - /* check this device is an ieee80211 master type device */ - if (dev->type != ARPHRD_IEEE80211) - return -EINVAL; - - /* check that there is no qdisc currently attached to device - * this ensures that we will be the root qdisc. (I can't find a better - * way to test this explicitly) */ - if (dev->qdisc_sleeping != &noop_qdisc) - return -EINVAL; - - if (qd->flags & TCQ_F_INGRESS) - return -EINVAL; - - local = wdev_priv(dev->ieee80211_ptr); - queues = local->hw.queues; - - /* if options were passed in, set them */ - if (opt) { - err = wme_qdiscop_tune(qd, opt); - } - - /* create child queues */ - for (i = 0; i < queues; i++) { - skb_queue_head_init(&q->requeued[i]); - q->queues[i] = qdisc_create_dflt(qd->dev, &pfifo_qdisc_ops, - qd->handle); - if (!q->queues[i]) { - q->queues[i] = &noop_qdisc; - printk(KERN_ERR "%s child qdisc %i creation failed\n", - dev->name, i); - } - } - - /* reserve all legacy QoS queues */ - for (i = 0; i < min(IEEE80211_TX_QUEUE_DATA4, queues); i++) - set_bit(i, q->qdisc_pool); - - return err; -} - -static int wme_qdiscop_dump(struct Qdisc *qd, struct sk_buff *skb) -{ -/* struct ieee80211_sched_data *q = qdisc_priv(qd); - unsigned char *p = skb->tail; - struct tc_ieee80211_qopt opt; - - memcpy(&opt.tag2queue, q->tag2queue, TC_80211_MAX_TAG + 1); - NLA_PUT(skb, TCA_OPTIONS, sizeof(opt), &opt); -*/ return skb->len; -/* -nla_put_failure: - skb_trim(skb, p - skb->data);*/ - return -1; -} - - -static int wme_classop_graft(struct Qdisc *qd, unsigned long arg, - struct Qdisc *new, struct Qdisc **old) -{ - struct ieee80211_sched_data *q = qdisc_priv(qd); - struct ieee80211_local *local = wdev_priv(qd->dev->ieee80211_ptr); - struct ieee80211_hw *hw = &local->hw; - unsigned long queue = arg - 1; - - if (queue >= hw->queues) - return -EINVAL; - - if (!new) - new = &noop_qdisc; - - sch_tree_lock(qd); - *old = q->queues[queue]; - q->queues[queue] = new; - qdisc_reset(*old); - sch_tree_unlock(qd); - - return 0; -} - - -static struct Qdisc * -wme_classop_leaf(struct Qdisc *qd, unsigned long arg) -{ - struct ieee80211_sched_data *q = qdisc_priv(qd); - struct ieee80211_local *local = wdev_priv(qd->dev->ieee80211_ptr); - struct ieee80211_hw *hw = &local->hw; - unsigned long queue = arg - 1; - - if (queue >= hw->queues) - return NULL; - - return q->queues[queue]; -} - - -static unsigned long wme_classop_get(struct Qdisc *qd, u32 classid) -{ - struct ieee80211_local *local = wdev_priv(qd->dev->ieee80211_ptr); - struct ieee80211_hw *hw = &local->hw; - unsigned long queue = TC_H_MIN(classid); - - if (queue - 1 >= hw->queues) - return 0; - return queue; } - -static unsigned long wme_classop_bind(struct Qdisc *qd, unsigned long parent, - u32 classid) -{ - return wme_classop_get(qd, classid); -} - - -static void wme_classop_put(struct Qdisc *q, unsigned long cl) -{ -} - - -static int wme_classop_change(struct Qdisc *qd, u32 handle, u32 parent, - struct nlattr **tca, unsigned long *arg) -{ - unsigned long cl = *arg; - struct ieee80211_local *local = wdev_priv(qd->dev->ieee80211_ptr); - struct ieee80211_hw *hw = &local->hw; - - if (cl - 1 > hw->queues) - return -ENOENT; - - /* TODO: put code to program hardware queue parameters here, - * to allow programming from tc command line */ - - return 0; -} - - -/* we don't support deleting hardware queues - * when we add WMM-SA support - TSPECs may be deleted here */ -static int wme_classop_delete(struct Qdisc *qd, unsigned long cl) -{ - struct ieee80211_local *local = wdev_priv(qd->dev->ieee80211_ptr); - struct ieee80211_hw *hw = &local->hw; - - if (cl - 1 > hw->queues) - return -ENOENT; - return 0; -} - - -static int wme_classop_dump_class(struct Qdisc *qd, unsigned long cl, - struct sk_buff *skb, struct tcmsg *tcm) -{ - struct ieee80211_sched_data *q = qdisc_priv(qd); - struct ieee80211_local *local = wdev_priv(qd->dev->ieee80211_ptr); - struct ieee80211_hw *hw = &local->hw; - - if (cl - 1 > hw->queues) - return -ENOENT; - tcm->tcm_handle = TC_H_MIN(cl); - tcm->tcm_parent = qd->handle; - tcm->tcm_info = q->queues[cl-1]->handle; /* do we need this? */ - return 0; -} - - -static void wme_classop_walk(struct Qdisc *qd, struct qdisc_walker *arg) -{ - struct ieee80211_local *local = wdev_priv(qd->dev->ieee80211_ptr); - struct ieee80211_hw *hw = &local->hw; - int queue; - - if (arg->stop) - return; - - for (queue = 0; queue < hw->queues; queue++) { - if (arg->count < arg->skip) { - arg->count++; - continue; - } - /* we should return classids for our internal queues here - * as well as the external ones */ - if (arg->fn(qd, queue+1, arg) < 0) { - arg->stop = 1; - break; - } - arg->count++; - } -} - - -static struct tcf_proto ** wme_classop_find_tcf(struct Qdisc *qd, - unsigned long cl) -{ - struct ieee80211_sched_data *q = qdisc_priv(qd); - - if (cl) - return NULL; - - return &q->filter_list; -} - - -/* this qdisc is classful (i.e. has classes, some of which may have leaf qdiscs attached) - * - these are the operations on the classes */ -static const struct Qdisc_class_ops class_ops = -{ - .graft = wme_classop_graft, - .leaf = wme_classop_leaf, - - .get = wme_classop_get, - .put = wme_classop_put, - .change = wme_classop_change, - .delete = wme_classop_delete, - .walk = wme_classop_walk, - - .tcf_chain = wme_classop_find_tcf, - .bind_tcf = wme_classop_bind, - .unbind_tcf = wme_classop_put, - - .dump = wme_classop_dump_class, -}; - - -/* queueing discipline operations */ -static struct Qdisc_ops wme_qdisc_ops __read_mostly = -{ - .next = NULL, - .cl_ops = &class_ops, - .id = "ieee80211", - .priv_size = sizeof(struct ieee80211_sched_data), - - .enqueue = wme_qdiscop_enqueue, - .dequeue = wme_qdiscop_dequeue, - .requeue = wme_qdiscop_requeue, - .drop = NULL, /* drop not needed since we are always the root qdisc */ - - .init = wme_qdiscop_init, - .reset = wme_qdiscop_reset, - .destroy = wme_qdiscop_destroy, - .change = wme_qdiscop_tune, - - .dump = wme_qdiscop_dump, -}; - - -void ieee80211_install_qdisc(struct net_device *dev) -{ - struct Qdisc *qdisc; - - qdisc = qdisc_create_dflt(dev, &wme_qdisc_ops, TC_H_ROOT); - if (!qdisc) { - printk(KERN_ERR "%s: qdisc installation failed\n", dev->name); - return; - } - - /* same handle as would be allocated by qdisc_alloc_handle() */ - qdisc->handle = 0x80010000; - - qdisc_lock_tree(dev); - list_add_tail(&qdisc->list, &dev->qdisc_list); - dev->qdisc_sleeping = qdisc; - qdisc_unlock_tree(dev); -} - - -int ieee80211_qdisc_installed(struct net_device *dev) -{ - return dev->qdisc_sleeping->ops == &wme_qdisc_ops; -} - - -int ieee80211_wme_register(void) -{ - return register_qdisc(&wme_qdisc_ops); -} - - -void ieee80211_wme_unregister(void) -{ - unregister_qdisc(&wme_qdisc_ops); -} - int ieee80211_ht_agg_queue_add(struct ieee80211_local *local, - struct sta_info *sta, u16 tid) + struct sta_info *sta, u16 tid) { int i; - struct ieee80211_sched_data *q = - qdisc_priv(local->mdev->qdisc_sleeping); - DECLARE_MAC_BUF(mac); + + /* XXX: currently broken due to cb/requeue use */ + return -EPERM; /* prepare the filter and save it for the SW queue - * matching the recieved HW queue */ + * matching the received HW queue */ + + if (!local->hw.ampdu_queues) + return -EPERM; /* try to get a Qdisc from the pool */ - for (i = IEEE80211_TX_QUEUE_BEACON; i < local->hw.queues; i++) - if (!test_and_set_bit(i, q->qdisc_pool)) { + for (i = local->hw.queues; i < ieee80211_num_queues(&local->hw); i++) + if (!test_and_set_bit(i, local->queue_pool)) { ieee80211_stop_queue(local_to_hw(local), i); sta->tid_to_tx_q[tid] = i; @@ -671,11 +206,13 @@ int ieee80211_ht_agg_queue_add(struct ieee80211_local *local, * on the previous queue * since HT is strict in order */ #ifdef CONFIG_MAC80211_HT_DEBUG - if (net_ratelimit()) + if (net_ratelimit()) { + DECLARE_MAC_BUF(mac); printk(KERN_DEBUG "allocated aggregation queue" " %d tid %d addr %s pool=0x%lX\n", - i, tid, print_mac(mac, sta->addr), - q->qdisc_pool[0]); + i, tid, print_mac(mac, sta->sta.addr), + local->queue_pool[0]); + } #endif /* CONFIG_MAC80211_HT_DEBUG */ return 0; } @@ -684,44 +221,81 @@ int ieee80211_ht_agg_queue_add(struct ieee80211_local *local, } /** - * the caller needs to hold local->mdev->queue_lock + * the caller needs to hold netdev_get_tx_queue(local->mdev, X)->lock */ void ieee80211_ht_agg_queue_remove(struct ieee80211_local *local, struct sta_info *sta, u16 tid, u8 requeue) { - struct ieee80211_sched_data *q = - qdisc_priv(local->mdev->qdisc_sleeping); int agg_queue = sta->tid_to_tx_q[tid]; + struct ieee80211_hw *hw = &local->hw; /* return the qdisc to the pool */ - clear_bit(agg_queue, q->qdisc_pool); - sta->tid_to_tx_q[tid] = local->hw.queues; + clear_bit(agg_queue, local->queue_pool); + sta->tid_to_tx_q[tid] = ieee80211_num_queues(hw); - if (requeue) + if (requeue) { ieee80211_requeue(local, agg_queue); - else - q->queues[agg_queue]->ops->reset(q->queues[agg_queue]); + } else { + struct netdev_queue *txq; + spinlock_t *root_lock; + struct Qdisc *q; + + txq = netdev_get_tx_queue(local->mdev, agg_queue); + q = rcu_dereference(txq->qdisc); + root_lock = qdisc_lock(q); + + spin_lock_bh(root_lock); + qdisc_reset(q); + spin_unlock_bh(root_lock); + } } void ieee80211_requeue(struct ieee80211_local *local, int queue) { - struct Qdisc *root_qd = local->mdev->qdisc_sleeping; - struct ieee80211_sched_data *q = qdisc_priv(root_qd); - struct Qdisc *qdisc = q->queues[queue]; - struct sk_buff *skb = NULL; + struct netdev_queue *txq = netdev_get_tx_queue(local->mdev, queue); + struct sk_buff_head list; + spinlock_t *root_lock; + struct Qdisc *qdisc; u32 len; + rcu_read_lock_bh(); + + qdisc = rcu_dereference(txq->qdisc); if (!qdisc || !qdisc->dequeue) - return; + goto out_unlock; + + skb_queue_head_init(&list); - printk(KERN_DEBUG "requeue: qlen = %d\n", qdisc->q.qlen); + root_lock = qdisc_root_lock(qdisc); + spin_lock(root_lock); for (len = qdisc->q.qlen; len > 0; len--) { - skb = qdisc->dequeue(qdisc); - root_qd->q.qlen--; - /* packet will be classified again and */ - /* skb->packet_data->queue will be overridden if needed */ + struct sk_buff *skb = qdisc->dequeue(qdisc); + if (skb) - wme_qdiscop_enqueue(skb, root_qd); + __skb_queue_tail(&list, skb); + } + spin_unlock(root_lock); + + for (len = list.qlen; len > 0; len--) { + struct sk_buff *skb = __skb_dequeue(&list); + u16 new_queue; + + BUG_ON(!skb); + new_queue = ieee80211_select_queue(local->mdev, skb); + skb_set_queue_mapping(skb, new_queue); + + txq = netdev_get_tx_queue(local->mdev, new_queue); + + + qdisc = rcu_dereference(txq->qdisc); + root_lock = qdisc_root_lock(qdisc); + + spin_lock(root_lock); + qdisc_enqueue_root(skb, qdisc); + spin_unlock(root_lock); } + +out_unlock: + rcu_read_unlock_bh(); } diff --git a/net/mac80211/wme.h b/net/mac80211/wme.h index fcc6b05508cc..bc62f28a4d3d 100644 --- a/net/mac80211/wme.h +++ b/net/mac80211/wme.h @@ -1,5 +1,4 @@ /* - * IEEE 802.11 driver (80211.o) - QoS datatypes * Copyright 2004, Instant802 Networks, Inc. * Copyright 2005, Devicescape Software, Inc. * @@ -14,62 +13,19 @@ #include <linux/netdevice.h> #include "ieee80211_i.h" -#define QOS_CONTROL_LEN 2 - #define QOS_CONTROL_ACK_POLICY_NORMAL 0 #define QOS_CONTROL_ACK_POLICY_NOACK 1 -#define QOS_CONTROL_TID_MASK 0x0f #define QOS_CONTROL_ACK_POLICY_SHIFT 5 -#define QOS_CONTROL_TAG1D_MASK 0x07 - extern const int ieee802_1d_to_ac[8]; -static inline int WLAN_FC_IS_QOS_DATA(u16 fc) -{ - return (fc & 0x8C) == 0x88; -} - -#ifdef CONFIG_NET_SCHED -void ieee80211_install_qdisc(struct net_device *dev); -int ieee80211_qdisc_installed(struct net_device *dev); +u16 ieee80211_select_queue(struct net_device *dev, struct sk_buff *skb); int ieee80211_ht_agg_queue_add(struct ieee80211_local *local, struct sta_info *sta, u16 tid); void ieee80211_ht_agg_queue_remove(struct ieee80211_local *local, struct sta_info *sta, u16 tid, u8 requeue); void ieee80211_requeue(struct ieee80211_local *local, int queue); -int ieee80211_wme_register(void); -void ieee80211_wme_unregister(void); -#else -static inline void ieee80211_install_qdisc(struct net_device *dev) -{ -} -static inline int ieee80211_qdisc_installed(struct net_device *dev) -{ - return 0; -} -static inline int ieee80211_ht_agg_queue_add(struct ieee80211_local *local, - struct sta_info *sta, u16 tid) -{ - return -EAGAIN; -} -static inline void ieee80211_ht_agg_queue_remove(struct ieee80211_local *local, - struct sta_info *sta, u16 tid, - u8 requeue) -{ -} -static inline void ieee80211_requeue(struct ieee80211_local *local, int queue) -{ -} -static inline int ieee80211_wme_register(void) -{ - return 0; -} -static inline void ieee80211_wme_unregister(void) -{ -} -#endif /* CONFIG_NET_SCHED */ #endif /* _WME_H */ diff --git a/net/mac80211/wpa.c b/net/mac80211/wpa.c index 45709ada8fee..6db649480e8f 100644 --- a/net/mac80211/wpa.c +++ b/net/mac80211/wpa.c @@ -11,6 +11,8 @@ #include <linux/slab.h> #include <linux/skbuff.h> #include <linux/compiler.h> +#include <linux/ieee80211.h> +#include <asm/unaligned.h> #include <net/mac80211.h> #include "ieee80211_i.h" @@ -19,76 +21,30 @@ #include "aes_ccm.h" #include "wpa.h" -static int ieee80211_get_hdr_info(const struct sk_buff *skb, u8 **sa, u8 **da, - u8 *qos_tid, u8 **data, size_t *data_len) -{ - struct ieee80211_hdr *hdr; - size_t hdrlen; - u16 fc; - int a4_included; - u8 *pos; - - hdr = (struct ieee80211_hdr *) skb->data; - fc = le16_to_cpu(hdr->frame_control); - - hdrlen = 24; - if ((fc & (IEEE80211_FCTL_FROMDS | IEEE80211_FCTL_TODS)) == - (IEEE80211_FCTL_FROMDS | IEEE80211_FCTL_TODS)) { - hdrlen += ETH_ALEN; - *sa = hdr->addr4; - *da = hdr->addr3; - } else if (fc & IEEE80211_FCTL_FROMDS) { - *sa = hdr->addr3; - *da = hdr->addr1; - } else if (fc & IEEE80211_FCTL_TODS) { - *sa = hdr->addr2; - *da = hdr->addr3; - } else { - *sa = hdr->addr2; - *da = hdr->addr1; - } - - if (fc & 0x80) - hdrlen += 2; - - *data = skb->data + hdrlen; - *data_len = skb->len - hdrlen; - - a4_included = (fc & (IEEE80211_FCTL_TODS | IEEE80211_FCTL_FROMDS)) == - (IEEE80211_FCTL_TODS | IEEE80211_FCTL_FROMDS); - if ((fc & IEEE80211_FCTL_FTYPE) == IEEE80211_FTYPE_DATA && - fc & IEEE80211_STYPE_QOS_DATA) { - pos = (u8 *) &hdr->addr4; - if (a4_included) - pos += 6; - *qos_tid = pos[0] & 0x0f; - *qos_tid |= 0x80; /* qos_included flag */ - } else - *qos_tid = 0; - - return skb->len < hdrlen ? -1 : 0; -} - - ieee80211_tx_result ieee80211_tx_h_michael_mic_add(struct ieee80211_tx_data *tx) { - u8 *data, *sa, *da, *key, *mic, qos_tid; + u8 *data, *key, *mic, key_offset; size_t data_len; - u16 fc; + unsigned int hdrlen; + struct ieee80211_hdr *hdr; struct sk_buff *skb = tx->skb; int authenticator; int wpa_test = 0; + int tail; - fc = tx->fc; - + hdr = (struct ieee80211_hdr *)skb->data; if (!tx->key || tx->key->conf.alg != ALG_TKIP || skb->len < 24 || - !WLAN_FC_DATA_PRESENT(fc)) + !ieee80211_is_data_present(hdr->frame_control)) return TX_CONTINUE; - if (ieee80211_get_hdr_info(skb, &sa, &da, &qos_tid, &data, &data_len)) + hdrlen = ieee80211_hdrlen(hdr->frame_control); + if (skb->len < hdrlen) return TX_DROP; + data = skb->data + hdrlen; + data_len = skb->len - hdrlen; + if ((tx->key->flags & KEY_FLAG_UPLOADED_TO_HARDWARE) && !(tx->flags & IEEE80211_TX_FRAGMENTED) && !(tx->key->conf.flags & IEEE80211_KEY_FLAG_GENERATE_MMIC) && @@ -98,26 +54,27 @@ ieee80211_tx_h_michael_mic_add(struct ieee80211_tx_data *tx) return TX_CONTINUE; } - if (skb_tailroom(skb) < MICHAEL_MIC_LEN) { - I802_DEBUG_INC(tx->local->tx_expand_skb_head); - if (unlikely(pskb_expand_head(skb, TKIP_IV_LEN, - MICHAEL_MIC_LEN + TKIP_ICV_LEN, - GFP_ATOMIC))) { - printk(KERN_DEBUG "%s: failed to allocate more memory " - "for Michael MIC\n", tx->dev->name); - return TX_DROP; - } - } + tail = MICHAEL_MIC_LEN; + if (!(tx->key->flags & KEY_FLAG_UPLOADED_TO_HARDWARE)) + tail += TKIP_ICV_LEN; + + if (WARN_ON(skb_tailroom(skb) < tail || + skb_headroom(skb) < TKIP_IV_LEN)) + return TX_DROP; #if 0 authenticator = fc & IEEE80211_FCTL_FROMDS; /* FIX */ #else authenticator = 1; #endif - key = &tx->key->conf.key[authenticator ? ALG_TKIP_TEMP_AUTH_TX_MIC_KEY : - ALG_TKIP_TEMP_AUTH_RX_MIC_KEY]; + /* At this point we know we're using ALG_TKIP. To get the MIC key + * we now will rely on the offset from the ieee80211_key_conf::key */ + key_offset = authenticator ? + NL80211_TKIP_DATA_OFFSET_TX_MIC_KEY : + NL80211_TKIP_DATA_OFFSET_RX_MIC_KEY; + key = &tx->key->conf.key[key_offset]; mic = skb_put(skb, MICHAEL_MIC_LEN); - michael_mic(key, da, sa, qos_tid & 0x0f, data, data_len, mic); + michael_mic(key, hdr, data, data_len, mic); return TX_CONTINUE; } @@ -126,48 +83,51 @@ ieee80211_tx_h_michael_mic_add(struct ieee80211_tx_data *tx) ieee80211_rx_result ieee80211_rx_h_michael_mic_verify(struct ieee80211_rx_data *rx) { - u8 *data, *sa, *da, *key = NULL, qos_tid; + u8 *data, *key = NULL, key_offset; size_t data_len; - u16 fc; + unsigned int hdrlen; + struct ieee80211_hdr *hdr; u8 mic[MICHAEL_MIC_LEN]; struct sk_buff *skb = rx->skb; int authenticator = 1, wpa_test = 0; DECLARE_MAC_BUF(mac); - fc = rx->fc; - /* * No way to verify the MIC if the hardware stripped it */ if (rx->status->flag & RX_FLAG_MMIC_STRIPPED) return RX_CONTINUE; + hdr = (struct ieee80211_hdr *)skb->data; if (!rx->key || rx->key->conf.alg != ALG_TKIP || - !(rx->fc & IEEE80211_FCTL_PROTECTED) || !WLAN_FC_DATA_PRESENT(fc)) + !ieee80211_has_protected(hdr->frame_control) || + !ieee80211_is_data_present(hdr->frame_control)) return RX_CONTINUE; - if (ieee80211_get_hdr_info(skb, &sa, &da, &qos_tid, &data, &data_len) - || data_len < MICHAEL_MIC_LEN) + hdrlen = ieee80211_hdrlen(hdr->frame_control); + if (skb->len < hdrlen + MICHAEL_MIC_LEN) return RX_DROP_UNUSABLE; - data_len -= MICHAEL_MIC_LEN; + data = skb->data + hdrlen; + data_len = skb->len - hdrlen - MICHAEL_MIC_LEN; #if 0 authenticator = fc & IEEE80211_FCTL_TODS; /* FIX */ #else authenticator = 1; #endif - key = &rx->key->conf.key[authenticator ? ALG_TKIP_TEMP_AUTH_RX_MIC_KEY : - ALG_TKIP_TEMP_AUTH_TX_MIC_KEY]; - michael_mic(key, da, sa, qos_tid & 0x0f, data, data_len, mic); + /* At this point we know we're using ALG_TKIP. To get the MIC key + * we now will rely on the offset from the ieee80211_key_conf::key */ + key_offset = authenticator ? + NL80211_TKIP_DATA_OFFSET_RX_MIC_KEY : + NL80211_TKIP_DATA_OFFSET_TX_MIC_KEY; + key = &rx->key->conf.key[key_offset]; + michael_mic(key, hdr, data, data_len, mic); if (memcmp(mic, data + data_len, MICHAEL_MIC_LEN) != 0 || wpa_test) { if (!(rx->flags & IEEE80211_RX_RA_MATCH)) return RX_DROP_UNUSABLE; - printk(KERN_DEBUG "%s: invalid Michael MIC in data frame from " - "%s\n", rx->dev->name, print_mac(mac, sa)); - - mac80211_ev_michael_mic_failure(rx->dev, rx->key->conf.keyidx, + mac80211_ev_michael_mic_failure(rx->sdata, rx->key->conf.keyidx, (void *) skb->data); return RX_DROP_UNUSABLE; } @@ -176,59 +136,55 @@ ieee80211_rx_h_michael_mic_verify(struct ieee80211_rx_data *rx) skb_trim(skb, skb->len - MICHAEL_MIC_LEN); /* update IV in key information to be able to detect replays */ - rx->key->u.tkip.iv32_rx[rx->queue] = rx->tkip_iv32; - rx->key->u.tkip.iv16_rx[rx->queue] = rx->tkip_iv16; + rx->key->u.tkip.rx[rx->queue].iv32 = rx->tkip_iv32; + rx->key->u.tkip.rx[rx->queue].iv16 = rx->tkip_iv16; return RX_CONTINUE; } -static int tkip_encrypt_skb(struct ieee80211_tx_data *tx, - struct sk_buff *skb, int test) +static int tkip_encrypt_skb(struct ieee80211_tx_data *tx, struct sk_buff *skb) { struct ieee80211_hdr *hdr = (struct ieee80211_hdr *) skb->data; struct ieee80211_key *key = tx->key; - int hdrlen, len, tailneed; - u16 fc; + struct ieee80211_tx_info *info = IEEE80211_SKB_CB(skb); + unsigned int hdrlen; + int len, tail; u8 *pos; - fc = le16_to_cpu(hdr->frame_control); - hdrlen = ieee80211_get_hdrlen(fc); + if ((tx->key->flags & KEY_FLAG_UPLOADED_TO_HARDWARE) && + !(tx->key->conf.flags & IEEE80211_KEY_FLAG_GENERATE_IV)) { + /* hwaccel - with no need for preallocated room for IV/ICV */ + info->control.hw_key = &tx->key->conf; + return 0; + } + + hdrlen = ieee80211_hdrlen(hdr->frame_control); len = skb->len - hdrlen; if (tx->key->flags & KEY_FLAG_UPLOADED_TO_HARDWARE) - tailneed = 0; + tail = 0; else - tailneed = TKIP_ICV_LEN; - - if ((skb_headroom(skb) < TKIP_IV_LEN || - skb_tailroom(skb) < tailneed)) { - I802_DEBUG_INC(tx->local->tx_expand_skb_head); - if (unlikely(pskb_expand_head(skb, TKIP_IV_LEN, tailneed, - GFP_ATOMIC))) - return -1; - } + tail = TKIP_ICV_LEN; + + if (WARN_ON(skb_tailroom(skb) < tail || + skb_headroom(skb) < TKIP_IV_LEN)) + return -1; pos = skb_push(skb, TKIP_IV_LEN); memmove(pos, pos + TKIP_IV_LEN, hdrlen); pos += hdrlen; /* Increase IV for the frame */ - key->u.tkip.iv16++; - if (key->u.tkip.iv16 == 0) - key->u.tkip.iv32++; + key->u.tkip.tx.iv16++; + if (key->u.tkip.tx.iv16 == 0) + key->u.tkip.tx.iv32++; if (tx->key->flags & KEY_FLAG_UPLOADED_TO_HARDWARE) { - hdr = (struct ieee80211_hdr *)skb->data; - /* hwaccel - with preallocated room for IV */ - ieee80211_tkip_add_iv(pos, key, - (u8) (key->u.tkip.iv16 >> 8), - (u8) (((key->u.tkip.iv16 >> 8) | 0x20) & - 0x7f), - (u8) key->u.tkip.iv16); + ieee80211_tkip_add_iv(pos, key, key->u.tkip.tx.iv16); - tx->control->key_idx = tx->key->conf.hw_key_idx; + info->control.hw_key = &tx->key->conf; return 0; } @@ -246,28 +202,16 @@ ieee80211_tx_result ieee80211_crypto_tkip_encrypt(struct ieee80211_tx_data *tx) { struct sk_buff *skb = tx->skb; - int wpa_test = 0, test = 0; - tx->control->icv_len = TKIP_ICV_LEN; - tx->control->iv_len = TKIP_IV_LEN; ieee80211_tx_set_protected(tx); - if ((tx->key->flags & KEY_FLAG_UPLOADED_TO_HARDWARE) && - !(tx->key->conf.flags & IEEE80211_KEY_FLAG_GENERATE_IV) && - !wpa_test) { - /* hwaccel - with no need for preallocated room for IV/ICV */ - tx->control->key_idx = tx->key->conf.hw_key_idx; - return TX_CONTINUE; - } - - if (tkip_encrypt_skb(tx, skb, test) < 0) + if (tkip_encrypt_skb(tx, skb) < 0) return TX_DROP; if (tx->extra_frag) { int i; for (i = 0; i < tx->num_extra_frag; i++) { - if (tkip_encrypt_skb(tx, tx->extra_frag[i], test) - < 0) + if (tkip_encrypt_skb(tx, tx->extra_frag[i]) < 0) return TX_DROP; } } @@ -280,16 +224,14 @@ ieee80211_rx_result ieee80211_crypto_tkip_decrypt(struct ieee80211_rx_data *rx) { struct ieee80211_hdr *hdr = (struct ieee80211_hdr *) rx->skb->data; - u16 fc; int hdrlen, res, hwaccel = 0, wpa_test = 0; struct ieee80211_key *key = rx->key; struct sk_buff *skb = rx->skb; DECLARE_MAC_BUF(mac); - fc = le16_to_cpu(hdr->frame_control); - hdrlen = ieee80211_get_hdrlen(fc); + hdrlen = ieee80211_hdrlen(hdr->frame_control); - if ((rx->fc & IEEE80211_FCTL_FTYPE) != IEEE80211_FTYPE_DATA) + if (!ieee80211_is_data(hdr->frame_control)) return RX_CONTINUE; if (!rx->sta || skb->len - hdrlen < 12) @@ -311,19 +253,12 @@ ieee80211_crypto_tkip_decrypt(struct ieee80211_rx_data *rx) res = ieee80211_tkip_decrypt_data(rx->local->wep_rx_tfm, key, skb->data + hdrlen, - skb->len - hdrlen, rx->sta->addr, + skb->len - hdrlen, rx->sta->sta.addr, hdr->addr1, hwaccel, rx->queue, &rx->tkip_iv32, &rx->tkip_iv16); - if (res != TKIP_DECRYPT_OK || wpa_test) { -#ifdef CONFIG_MAC80211_DEBUG - if (net_ratelimit()) - printk(KERN_DEBUG "%s: TKIP decrypt failed for RX " - "frame from %s (res=%d)\n", rx->dev->name, - print_mac(mac, rx->sta->addr), res); -#endif /* CONFIG_MAC80211_DEBUG */ + if (res != TKIP_DECRYPT_OK || wpa_test) return RX_DROP_UNUSABLE; - } /* Trim ICV */ skb_trim(skb, skb->len - TKIP_ICV_LEN); @@ -336,70 +271,68 @@ ieee80211_crypto_tkip_decrypt(struct ieee80211_rx_data *rx) } -static void ccmp_special_blocks(struct sk_buff *skb, u8 *pn, u8 *b_0, u8 *aad, +static void ccmp_special_blocks(struct sk_buff *skb, u8 *pn, u8 *scratch, int encrypted) { - u16 fc; - int a4_included, qos_included; - u8 qos_tid, *fc_pos, *data, *sa, *da; - int len_a; - size_t data_len; - struct ieee80211_hdr *hdr = (struct ieee80211_hdr *) skb->data; + __le16 mask_fc; + int a4_included; + u8 qos_tid; + u8 *b_0, *aad; + u16 data_len, len_a; + unsigned int hdrlen; + struct ieee80211_hdr *hdr = (struct ieee80211_hdr *)skb->data; - fc_pos = (u8 *) &hdr->frame_control; - fc = fc_pos[0] ^ (fc_pos[1] << 8); - a4_included = (fc & (IEEE80211_FCTL_TODS | IEEE80211_FCTL_FROMDS)) == - (IEEE80211_FCTL_TODS | IEEE80211_FCTL_FROMDS); - - ieee80211_get_hdr_info(skb, &sa, &da, &qos_tid, &data, &data_len); - data_len -= CCMP_HDR_LEN + (encrypted ? CCMP_MIC_LEN : 0); - if (qos_tid & 0x80) { - qos_included = 1; - qos_tid &= 0x0f; - } else - qos_included = 0; - /* First block, b_0 */ + b_0 = scratch + 3 * AES_BLOCK_LEN; + aad = scratch + 4 * AES_BLOCK_LEN; + /* + * Mask FC: zero subtype b4 b5 b6 + * Retry, PwrMgt, MoreData; set Protected + */ + mask_fc = hdr->frame_control; + mask_fc &= ~cpu_to_le16(0x0070 | IEEE80211_FCTL_RETRY | + IEEE80211_FCTL_PM | IEEE80211_FCTL_MOREDATA); + mask_fc |= cpu_to_le16(IEEE80211_FCTL_PROTECTED); + + hdrlen = ieee80211_hdrlen(hdr->frame_control); + len_a = hdrlen - 2; + a4_included = ieee80211_has_a4(hdr->frame_control); + + if (ieee80211_is_data_qos(hdr->frame_control)) + qos_tid = *ieee80211_get_qos_ctl(hdr) & IEEE80211_QOS_CTL_TID_MASK; + else + qos_tid = 0; + + data_len = skb->len - hdrlen - CCMP_HDR_LEN; + if (encrypted) + data_len -= CCMP_MIC_LEN; + + /* First block, b_0 */ b_0[0] = 0x59; /* flags: Adata: 1, M: 011, L: 001 */ /* Nonce: QoS Priority | A2 | PN */ b_0[1] = qos_tid; - memcpy(&b_0[2], hdr->addr2, 6); + memcpy(&b_0[2], hdr->addr2, ETH_ALEN); memcpy(&b_0[8], pn, CCMP_PN_LEN); /* l(m) */ - b_0[14] = (data_len >> 8) & 0xff; - b_0[15] = data_len & 0xff; - + put_unaligned_be16(data_len, &b_0[14]); /* AAD (extra authenticate-only data) / masked 802.11 header * FC | A1 | A2 | A3 | SC | [A4] | [QC] */ - - len_a = a4_included ? 28 : 22; - if (qos_included) - len_a += 2; - - aad[0] = 0; /* (len_a >> 8) & 0xff; */ - aad[1] = len_a & 0xff; - /* Mask FC: zero subtype b4 b5 b6 */ - aad[2] = fc_pos[0] & ~(BIT(4) | BIT(5) | BIT(6)); - /* Retry, PwrMgt, MoreData; set Protected */ - aad[3] = (fc_pos[1] & ~(BIT(3) | BIT(4) | BIT(5))) | BIT(6); - memcpy(&aad[4], &hdr->addr1, 18); + put_unaligned_be16(len_a, &aad[0]); + put_unaligned(mask_fc, (__le16 *)&aad[2]); + memcpy(&aad[4], &hdr->addr1, 3 * ETH_ALEN); /* Mask Seq#, leave Frag# */ aad[22] = *((u8 *) &hdr->seq_ctrl) & 0x0f; aad[23] = 0; + if (a4_included) { - memcpy(&aad[24], hdr->addr4, 6); - aad[30] = 0; + memcpy(&aad[24], hdr->addr4, ETH_ALEN); + aad[30] = qos_tid; aad[31] = 0; - } else - memset(&aad[24], 0, 8); - if (qos_included) { - u8 *dpos = &aad[a4_included ? 30 : 24]; - - /* Mask QoS Control field */ - dpos[0] = qos_tid; - dpos[1] = 0; + } else { + memset(&aad[24], 0, ETH_ALEN + IEEE80211_QOS_CTL_LEN); + aad[24] = qos_tid; } } @@ -429,36 +362,34 @@ static inline int ccmp_hdr2pn(u8 *pn, u8 *hdr) } -static int ccmp_encrypt_skb(struct ieee80211_tx_data *tx, - struct sk_buff *skb, int test) +static int ccmp_encrypt_skb(struct ieee80211_tx_data *tx, struct sk_buff *skb) { struct ieee80211_hdr *hdr = (struct ieee80211_hdr *) skb->data; struct ieee80211_key *key = tx->key; - int hdrlen, len, tailneed; - u16 fc; - u8 *pos, *pn, *b_0, *aad, *scratch; + struct ieee80211_tx_info *info = IEEE80211_SKB_CB(skb); + int hdrlen, len, tail; + u8 *pos, *pn; int i; - scratch = key->u.ccmp.tx_crypto_buf; - b_0 = scratch + 3 * AES_BLOCK_LEN; - aad = scratch + 4 * AES_BLOCK_LEN; + if ((tx->key->flags & KEY_FLAG_UPLOADED_TO_HARDWARE) && + !(tx->key->conf.flags & IEEE80211_KEY_FLAG_GENERATE_IV)) { + /* hwaccel - with no need for preallocated room for CCMP " + * header or MIC fields */ + info->control.hw_key = &tx->key->conf; + return 0; + } - fc = le16_to_cpu(hdr->frame_control); - hdrlen = ieee80211_get_hdrlen(fc); + hdrlen = ieee80211_hdrlen(hdr->frame_control); len = skb->len - hdrlen; if (key->flags & KEY_FLAG_UPLOADED_TO_HARDWARE) - tailneed = 0; + tail = 0; else - tailneed = CCMP_MIC_LEN; - - if ((skb_headroom(skb) < CCMP_HDR_LEN || - skb_tailroom(skb) < tailneed)) { - I802_DEBUG_INC(tx->local->tx_expand_skb_head); - if (unlikely(pskb_expand_head(skb, CCMP_HDR_LEN, tailneed, - GFP_ATOMIC))) - return -1; - } + tail = CCMP_MIC_LEN; + + if (WARN_ON(skb_tailroom(skb) < tail || + skb_headroom(skb) < CCMP_HDR_LEN)) + return -1; pos = skb_push(skb, CCMP_HDR_LEN); memmove(pos, pos + CCMP_HDR_LEN, hdrlen); @@ -478,13 +409,13 @@ static int ccmp_encrypt_skb(struct ieee80211_tx_data *tx, if (key->flags & KEY_FLAG_UPLOADED_TO_HARDWARE) { /* hwaccel - with preallocated room for CCMP header */ - tx->control->key_idx = key->conf.hw_key_idx; + info->control.hw_key = &tx->key->conf; return 0; } pos += CCMP_HDR_LEN; - ccmp_special_blocks(skb, pn, b_0, aad, 0); - ieee80211_aes_ccm_encrypt(key->u.ccmp.tfm, scratch, b_0, aad, pos, len, + ccmp_special_blocks(skb, pn, key->u.ccmp.tx_crypto_buf, 0); + ieee80211_aes_ccm_encrypt(key->u.ccmp.tfm, key->u.ccmp.tx_crypto_buf, pos, len, pos, skb_put(skb, CCMP_MIC_LEN)); return 0; @@ -495,28 +426,16 @@ ieee80211_tx_result ieee80211_crypto_ccmp_encrypt(struct ieee80211_tx_data *tx) { struct sk_buff *skb = tx->skb; - int test = 0; - tx->control->icv_len = CCMP_MIC_LEN; - tx->control->iv_len = CCMP_HDR_LEN; ieee80211_tx_set_protected(tx); - if ((tx->key->flags & KEY_FLAG_UPLOADED_TO_HARDWARE) && - !(tx->key->conf.flags & IEEE80211_KEY_FLAG_GENERATE_IV)) { - /* hwaccel - with no need for preallocated room for CCMP " - * header or MIC fields */ - tx->control->key_idx = tx->key->conf.hw_key_idx; - return TX_CONTINUE; - } - - if (ccmp_encrypt_skb(tx, skb, test) < 0) + if (ccmp_encrypt_skb(tx, skb) < 0) return TX_DROP; if (tx->extra_frag) { int i; for (i = 0; i < tx->num_extra_frag; i++) { - if (ccmp_encrypt_skb(tx, tx->extra_frag[i], test) - < 0) + if (ccmp_encrypt_skb(tx, tx->extra_frag[i]) < 0) return TX_DROP; } } @@ -528,8 +447,7 @@ ieee80211_crypto_ccmp_encrypt(struct ieee80211_tx_data *tx) ieee80211_rx_result ieee80211_crypto_ccmp_decrypt(struct ieee80211_rx_data *rx) { - struct ieee80211_hdr *hdr = (struct ieee80211_hdr *) rx->skb->data; - u16 fc; + struct ieee80211_hdr *hdr = (struct ieee80211_hdr *)rx->skb->data; int hdrlen; struct ieee80211_key *key = rx->key; struct sk_buff *skb = rx->skb; @@ -537,10 +455,9 @@ ieee80211_crypto_ccmp_decrypt(struct ieee80211_rx_data *rx) int data_len; DECLARE_MAC_BUF(mac); - fc = le16_to_cpu(hdr->frame_control); - hdrlen = ieee80211_get_hdrlen(fc); + hdrlen = ieee80211_hdrlen(hdr->frame_control); - if ((rx->fc & IEEE80211_FCTL_FTYPE) != IEEE80211_FTYPE_DATA) + if (!ieee80211_is_data(hdr->frame_control)) return RX_CONTINUE; data_len = skb->len - hdrlen - CCMP_HDR_LEN - CCMP_MIC_LEN; @@ -554,41 +471,19 @@ ieee80211_crypto_ccmp_decrypt(struct ieee80211_rx_data *rx) (void) ccmp_hdr2pn(pn, skb->data + hdrlen); if (memcmp(pn, key->u.ccmp.rx_pn[rx->queue], CCMP_PN_LEN) <= 0) { -#ifdef CONFIG_MAC80211_DEBUG - u8 *ppn = key->u.ccmp.rx_pn[rx->queue]; - - printk(KERN_DEBUG "%s: CCMP replay detected for RX frame from " - "%s (RX PN %02x%02x%02x%02x%02x%02x <= prev. PN " - "%02x%02x%02x%02x%02x%02x)\n", rx->dev->name, - print_mac(mac, rx->sta->addr), - pn[0], pn[1], pn[2], pn[3], pn[4], pn[5], - ppn[0], ppn[1], ppn[2], ppn[3], ppn[4], ppn[5]); -#endif /* CONFIG_MAC80211_DEBUG */ key->u.ccmp.replays++; return RX_DROP_UNUSABLE; } if (!(rx->status->flag & RX_FLAG_DECRYPTED)) { /* hardware didn't decrypt/verify MIC */ - u8 *scratch, *b_0, *aad; - - scratch = key->u.ccmp.rx_crypto_buf; - b_0 = scratch + 3 * AES_BLOCK_LEN; - aad = scratch + 4 * AES_BLOCK_LEN; - - ccmp_special_blocks(skb, pn, b_0, aad, 1); + ccmp_special_blocks(skb, pn, key->u.ccmp.rx_crypto_buf, 1); if (ieee80211_aes_ccm_decrypt( - key->u.ccmp.tfm, scratch, b_0, aad, + key->u.ccmp.tfm, key->u.ccmp.rx_crypto_buf, skb->data + hdrlen + CCMP_HDR_LEN, data_len, skb->data + skb->len - CCMP_MIC_LEN, skb->data + hdrlen + CCMP_HDR_LEN)) { -#ifdef CONFIG_MAC80211_DEBUG - if (net_ratelimit()) - printk(KERN_DEBUG "%s: CCMP decrypt failed " - "for RX frame from %s\n", rx->dev->name, - print_mac(mac, rx->sta->addr)); -#endif /* CONFIG_MAC80211_DEBUG */ return RX_DROP_UNUSABLE; } } diff --git a/net/netfilter/Kconfig b/net/netfilter/Kconfig index aa8d80c35e28..78892cf2b021 100644 --- a/net/netfilter/Kconfig +++ b/net/netfilter/Kconfig @@ -33,16 +33,16 @@ config NF_CONNTRACK into connections. This is required to do Masquerading or other kinds of Network - Address Translation (except for Fast NAT). It can also be used to - enhance packet filtering (see `Connection state match support' - below). + Address Translation. It can also be used to enhance packet + filtering (see `Connection state match support' below). To compile it as a module, choose M here. If unsure, say N. +if NF_CONNTRACK + config NF_CT_ACCT bool "Connection tracking flow accounting" depends on NETFILTER_ADVANCED - depends on NF_CONNTRACK help If this option is enabled, the connection tracking code will keep per-flow packet and byte counters. @@ -50,12 +50,20 @@ config NF_CT_ACCT Those counters can be used for flow-based accounting or the `connbytes' match. + Please note that currently this option only sets a default state. + You may change it at boot time with nf_conntrack.acct=0/1 kernel + paramater or by loading the nf_conntrack module with acct=0/1. + + You may also disable/enable it on a running system with: + sysctl net.netfilter.nf_conntrack_acct=0/1 + + This option will be removed in 2.6.29. + If unsure, say `N'. config NF_CONNTRACK_MARK bool 'Connection mark tracking support' depends on NETFILTER_ADVANCED - depends on NF_CONNTRACK help This option enables support for connection marks, used by the `CONNMARK' target and `connmark' match. Similar to the mark value @@ -64,7 +72,7 @@ config NF_CONNTRACK_MARK config NF_CONNTRACK_SECMARK bool 'Connection tracking security mark support' - depends on NF_CONNTRACK && NETWORK_SECMARK + depends on NETWORK_SECMARK default m if NETFILTER_ADVANCED=n help This option enables security markings to be applied to @@ -77,7 +85,6 @@ config NF_CONNTRACK_SECMARK config NF_CONNTRACK_EVENTS bool "Connection tracking events" - depends on NF_CONNTRACK depends on NETFILTER_ADVANCED help If this option is enabled, the connection tracking code will @@ -88,7 +95,7 @@ config NF_CONNTRACK_EVENTS config NF_CT_PROTO_DCCP tristate 'DCCP protocol connection tracking support (EXPERIMENTAL)' - depends on EXPERIMENTAL && NF_CONNTRACK + depends on EXPERIMENTAL depends on NETFILTER_ADVANCED default IP_DCCP help @@ -99,11 +106,10 @@ config NF_CT_PROTO_DCCP config NF_CT_PROTO_GRE tristate - depends on NF_CONNTRACK config NF_CT_PROTO_SCTP tristate 'SCTP protocol connection tracking support (EXPERIMENTAL)' - depends on EXPERIMENTAL && NF_CONNTRACK + depends on EXPERIMENTAL depends on NETFILTER_ADVANCED default IP_SCTP help @@ -115,7 +121,6 @@ config NF_CT_PROTO_SCTP config NF_CT_PROTO_UDPLITE tristate 'UDP-Lite protocol connection tracking support' - depends on NF_CONNTRACK depends on NETFILTER_ADVANCED help With this option enabled, the layer 3 independent connection @@ -126,7 +131,6 @@ config NF_CT_PROTO_UDPLITE config NF_CONNTRACK_AMANDA tristate "Amanda backup protocol support" - depends on NF_CONNTRACK depends on NETFILTER_ADVANCED select TEXTSEARCH select TEXTSEARCH_KMP @@ -142,7 +146,6 @@ config NF_CONNTRACK_AMANDA config NF_CONNTRACK_FTP tristate "FTP protocol support" - depends on NF_CONNTRACK default m if NETFILTER_ADVANCED=n help Tracking FTP connections is problematic: special helpers are @@ -157,7 +160,7 @@ config NF_CONNTRACK_FTP config NF_CONNTRACK_H323 tristate "H.323 protocol support" - depends on NF_CONNTRACK && (IPV6 || IPV6=n) + depends on (IPV6 || IPV6=n) depends on NETFILTER_ADVANCED help H.323 is a VoIP signalling protocol from ITU-T. As one of the most @@ -177,7 +180,6 @@ config NF_CONNTRACK_H323 config NF_CONNTRACK_IRC tristate "IRC protocol support" - depends on NF_CONNTRACK default m if NETFILTER_ADVANCED=n help There is a commonly-used extension to IRC called @@ -193,7 +195,6 @@ config NF_CONNTRACK_IRC config NF_CONNTRACK_NETBIOS_NS tristate "NetBIOS name service protocol support" - depends on NF_CONNTRACK depends on NETFILTER_ADVANCED help NetBIOS name service requests are sent as broadcast messages from an @@ -213,7 +214,6 @@ config NF_CONNTRACK_NETBIOS_NS config NF_CONNTRACK_PPTP tristate "PPtP protocol support" - depends on NF_CONNTRACK depends on NETFILTER_ADVANCED select NF_CT_PROTO_GRE help @@ -233,7 +233,7 @@ config NF_CONNTRACK_PPTP config NF_CONNTRACK_SANE tristate "SANE protocol support (EXPERIMENTAL)" - depends on EXPERIMENTAL && NF_CONNTRACK + depends on EXPERIMENTAL depends on NETFILTER_ADVANCED help SANE is a protocol for remote access to scanners as implemented @@ -247,7 +247,6 @@ config NF_CONNTRACK_SANE config NF_CONNTRACK_SIP tristate "SIP protocol support" - depends on NF_CONNTRACK default m if NETFILTER_ADVANCED=n help SIP is an application-layer control protocol that can establish, @@ -260,7 +259,6 @@ config NF_CONNTRACK_SIP config NF_CONNTRACK_TFTP tristate "TFTP protocol support" - depends on NF_CONNTRACK depends on NETFILTER_ADVANCED help TFTP connection tracking helper, this is required depending @@ -272,13 +270,29 @@ config NF_CONNTRACK_TFTP config NF_CT_NETLINK tristate 'Connection tracking netlink interface' - depends on NF_CONNTRACK select NETFILTER_NETLINK depends on NF_NAT=n || NF_NAT default m if NETFILTER_ADVANCED=n help This option enables support for a netlink-based userspace interface +# transparent proxy support +config NETFILTER_TPROXY + tristate "Transparent proxying support (EXPERIMENTAL)" + depends on EXPERIMENTAL + depends on IP_NF_MANGLE + depends on NETFILTER_ADVANCED + help + This option enables transparent proxying support, that is, + support for handling non-locally bound IPv4 TCP and UDP sockets. + For it to work you will have to configure certain iptables rules + and use policy routing. For more information on how to set it up + see Documentation/networking/tproxy.txt. + + To compile it as a module, choose M here. If unsure, say N. + +endif # NF_CONNTRACK + config NETFILTER_XTABLES tristate "Netfilter Xtables support (required for ip_tables)" default m if NETFILTER_ADVANCED=n @@ -286,11 +300,12 @@ config NETFILTER_XTABLES This is required if you intend to use any of ip_tables, ip6_tables or arp_tables. +if NETFILTER_XTABLES + # alphabetically ordered list of targets config NETFILTER_XT_TARGET_CLASSIFY tristate '"CLASSIFY" target support' - depends on NETFILTER_XTABLES depends on NETFILTER_ADVANCED help This option adds a `CLASSIFY' target, which enables the user to set @@ -303,8 +318,6 @@ config NETFILTER_XT_TARGET_CLASSIFY config NETFILTER_XT_TARGET_CONNMARK tristate '"CONNMARK" target support' - depends on NETFILTER_XTABLES - depends on IP_NF_MANGLE || IP6_NF_MANGLE depends on NF_CONNTRACK depends on NETFILTER_ADVANCED select NF_CONNTRACK_MARK @@ -317,9 +330,20 @@ config NETFILTER_XT_TARGET_CONNMARK <file:Documentation/kbuild/modules.txt>. The module will be called ipt_CONNMARK.ko. If unsure, say `N'. +config NETFILTER_XT_TARGET_CONNSECMARK + tristate '"CONNSECMARK" target support' + depends on NF_CONNTRACK && NF_CONNTRACK_SECMARK + default m if NETFILTER_ADVANCED=n + help + The CONNSECMARK target copies security markings from packets + to connections, and restores security markings from connections + to packets (if the packets are not already marked). This would + normally be used in conjunction with the SECMARK target. + + To compile it as a module, choose M here. If unsure, say N. + config NETFILTER_XT_TARGET_DSCP tristate '"DSCP" and "TOS" target support' - depends on NETFILTER_XTABLES depends on IP_NF_MANGLE || IP6_NF_MANGLE depends on NETFILTER_ADVANCED help @@ -336,7 +360,6 @@ config NETFILTER_XT_TARGET_DSCP config NETFILTER_XT_TARGET_MARK tristate '"MARK" target support' - depends on NETFILTER_XTABLES default m if NETFILTER_ADVANCED=n help This option adds a `MARK' target, which allows you to create rules @@ -348,21 +371,8 @@ config NETFILTER_XT_TARGET_MARK To compile it as a module, choose M here. If unsure, say N. -config NETFILTER_XT_TARGET_NFQUEUE - tristate '"NFQUEUE" target Support' - depends on NETFILTER_XTABLES - depends on NETFILTER_ADVANCED - help - This target replaced the old obsolete QUEUE target. - - As opposed to QUEUE, it supports 65535 different queues, - not just one. - - To compile it as a module, choose M here. If unsure, say N. - config NETFILTER_XT_TARGET_NFLOG tristate '"NFLOG" target support' - depends on NETFILTER_XTABLES default m if NETFILTER_ADVANCED=n help This option enables the NFLOG target, which allows to LOG @@ -372,9 +382,19 @@ config NETFILTER_XT_TARGET_NFLOG To compile it as a module, choose M here. If unsure, say N. +config NETFILTER_XT_TARGET_NFQUEUE + tristate '"NFQUEUE" target Support' + depends on NETFILTER_ADVANCED + help + This target replaced the old obsolete QUEUE target. + + As opposed to QUEUE, it supports 65535 different queues, + not just one. + + To compile it as a module, choose M here. If unsure, say N. + config NETFILTER_XT_TARGET_NOTRACK tristate '"NOTRACK" target support' - depends on NETFILTER_XTABLES depends on IP_NF_RAW || IP6_NF_RAW depends on NF_CONNTRACK depends on NETFILTER_ADVANCED @@ -389,7 +409,6 @@ config NETFILTER_XT_TARGET_NOTRACK config NETFILTER_XT_TARGET_RATEEST tristate '"RATEEST" target support' - depends on NETFILTER_XTABLES depends on NETFILTER_ADVANCED help This option adds a `RATEEST' target, which allows to measure @@ -398,9 +417,23 @@ config NETFILTER_XT_TARGET_RATEEST To compile it as a module, choose M here. If unsure, say N. +config NETFILTER_XT_TARGET_TPROXY + tristate '"TPROXY" target support (EXPERIMENTAL)' + depends on EXPERIMENTAL + depends on NETFILTER_TPROXY + depends on NETFILTER_XTABLES + depends on NETFILTER_ADVANCED + select NF_DEFRAG_IPV4 + help + This option adds a `TPROXY' target, which is somewhat similar to + REDIRECT. It can only be used in the mangle table and is useful + to redirect traffic to a transparent proxy. It does _not_ depend + on Netfilter connection tracking and NAT, unlike REDIRECT. + + To compile it as a module, choose M here. If unsure, say N. + config NETFILTER_XT_TARGET_TRACE tristate '"TRACE" target support' - depends on NETFILTER_XTABLES depends on IP_NF_RAW || IP6_NF_RAW depends on NETFILTER_ADVANCED help @@ -413,7 +446,7 @@ config NETFILTER_XT_TARGET_TRACE config NETFILTER_XT_TARGET_SECMARK tristate '"SECMARK" target support' - depends on NETFILTER_XTABLES && NETWORK_SECMARK + depends on NETWORK_SECMARK default m if NETFILTER_ADVANCED=n help The SECMARK target allows security marking of network @@ -421,21 +454,9 @@ config NETFILTER_XT_TARGET_SECMARK To compile it as a module, choose M here. If unsure, say N. -config NETFILTER_XT_TARGET_CONNSECMARK - tristate '"CONNSECMARK" target support' - depends on NETFILTER_XTABLES && NF_CONNTRACK && NF_CONNTRACK_SECMARK - default m if NETFILTER_ADVANCED=n - help - The CONNSECMARK target copies security markings from packets - to connections, and restores security markings from connections - to packets (if the packets are not already marked). This would - normally be used in conjunction with the SECMARK target. - - To compile it as a module, choose M here. If unsure, say N. - config NETFILTER_XT_TARGET_TCPMSS tristate '"TCPMSS" target support' - depends on NETFILTER_XTABLES && (IPV6 || IPV6=n) + depends on (IPV6 || IPV6=n) default m if NETFILTER_ADVANCED=n ---help--- This option adds a `TCPMSS' target, which allows you to alter the @@ -462,7 +483,7 @@ config NETFILTER_XT_TARGET_TCPMSS config NETFILTER_XT_TARGET_TCPOPTSTRIP tristate '"TCPOPTSTRIP" target support (EXPERIMENTAL)' - depends on EXPERIMENTAL && NETFILTER_XTABLES + depends on EXPERIMENTAL depends on IP_NF_MANGLE || IP6_NF_MANGLE depends on NETFILTER_ADVANCED help @@ -471,7 +492,6 @@ config NETFILTER_XT_TARGET_TCPOPTSTRIP config NETFILTER_XT_MATCH_COMMENT tristate '"comment" match support' - depends on NETFILTER_XTABLES depends on NETFILTER_ADVANCED help This option adds a `comment' dummy-match, which allows you to put @@ -482,7 +502,6 @@ config NETFILTER_XT_MATCH_COMMENT config NETFILTER_XT_MATCH_CONNBYTES tristate '"connbytes" per-connection counter match support' - depends on NETFILTER_XTABLES depends on NF_CONNTRACK depends on NETFILTER_ADVANCED select NF_CT_ACCT @@ -495,7 +514,6 @@ config NETFILTER_XT_MATCH_CONNBYTES config NETFILTER_XT_MATCH_CONNLIMIT tristate '"connlimit" match support"' - depends on NETFILTER_XTABLES depends on NF_CONNTRACK depends on NETFILTER_ADVANCED ---help--- @@ -504,7 +522,6 @@ config NETFILTER_XT_MATCH_CONNLIMIT config NETFILTER_XT_MATCH_CONNMARK tristate '"connmark" connection mark match support' - depends on NETFILTER_XTABLES depends on NF_CONNTRACK depends on NETFILTER_ADVANCED select NF_CONNTRACK_MARK @@ -518,7 +535,6 @@ config NETFILTER_XT_MATCH_CONNMARK config NETFILTER_XT_MATCH_CONNTRACK tristate '"conntrack" connection tracking match support' - depends on NETFILTER_XTABLES depends on NF_CONNTRACK default m if NETFILTER_ADVANCED=n help @@ -532,7 +548,6 @@ config NETFILTER_XT_MATCH_CONNTRACK config NETFILTER_XT_MATCH_DCCP tristate '"dccp" protocol match support' - depends on NETFILTER_XTABLES depends on NETFILTER_ADVANCED default IP_DCCP help @@ -545,7 +560,6 @@ config NETFILTER_XT_MATCH_DCCP config NETFILTER_XT_MATCH_DSCP tristate '"dscp" and "tos" match support' - depends on NETFILTER_XTABLES depends on NETFILTER_ADVANCED help This option adds a `DSCP' match, which allows you to match against @@ -561,7 +575,6 @@ config NETFILTER_XT_MATCH_DSCP config NETFILTER_XT_MATCH_ESP tristate '"esp" match support' - depends on NETFILTER_XTABLES depends on NETFILTER_ADVANCED help This match extension allows you to match a range of SPIs @@ -569,9 +582,23 @@ config NETFILTER_XT_MATCH_ESP To compile it as a module, choose M here. If unsure, say N. +config NETFILTER_XT_MATCH_HASHLIMIT + tristate '"hashlimit" match support' + depends on (IP6_NF_IPTABLES || IP6_NF_IPTABLES=n) + depends on NETFILTER_ADVANCED + help + This option adds a `hashlimit' match. + + As opposed to `limit', this match dynamically creates a hash table + of limit buckets, based on your selection of source/destination + addresses and/or ports. + + It enables you to express policies like `10kpps for any given + destination address' or `500pps from any given source address' + with a single rule. + config NETFILTER_XT_MATCH_HELPER tristate '"helper" match support' - depends on NETFILTER_XTABLES depends on NF_CONNTRACK depends on NETFILTER_ADVANCED help @@ -582,7 +609,6 @@ config NETFILTER_XT_MATCH_HELPER config NETFILTER_XT_MATCH_IPRANGE tristate '"iprange" address range match support' - depends on NETFILTER_XTABLES depends on NETFILTER_ADVANCED ---help--- This option adds a "iprange" match, which allows you to match based on @@ -593,7 +619,6 @@ config NETFILTER_XT_MATCH_IPRANGE config NETFILTER_XT_MATCH_LENGTH tristate '"length" match support' - depends on NETFILTER_XTABLES depends on NETFILTER_ADVANCED help This option allows you to match the length of a packet against a @@ -603,7 +628,6 @@ config NETFILTER_XT_MATCH_LENGTH config NETFILTER_XT_MATCH_LIMIT tristate '"limit" match support' - depends on NETFILTER_XTABLES depends on NETFILTER_ADVANCED help limit matching allows you to control the rate at which a rule can be @@ -614,7 +638,6 @@ config NETFILTER_XT_MATCH_LIMIT config NETFILTER_XT_MATCH_MAC tristate '"mac" address match support' - depends on NETFILTER_XTABLES depends on NETFILTER_ADVANCED help MAC matching allows you to match packets based on the source @@ -624,7 +647,6 @@ config NETFILTER_XT_MATCH_MAC config NETFILTER_XT_MATCH_MARK tristate '"mark" match support' - depends on NETFILTER_XTABLES default m if NETFILTER_ADVANCED=n help Netfilter mark matching allows you to match packets based on the @@ -633,9 +655,18 @@ config NETFILTER_XT_MATCH_MARK To compile it as a module, choose M here. If unsure, say N. +config NETFILTER_XT_MATCH_MULTIPORT + tristate '"multiport" Multiple port match support' + depends on NETFILTER_ADVANCED + help + Multiport matching allows you to match TCP or UDP packets based on + a series of source or destination ports: normally a rule can only + match a single range of ports. + + To compile it as a module, choose M here. If unsure, say N. + config NETFILTER_XT_MATCH_OWNER tristate '"owner" match support' - depends on NETFILTER_XTABLES depends on NETFILTER_ADVANCED ---help--- Socket owner matching allows you to match locally-generated packets @@ -644,7 +675,7 @@ config NETFILTER_XT_MATCH_OWNER config NETFILTER_XT_MATCH_POLICY tristate 'IPsec "policy" match support' - depends on NETFILTER_XTABLES && XFRM + depends on XFRM default m if NETFILTER_ADVANCED=n help Policy matching allows you to match packets based on the @@ -653,20 +684,9 @@ config NETFILTER_XT_MATCH_POLICY To compile it as a module, choose M here. If unsure, say N. -config NETFILTER_XT_MATCH_MULTIPORT - tristate '"multiport" Multiple port match support' - depends on NETFILTER_XTABLES - depends on NETFILTER_ADVANCED - help - Multiport matching allows you to match TCP or UDP packets based on - a series of source or destination ports: normally a rule can only - match a single range of ports. - - To compile it as a module, choose M here. If unsure, say N. - config NETFILTER_XT_MATCH_PHYSDEV tristate '"physdev" match support' - depends on NETFILTER_XTABLES && BRIDGE && BRIDGE_NETFILTER + depends on BRIDGE && BRIDGE_NETFILTER depends on NETFILTER_ADVANCED help Physdev packet matching matches against the physical bridge ports @@ -676,7 +696,6 @@ config NETFILTER_XT_MATCH_PHYSDEV config NETFILTER_XT_MATCH_PKTTYPE tristate '"pkttype" packet type match support' - depends on NETFILTER_XTABLES depends on NETFILTER_ADVANCED help Packet type matching allows you to match a packet by @@ -689,7 +708,6 @@ config NETFILTER_XT_MATCH_PKTTYPE config NETFILTER_XT_MATCH_QUOTA tristate '"quota" match support' - depends on NETFILTER_XTABLES depends on NETFILTER_ADVANCED help This option adds a `quota' match, which allows to match on a @@ -700,7 +718,6 @@ config NETFILTER_XT_MATCH_QUOTA config NETFILTER_XT_MATCH_RATEEST tristate '"rateest" match support' - depends on NETFILTER_XTABLES depends on NETFILTER_ADVANCED select NETFILTER_XT_TARGET_RATEEST help @@ -711,7 +728,6 @@ config NETFILTER_XT_MATCH_RATEEST config NETFILTER_XT_MATCH_REALM tristate '"realm" match support' - depends on NETFILTER_XTABLES depends on NETFILTER_ADVANCED select NET_CLS_ROUTE help @@ -724,9 +740,26 @@ config NETFILTER_XT_MATCH_REALM If you want to compile it as a module, say M here and read <file:Documentation/kbuild/modules.txt>. If unsure, say `N'. +config NETFILTER_XT_MATCH_RECENT + tristate '"recent" match support' + depends on NETFILTER_ADVANCED + ---help--- + This match is used for creating one or many lists of recently + used addresses and then matching against that/those list(s). + + Short options are available by using 'iptables -m recent -h' + Official Website: <http://snowman.net/projects/ipt_recent/> + +config NETFILTER_XT_MATCH_RECENT_PROC_COMPAT + bool 'Enable obsolete /proc/net/ipt_recent' + depends on NETFILTER_XT_MATCH_RECENT && PROC_FS + ---help--- + This option enables the old /proc/net/ipt_recent interface, + which has been obsoleted by /proc/net/xt_recent. + config NETFILTER_XT_MATCH_SCTP tristate '"sctp" protocol match support (EXPERIMENTAL)' - depends on NETFILTER_XTABLES && EXPERIMENTAL + depends on EXPERIMENTAL depends on NETFILTER_ADVANCED default IP_SCTP help @@ -737,9 +770,23 @@ config NETFILTER_XT_MATCH_SCTP If you want to compile it as a module, say M here and read <file:Documentation/kbuild/modules.txt>. If unsure, say `N'. +config NETFILTER_XT_MATCH_SOCKET + tristate '"socket" match support (EXPERIMENTAL)' + depends on EXPERIMENTAL + depends on NETFILTER_TPROXY + depends on NETFILTER_XTABLES + depends on NETFILTER_ADVANCED + select NF_DEFRAG_IPV4 + help + This option adds a `socket' match, which can be used to match + packets for which a TCP or UDP socket lookup finds a valid socket. + It can be used in combination with the MARK target and policy + routing to implement full featured non-locally bound sockets. + + To compile it as a module, choose M here. If unsure, say N. + config NETFILTER_XT_MATCH_STATE tristate '"state" match support' - depends on NETFILTER_XTABLES depends on NF_CONNTRACK default m if NETFILTER_ADVANCED=n help @@ -751,7 +798,6 @@ config NETFILTER_XT_MATCH_STATE config NETFILTER_XT_MATCH_STATISTIC tristate '"statistic" match support' - depends on NETFILTER_XTABLES depends on NETFILTER_ADVANCED help This option adds a `statistic' match, which allows you to match @@ -761,7 +807,6 @@ config NETFILTER_XT_MATCH_STATISTIC config NETFILTER_XT_MATCH_STRING tristate '"string" match support' - depends on NETFILTER_XTABLES depends on NETFILTER_ADVANCED select TEXTSEARCH select TEXTSEARCH_KMP @@ -775,7 +820,6 @@ config NETFILTER_XT_MATCH_STRING config NETFILTER_XT_MATCH_TCPMSS tristate '"tcpmss" match support' - depends on NETFILTER_XTABLES depends on NETFILTER_ADVANCED help This option adds a `tcpmss' match, which allows you to examine the @@ -786,7 +830,6 @@ config NETFILTER_XT_MATCH_TCPMSS config NETFILTER_XT_MATCH_TIME tristate '"time" match support' - depends on NETFILTER_XTABLES depends on NETFILTER_ADVANCED ---help--- This option adds a "time" match, which allows you to match based on @@ -801,7 +844,6 @@ config NETFILTER_XT_MATCH_TIME config NETFILTER_XT_MATCH_U32 tristate '"u32" match support' - depends on NETFILTER_XTABLES depends on NETFILTER_ADVANCED ---help--- u32 allows you to extract quantities of up to 4 bytes from a packet, @@ -813,20 +855,8 @@ config NETFILTER_XT_MATCH_U32 Details and examples are in the kernel module source. -config NETFILTER_XT_MATCH_HASHLIMIT - tristate '"hashlimit" match support' - depends on NETFILTER_XTABLES && (IP6_NF_IPTABLES || IP6_NF_IPTABLES=n) - depends on NETFILTER_ADVANCED - help - This option adds a `hashlimit' match. - - As opposed to `limit', this match dynamically creates a hash table - of limit buckets, based on your selection of source/destination - addresses and/or ports. - - It enables you to express policies like `10kpps for any given - destination address' or `500pps from any given source address' - with a single rule. +endif # NETFILTER_XTABLES endmenu +source "net/netfilter/ipvs/Kconfig" diff --git a/net/netfilter/Makefile b/net/netfilter/Makefile index 5c4b183f6422..da3d909e053f 100644 --- a/net/netfilter/Makefile +++ b/net/netfilter/Makefile @@ -1,6 +1,6 @@ netfilter-objs := core.o nf_log.o nf_queue.o nf_sockopt.o -nf_conntrack-y := nf_conntrack_core.o nf_conntrack_standalone.o nf_conntrack_expect.o nf_conntrack_helper.o nf_conntrack_proto.o nf_conntrack_l3proto_generic.o nf_conntrack_proto_generic.o nf_conntrack_proto_tcp.o nf_conntrack_proto_udp.o nf_conntrack_extend.o +nf_conntrack-y := nf_conntrack_core.o nf_conntrack_standalone.o nf_conntrack_expect.o nf_conntrack_helper.o nf_conntrack_proto.o nf_conntrack_l3proto_generic.o nf_conntrack_proto_generic.o nf_conntrack_proto_tcp.o nf_conntrack_proto_udp.o nf_conntrack_extend.o nf_conntrack_acct.o nf_conntrack-$(CONFIG_NF_CONNTRACK_EVENTS) += nf_conntrack_ecache.o obj-$(CONFIG_NETFILTER) = netfilter.o @@ -34,6 +34,9 @@ obj-$(CONFIG_NF_CONNTRACK_SANE) += nf_conntrack_sane.o obj-$(CONFIG_NF_CONNTRACK_SIP) += nf_conntrack_sip.o obj-$(CONFIG_NF_CONNTRACK_TFTP) += nf_conntrack_tftp.o +# transparent proxy support +obj-$(CONFIG_NETFILTER_TPROXY) += nf_tproxy_core.o + # generic X tables obj-$(CONFIG_NETFILTER_XTABLES) += x_tables.o xt_tcpudp.o @@ -48,6 +51,7 @@ obj-$(CONFIG_NETFILTER_XT_TARGET_NFQUEUE) += xt_NFQUEUE.o obj-$(CONFIG_NETFILTER_XT_TARGET_NOTRACK) += xt_NOTRACK.o obj-$(CONFIG_NETFILTER_XT_TARGET_RATEEST) += xt_RATEEST.o obj-$(CONFIG_NETFILTER_XT_TARGET_SECMARK) += xt_SECMARK.o +obj-$(CONFIG_NETFILTER_XT_TARGET_TPROXY) += xt_TPROXY.o obj-$(CONFIG_NETFILTER_XT_TARGET_TCPMSS) += xt_TCPMSS.o obj-$(CONFIG_NETFILTER_XT_TARGET_TCPOPTSTRIP) += xt_TCPOPTSTRIP.o obj-$(CONFIG_NETFILTER_XT_TARGET_TRACE) += xt_TRACE.o @@ -76,10 +80,15 @@ obj-$(CONFIG_NETFILTER_XT_MATCH_POLICY) += xt_policy.o obj-$(CONFIG_NETFILTER_XT_MATCH_QUOTA) += xt_quota.o obj-$(CONFIG_NETFILTER_XT_MATCH_RATEEST) += xt_rateest.o obj-$(CONFIG_NETFILTER_XT_MATCH_REALM) += xt_realm.o +obj-$(CONFIG_NETFILTER_XT_MATCH_RECENT) += xt_recent.o obj-$(CONFIG_NETFILTER_XT_MATCH_SCTP) += xt_sctp.o +obj-$(CONFIG_NETFILTER_XT_MATCH_SOCKET) += xt_socket.o obj-$(CONFIG_NETFILTER_XT_MATCH_STATE) += xt_state.o obj-$(CONFIG_NETFILTER_XT_MATCH_STATISTIC) += xt_statistic.o obj-$(CONFIG_NETFILTER_XT_MATCH_STRING) += xt_string.o obj-$(CONFIG_NETFILTER_XT_MATCH_TCPMSS) += xt_tcpmss.o obj-$(CONFIG_NETFILTER_XT_MATCH_TIME) += xt_time.o obj-$(CONFIG_NETFILTER_XT_MATCH_U32) += xt_u32.o + +# IPVS +obj-$(CONFIG_IP_VS) += ipvs/ diff --git a/net/netfilter/core.c b/net/netfilter/core.c index 292fa28146fb..a90ac83c5918 100644 --- a/net/netfilter/core.c +++ b/net/netfilter/core.c @@ -26,7 +26,7 @@ static DEFINE_MUTEX(afinfo_mutex); -const struct nf_afinfo *nf_afinfo[NPROTO] __read_mostly; +const struct nf_afinfo *nf_afinfo[NFPROTO_NUMPROTO] __read_mostly; EXPORT_SYMBOL(nf_afinfo); int nf_register_afinfo(const struct nf_afinfo *afinfo) @@ -51,7 +51,7 @@ void nf_unregister_afinfo(const struct nf_afinfo *afinfo) } EXPORT_SYMBOL_GPL(nf_unregister_afinfo); -struct list_head nf_hooks[NPROTO][NF_MAX_HOOKS] __read_mostly; +struct list_head nf_hooks[NFPROTO_NUMPROTO][NF_MAX_HOOKS] __read_mostly; EXPORT_SYMBOL(nf_hooks); static DEFINE_MUTEX(nf_hook_mutex); @@ -113,7 +113,7 @@ EXPORT_SYMBOL(nf_unregister_hooks); unsigned int nf_iterate(struct list_head *head, struct sk_buff *skb, - int hook, + unsigned int hook, const struct net_device *indev, const struct net_device *outdev, struct list_head **i, @@ -155,7 +155,7 @@ unsigned int nf_iterate(struct list_head *head, /* Returns 1 if okfn() needs to be executed by the caller, * -EPERM for NF_DROP, 0 otherwise. */ -int nf_hook_slow(int pf, unsigned int hook, struct sk_buff *skb, +int nf_hook_slow(u_int8_t pf, unsigned int hook, struct sk_buff *skb, struct net_device *indev, struct net_device *outdev, int (*okfn)(struct sk_buff *), @@ -165,14 +165,6 @@ int nf_hook_slow(int pf, unsigned int hook, struct sk_buff *skb, unsigned int verdict; int ret = 0; -#ifdef CONFIG_NET_NS - struct net *net; - - net = indev == NULL ? dev_net(outdev) : dev_net(indev); - if (net != &init_net) - return 1; -#endif - /* We may already have this, but read-locks nest anyway */ rcu_read_lock(); @@ -264,7 +256,7 @@ EXPORT_SYMBOL(proc_net_netfilter); void __init netfilter_init(void) { int i, h; - for (i = 0; i < NPROTO; i++) { + for (i = 0; i < ARRAY_SIZE(nf_hooks); i++) { for (h = 0; h < NF_MAX_HOOKS; h++) INIT_LIST_HEAD(&nf_hooks[i][h]); } diff --git a/net/ipv4/ipvs/Kconfig b/net/netfilter/ipvs/Kconfig index 09d0c3f35669..05048e403266 100644 --- a/net/ipv4/ipvs/Kconfig +++ b/net/netfilter/ipvs/Kconfig @@ -2,8 +2,8 @@ # IP Virtual Server configuration # menuconfig IP_VS - tristate "IP virtual server support (EXPERIMENTAL)" - depends on NETFILTER + tristate "IP virtual server support" + depends on NET && INET && NETFILTER ---help--- IP Virtual Server support will let you build a high-performance virtual server based on cluster of two or more real servers. This @@ -24,6 +24,14 @@ menuconfig IP_VS if IP_VS +config IP_VS_IPV6 + bool "IPv6 support for IPVS (DANGEROUS)" + depends on EXPERIMENTAL && (IPV6 = y || IP_VS = IPV6) + ---help--- + Add IPv6 support to IPVS. This is incomplete and might be dangerous. + + Say N if unsure. + config IP_VS_DEBUG bool "IP virtual server debugging" ---help--- @@ -33,7 +41,8 @@ config IP_VS_DEBUG config IP_VS_TAB_BITS int "IPVS connection table size (the Nth power of 2)" - default "12" + range 8 20 + default 12 ---help--- The IPVS connection hash table uses the chaining scheme to handle hash collisions. Using a big IPVS connection hash table will greatly @@ -71,14 +80,20 @@ config IP_VS_PROTO_UDP This option enables support for load balancing UDP transport protocol. Say Y if unsure. +config IP_VS_PROTO_AH_ESP + bool + depends on UNDEFINED + config IP_VS_PROTO_ESP bool "ESP load balancing support" + select IP_VS_PROTO_AH_ESP ---help--- This option enables support for load balancing ESP (Encapsulation Security Payload) transport protocol. Say Y if unsure. config IP_VS_PROTO_AH bool "AH load balancing support" + select IP_VS_PROTO_AH_ESP ---help--- This option enables support for load balancing AH (Authentication Header) transport protocol. Say Y if unsure. diff --git a/net/ipv4/ipvs/Makefile b/net/netfilter/ipvs/Makefile index 30e85de9ffff..73a46fe1fe4c 100644 --- a/net/ipv4/ipvs/Makefile +++ b/net/netfilter/ipvs/Makefile @@ -6,8 +6,7 @@ ip_vs_proto-objs-y := ip_vs_proto-objs-$(CONFIG_IP_VS_PROTO_TCP) += ip_vs_proto_tcp.o ip_vs_proto-objs-$(CONFIG_IP_VS_PROTO_UDP) += ip_vs_proto_udp.o -ip_vs_proto-objs-$(CONFIG_IP_VS_PROTO_ESP) += ip_vs_proto_esp.o -ip_vs_proto-objs-$(CONFIG_IP_VS_PROTO_AH) += ip_vs_proto_ah.o +ip_vs_proto-objs-$(CONFIG_IP_VS_PROTO_AH_ESP) += ip_vs_proto_ah_esp.o ip_vs-objs := ip_vs_conn.o ip_vs_core.o ip_vs_ctl.o ip_vs_sched.o \ ip_vs_xmit.o ip_vs_app.o ip_vs_sync.o \ diff --git a/net/ipv4/ipvs/ip_vs_app.c b/net/netfilter/ipvs/ip_vs_app.c index 535abe0c45e7..201b8ea3020d 100644 --- a/net/ipv4/ipvs/ip_vs_app.c +++ b/net/netfilter/ipvs/ip_vs_app.c @@ -1,8 +1,6 @@ /* * ip_vs_app.c: Application module support for IPVS * - * Version: $Id: ip_vs_app.c,v 1.17 2003/03/22 06:31:21 wensong Exp $ - * * Authors: Wensong Zhang <wensong@linuxvirtualserver.org> * * This program is free software; you can redistribute it and/or @@ -610,7 +608,7 @@ int ip_vs_skb_replace(struct sk_buff *skb, gfp_t pri, } -int ip_vs_app_init(void) +int __init ip_vs_app_init(void) { /* we will replace it with proc_net_ipvs_create() soon */ proc_net_fops_create(&init_net, "ip_vs_app", 0, &ip_vs_app_fops); diff --git a/net/ipv4/ipvs/ip_vs_conn.c b/net/netfilter/ipvs/ip_vs_conn.c index 65f1ba112752..9a24332fbed8 100644 --- a/net/ipv4/ipvs/ip_vs_conn.c +++ b/net/netfilter/ipvs/ip_vs_conn.c @@ -5,8 +5,6 @@ * high-performance and highly available server based on a * cluster of servers. * - * Version: $Id: ip_vs_conn.c,v 1.31 2003/04/18 09:03:16 wensong Exp $ - * * Authors: Wensong Zhang <wensong@linuxvirtualserver.org> * Peter Kese <peter.kese@ijs.si> * Julian Anastasov <ja@ssi.bg> @@ -116,9 +114,18 @@ static inline void ct_write_unlock_bh(unsigned key) /* * Returns hash value for IPVS connection entry */ -static unsigned int ip_vs_conn_hashkey(unsigned proto, __be32 addr, __be16 port) +static unsigned int ip_vs_conn_hashkey(int af, unsigned proto, + const union nf_inet_addr *addr, + __be16 port) { - return jhash_3words((__force u32)addr, (__force u32)port, proto, ip_vs_conn_rnd) +#ifdef CONFIG_IP_VS_IPV6 + if (af == AF_INET6) + return jhash_3words(jhash(addr, 16, ip_vs_conn_rnd), + (__force u32)port, proto, ip_vs_conn_rnd) + & IP_VS_CONN_TAB_MASK; +#endif + return jhash_3words((__force u32)addr->ip, (__force u32)port, proto, + ip_vs_conn_rnd) & IP_VS_CONN_TAB_MASK; } @@ -133,7 +140,7 @@ static inline int ip_vs_conn_hash(struct ip_vs_conn *cp) int ret; /* Hash by protocol, client address and port */ - hash = ip_vs_conn_hashkey(cp->protocol, cp->caddr, cp->cport); + hash = ip_vs_conn_hashkey(cp->af, cp->protocol, &cp->caddr, cp->cport); ct_write_lock(hash); @@ -164,7 +171,7 @@ static inline int ip_vs_conn_unhash(struct ip_vs_conn *cp) int ret; /* unhash it and decrease its reference counter */ - hash = ip_vs_conn_hashkey(cp->protocol, cp->caddr, cp->cport); + hash = ip_vs_conn_hashkey(cp->af, cp->protocol, &cp->caddr, cp->cport); ct_write_lock(hash); @@ -189,20 +196,23 @@ static inline int ip_vs_conn_unhash(struct ip_vs_conn *cp) * d_addr, d_port: pkt dest address (load balancer) */ static inline struct ip_vs_conn *__ip_vs_conn_in_get -(int protocol, __be32 s_addr, __be16 s_port, __be32 d_addr, __be16 d_port) +(int af, int protocol, const union nf_inet_addr *s_addr, __be16 s_port, + const union nf_inet_addr *d_addr, __be16 d_port) { unsigned hash; struct ip_vs_conn *cp; - hash = ip_vs_conn_hashkey(protocol, s_addr, s_port); + hash = ip_vs_conn_hashkey(af, protocol, s_addr, s_port); ct_read_lock(hash); list_for_each_entry(cp, &ip_vs_conn_tab[hash], c_list) { - if (s_addr==cp->caddr && s_port==cp->cport && - d_port==cp->vport && d_addr==cp->vaddr && + if (cp->af == af && + ip_vs_addr_equal(af, s_addr, &cp->caddr) && + ip_vs_addr_equal(af, d_addr, &cp->vaddr) && + s_port == cp->cport && d_port == cp->vport && ((!s_port) ^ (!(cp->flags & IP_VS_CONN_F_NO_CPORT))) && - protocol==cp->protocol) { + protocol == cp->protocol) { /* HIT */ atomic_inc(&cp->refcnt); ct_read_unlock(hash); @@ -216,39 +226,44 @@ static inline struct ip_vs_conn *__ip_vs_conn_in_get } struct ip_vs_conn *ip_vs_conn_in_get -(int protocol, __be32 s_addr, __be16 s_port, __be32 d_addr, __be16 d_port) +(int af, int protocol, const union nf_inet_addr *s_addr, __be16 s_port, + const union nf_inet_addr *d_addr, __be16 d_port) { struct ip_vs_conn *cp; - cp = __ip_vs_conn_in_get(protocol, s_addr, s_port, d_addr, d_port); + cp = __ip_vs_conn_in_get(af, protocol, s_addr, s_port, d_addr, d_port); if (!cp && atomic_read(&ip_vs_conn_no_cport_cnt)) - cp = __ip_vs_conn_in_get(protocol, s_addr, 0, d_addr, d_port); + cp = __ip_vs_conn_in_get(af, protocol, s_addr, 0, d_addr, + d_port); - IP_VS_DBG(9, "lookup/in %s %u.%u.%u.%u:%d->%u.%u.%u.%u:%d %s\n", - ip_vs_proto_name(protocol), - NIPQUAD(s_addr), ntohs(s_port), - NIPQUAD(d_addr), ntohs(d_port), - cp?"hit":"not hit"); + IP_VS_DBG_BUF(9, "lookup/in %s %s:%d->%s:%d %s\n", + ip_vs_proto_name(protocol), + IP_VS_DBG_ADDR(af, s_addr), ntohs(s_port), + IP_VS_DBG_ADDR(af, d_addr), ntohs(d_port), + cp ? "hit" : "not hit"); return cp; } /* Get reference to connection template */ struct ip_vs_conn *ip_vs_ct_in_get -(int protocol, __be32 s_addr, __be16 s_port, __be32 d_addr, __be16 d_port) +(int af, int protocol, const union nf_inet_addr *s_addr, __be16 s_port, + const union nf_inet_addr *d_addr, __be16 d_port) { unsigned hash; struct ip_vs_conn *cp; - hash = ip_vs_conn_hashkey(protocol, s_addr, s_port); + hash = ip_vs_conn_hashkey(af, protocol, s_addr, s_port); ct_read_lock(hash); list_for_each_entry(cp, &ip_vs_conn_tab[hash], c_list) { - if (s_addr==cp->caddr && s_port==cp->cport && - d_port==cp->vport && d_addr==cp->vaddr && + if (cp->af == af && + ip_vs_addr_equal(af, s_addr, &cp->caddr) && + ip_vs_addr_equal(af, d_addr, &cp->vaddr) && + s_port == cp->cport && d_port == cp->vport && cp->flags & IP_VS_CONN_F_TEMPLATE && - protocol==cp->protocol) { + protocol == cp->protocol) { /* HIT */ atomic_inc(&cp->refcnt); goto out; @@ -259,11 +274,11 @@ struct ip_vs_conn *ip_vs_ct_in_get out: ct_read_unlock(hash); - IP_VS_DBG(9, "template lookup/in %s %u.%u.%u.%u:%d->%u.%u.%u.%u:%d %s\n", - ip_vs_proto_name(protocol), - NIPQUAD(s_addr), ntohs(s_port), - NIPQUAD(d_addr), ntohs(d_port), - cp?"hit":"not hit"); + IP_VS_DBG_BUF(9, "template lookup/in %s %s:%d->%s:%d %s\n", + ip_vs_proto_name(protocol), + IP_VS_DBG_ADDR(af, s_addr), ntohs(s_port), + IP_VS_DBG_ADDR(af, d_addr), ntohs(d_port), + cp ? "hit" : "not hit"); return cp; } @@ -275,7 +290,8 @@ struct ip_vs_conn *ip_vs_ct_in_get * d_addr, d_port: pkt dest address (foreign host) */ struct ip_vs_conn *ip_vs_conn_out_get -(int protocol, __be32 s_addr, __be16 s_port, __be32 d_addr, __be16 d_port) +(int af, int protocol, const union nf_inet_addr *s_addr, __be16 s_port, + const union nf_inet_addr *d_addr, __be16 d_port) { unsigned hash; struct ip_vs_conn *cp, *ret=NULL; @@ -283,13 +299,15 @@ struct ip_vs_conn *ip_vs_conn_out_get /* * Check for "full" addressed entries */ - hash = ip_vs_conn_hashkey(protocol, d_addr, d_port); + hash = ip_vs_conn_hashkey(af, protocol, d_addr, d_port); ct_read_lock(hash); list_for_each_entry(cp, &ip_vs_conn_tab[hash], c_list) { - if (d_addr == cp->caddr && d_port == cp->cport && - s_port == cp->dport && s_addr == cp->daddr && + if (cp->af == af && + ip_vs_addr_equal(af, d_addr, &cp->caddr) && + ip_vs_addr_equal(af, s_addr, &cp->daddr) && + d_port == cp->cport && s_port == cp->dport && protocol == cp->protocol) { /* HIT */ atomic_inc(&cp->refcnt); @@ -300,11 +318,11 @@ struct ip_vs_conn *ip_vs_conn_out_get ct_read_unlock(hash); - IP_VS_DBG(9, "lookup/out %s %u.%u.%u.%u:%d->%u.%u.%u.%u:%d %s\n", - ip_vs_proto_name(protocol), - NIPQUAD(s_addr), ntohs(s_port), - NIPQUAD(d_addr), ntohs(d_port), - ret?"hit":"not hit"); + IP_VS_DBG_BUF(9, "lookup/out %s %s:%d->%s:%d %s\n", + ip_vs_proto_name(protocol), + IP_VS_DBG_ADDR(af, s_addr), ntohs(s_port), + IP_VS_DBG_ADDR(af, d_addr), ntohs(d_port), + ret ? "hit" : "not hit"); return ret; } @@ -371,6 +389,33 @@ static inline void ip_vs_bind_xmit(struct ip_vs_conn *cp) } } +#ifdef CONFIG_IP_VS_IPV6 +static inline void ip_vs_bind_xmit_v6(struct ip_vs_conn *cp) +{ + switch (IP_VS_FWD_METHOD(cp)) { + case IP_VS_CONN_F_MASQ: + cp->packet_xmit = ip_vs_nat_xmit_v6; + break; + + case IP_VS_CONN_F_TUNNEL: + cp->packet_xmit = ip_vs_tunnel_xmit_v6; + break; + + case IP_VS_CONN_F_DROUTE: + cp->packet_xmit = ip_vs_dr_xmit_v6; + break; + + case IP_VS_CONN_F_LOCALNODE: + cp->packet_xmit = ip_vs_null_xmit; + break; + + case IP_VS_CONN_F_BYPASS: + cp->packet_xmit = ip_vs_bypass_xmit_v6; + break; + } +} +#endif + static inline int ip_vs_dest_totalconns(struct ip_vs_dest *dest) { @@ -404,16 +449,16 @@ ip_vs_bind_dest(struct ip_vs_conn *cp, struct ip_vs_dest *dest) cp->flags |= atomic_read(&dest->conn_flags); cp->dest = dest; - IP_VS_DBG(7, "Bind-dest %s c:%u.%u.%u.%u:%d v:%u.%u.%u.%u:%d " - "d:%u.%u.%u.%u:%d fwd:%c s:%u conn->flags:%X conn->refcnt:%d " - "dest->refcnt:%d\n", - ip_vs_proto_name(cp->protocol), - NIPQUAD(cp->caddr), ntohs(cp->cport), - NIPQUAD(cp->vaddr), ntohs(cp->vport), - NIPQUAD(cp->daddr), ntohs(cp->dport), - ip_vs_fwd_tag(cp), cp->state, - cp->flags, atomic_read(&cp->refcnt), - atomic_read(&dest->refcnt)); + IP_VS_DBG_BUF(7, "Bind-dest %s c:%s:%d v:%s:%d " + "d:%s:%d fwd:%c s:%u conn->flags:%X conn->refcnt:%d " + "dest->refcnt:%d\n", + ip_vs_proto_name(cp->protocol), + IP_VS_DBG_ADDR(cp->af, &cp->caddr), ntohs(cp->cport), + IP_VS_DBG_ADDR(cp->af, &cp->vaddr), ntohs(cp->vport), + IP_VS_DBG_ADDR(cp->af, &cp->daddr), ntohs(cp->dport), + ip_vs_fwd_tag(cp), cp->state, + cp->flags, atomic_read(&cp->refcnt), + atomic_read(&dest->refcnt)); /* Update the connection counters */ if (!(cp->flags & IP_VS_CONN_F_TEMPLATE)) { @@ -446,8 +491,9 @@ struct ip_vs_dest *ip_vs_try_bind_dest(struct ip_vs_conn *cp) struct ip_vs_dest *dest; if ((cp) && (!cp->dest)) { - dest = ip_vs_find_dest(cp->daddr, cp->dport, - cp->vaddr, cp->vport, cp->protocol); + dest = ip_vs_find_dest(cp->af, &cp->daddr, cp->dport, + &cp->vaddr, cp->vport, + cp->protocol); ip_vs_bind_dest(cp, dest); return dest; } else @@ -466,16 +512,16 @@ static inline void ip_vs_unbind_dest(struct ip_vs_conn *cp) if (!dest) return; - IP_VS_DBG(7, "Unbind-dest %s c:%u.%u.%u.%u:%d v:%u.%u.%u.%u:%d " - "d:%u.%u.%u.%u:%d fwd:%c s:%u conn->flags:%X conn->refcnt:%d " - "dest->refcnt:%d\n", - ip_vs_proto_name(cp->protocol), - NIPQUAD(cp->caddr), ntohs(cp->cport), - NIPQUAD(cp->vaddr), ntohs(cp->vport), - NIPQUAD(cp->daddr), ntohs(cp->dport), - ip_vs_fwd_tag(cp), cp->state, - cp->flags, atomic_read(&cp->refcnt), - atomic_read(&dest->refcnt)); + IP_VS_DBG_BUF(7, "Unbind-dest %s c:%s:%d v:%s:%d " + "d:%s:%d fwd:%c s:%u conn->flags:%X conn->refcnt:%d " + "dest->refcnt:%d\n", + ip_vs_proto_name(cp->protocol), + IP_VS_DBG_ADDR(cp->af, &cp->caddr), ntohs(cp->cport), + IP_VS_DBG_ADDR(cp->af, &cp->vaddr), ntohs(cp->vport), + IP_VS_DBG_ADDR(cp->af, &cp->daddr), ntohs(cp->dport), + ip_vs_fwd_tag(cp), cp->state, + cp->flags, atomic_read(&cp->refcnt), + atomic_read(&dest->refcnt)); /* Update the connection counters */ if (!(cp->flags & IP_VS_CONN_F_TEMPLATE)) { @@ -528,13 +574,16 @@ int ip_vs_check_template(struct ip_vs_conn *ct) !(dest->flags & IP_VS_DEST_F_AVAILABLE) || (sysctl_ip_vs_expire_quiescent_template && (atomic_read(&dest->weight) == 0))) { - IP_VS_DBG(9, "check_template: dest not available for " - "protocol %s s:%u.%u.%u.%u:%d v:%u.%u.%u.%u:%d " - "-> d:%u.%u.%u.%u:%d\n", - ip_vs_proto_name(ct->protocol), - NIPQUAD(ct->caddr), ntohs(ct->cport), - NIPQUAD(ct->vaddr), ntohs(ct->vport), - NIPQUAD(ct->daddr), ntohs(ct->dport)); + IP_VS_DBG_BUF(9, "check_template: dest not available for " + "protocol %s s:%s:%d v:%s:%d " + "-> d:%s:%d\n", + ip_vs_proto_name(ct->protocol), + IP_VS_DBG_ADDR(ct->af, &ct->caddr), + ntohs(ct->cport), + IP_VS_DBG_ADDR(ct->af, &ct->vaddr), + ntohs(ct->vport), + IP_VS_DBG_ADDR(ct->af, &ct->daddr), + ntohs(ct->dport)); /* * Invalidate the connection template @@ -627,8 +676,9 @@ void ip_vs_conn_expire_now(struct ip_vs_conn *cp) * Create a new connection entry and hash it into the ip_vs_conn_tab */ struct ip_vs_conn * -ip_vs_conn_new(int proto, __be32 caddr, __be16 cport, __be32 vaddr, __be16 vport, - __be32 daddr, __be16 dport, unsigned flags, +ip_vs_conn_new(int af, int proto, const union nf_inet_addr *caddr, __be16 cport, + const union nf_inet_addr *vaddr, __be16 vport, + const union nf_inet_addr *daddr, __be16 dport, unsigned flags, struct ip_vs_dest *dest) { struct ip_vs_conn *cp; @@ -642,12 +692,13 @@ ip_vs_conn_new(int proto, __be32 caddr, __be16 cport, __be32 vaddr, __be16 vport INIT_LIST_HEAD(&cp->c_list); setup_timer(&cp->timer, ip_vs_conn_expire, (unsigned long)cp); + cp->af = af; cp->protocol = proto; - cp->caddr = caddr; + ip_vs_addr_copy(af, &cp->caddr, caddr); cp->cport = cport; - cp->vaddr = vaddr; + ip_vs_addr_copy(af, &cp->vaddr, vaddr); cp->vport = vport; - cp->daddr = daddr; + ip_vs_addr_copy(af, &cp->daddr, daddr); cp->dport = dport; cp->flags = flags; spin_lock_init(&cp->lock); @@ -674,7 +725,12 @@ ip_vs_conn_new(int proto, __be32 caddr, __be16 cport, __be32 vaddr, __be16 vport cp->timeout = 3*HZ; /* Bind its packet transmitter */ - ip_vs_bind_xmit(cp); +#ifdef CONFIG_IP_VS_IPV6 + if (af == AF_INET6) + ip_vs_bind_xmit_v6(cp); + else +#endif + ip_vs_bind_xmit(cp); if (unlikely(pp && atomic_read(&pp->appcnt))) ip_vs_bind_app(cp, pp); @@ -762,12 +818,26 @@ static int ip_vs_conn_seq_show(struct seq_file *seq, void *v) else { const struct ip_vs_conn *cp = v; - seq_printf(seq, - "%-3s %08X %04X %08X %04X %08X %04X %-11s %7lu\n", +#ifdef CONFIG_IP_VS_IPV6 + if (cp->af == AF_INET6) + seq_printf(seq, + "%-3s " NIP6_FMT " %04X " NIP6_FMT + " %04X " NIP6_FMT " %04X %-11s %7lu\n", ip_vs_proto_name(cp->protocol), - ntohl(cp->caddr), ntohs(cp->cport), - ntohl(cp->vaddr), ntohs(cp->vport), - ntohl(cp->daddr), ntohs(cp->dport), + NIP6(cp->caddr.in6), ntohs(cp->cport), + NIP6(cp->vaddr.in6), ntohs(cp->vport), + NIP6(cp->daddr.in6), ntohs(cp->dport), + ip_vs_state_name(cp->protocol, cp->state), + (cp->timer.expires-jiffies)/HZ); + else +#endif + seq_printf(seq, + "%-3s %08X %04X %08X %04X" + " %08X %04X %-11s %7lu\n", + ip_vs_proto_name(cp->protocol), + ntohl(cp->caddr.ip), ntohs(cp->cport), + ntohl(cp->vaddr.ip), ntohs(cp->vport), + ntohl(cp->daddr.ip), ntohs(cp->dport), ip_vs_state_name(cp->protocol, cp->state), (cp->timer.expires-jiffies)/HZ); } @@ -811,12 +881,27 @@ static int ip_vs_conn_sync_seq_show(struct seq_file *seq, void *v) else { const struct ip_vs_conn *cp = v; - seq_printf(seq, - "%-3s %08X %04X %08X %04X %08X %04X %-11s %-6s %7lu\n", +#ifdef CONFIG_IP_VS_IPV6 + if (cp->af == AF_INET6) + seq_printf(seq, + "%-3s " NIP6_FMT " %04X " NIP6_FMT + " %04X " NIP6_FMT " %04X %-11s %-6s %7lu\n", + ip_vs_proto_name(cp->protocol), + NIP6(cp->caddr.in6), ntohs(cp->cport), + NIP6(cp->vaddr.in6), ntohs(cp->vport), + NIP6(cp->daddr.in6), ntohs(cp->dport), + ip_vs_state_name(cp->protocol, cp->state), + ip_vs_origin_name(cp->flags), + (cp->timer.expires-jiffies)/HZ); + else +#endif + seq_printf(seq, + "%-3s %08X %04X %08X %04X " + "%08X %04X %-11s %-6s %7lu\n", ip_vs_proto_name(cp->protocol), - ntohl(cp->caddr), ntohs(cp->cport), - ntohl(cp->vaddr), ntohs(cp->vport), - ntohl(cp->daddr), ntohs(cp->dport), + ntohl(cp->caddr.ip), ntohs(cp->cport), + ntohl(cp->vaddr.ip), ntohs(cp->vport), + ntohl(cp->daddr.ip), ntohs(cp->dport), ip_vs_state_name(cp->protocol, cp->state), ip_vs_origin_name(cp->flags), (cp->timer.expires-jiffies)/HZ); @@ -967,7 +1052,7 @@ static void ip_vs_conn_flush(void) } -int ip_vs_conn_init(void) +int __init ip_vs_conn_init(void) { int idx; diff --git a/net/ipv4/ipvs/ip_vs_core.c b/net/netfilter/ipvs/ip_vs_core.c index 963981a9d501..958abf3e5f8c 100644 --- a/net/ipv4/ipvs/ip_vs_core.c +++ b/net/netfilter/ipvs/ip_vs_core.c @@ -5,8 +5,6 @@ * high-performance and highly available server based on a * cluster of servers. * - * Version: $Id: ip_vs_core.c,v 1.34 2003/05/10 03:05:23 wensong Exp $ - * * Authors: Wensong Zhang <wensong@linuxvirtualserver.org> * Peter Kese <peter.kese@ijs.si> * Julian Anastasov <ja@ssi.bg> @@ -41,6 +39,11 @@ #include <linux/netfilter.h> #include <linux/netfilter_ipv4.h> +#ifdef CONFIG_IP_VS_IPV6 +#include <net/ipv6.h> +#include <linux/netfilter_ipv6.h> +#endif + #include <net/ip_vs.h> @@ -62,6 +65,7 @@ EXPORT_SYMBOL(ip_vs_get_debug_level); /* ID used in ICMP lookups */ #define icmp_id(icmph) (((icmph)->un).echo.id) +#define icmpv6_id(icmph) (icmph->icmp6_dataun.u_echo.identifier) const char *ip_vs_proto_name(unsigned proto) { @@ -76,6 +80,10 @@ const char *ip_vs_proto_name(unsigned proto) return "TCP"; case IPPROTO_ICMP: return "ICMP"; +#ifdef CONFIG_IP_VS_IPV6 + case IPPROTO_ICMPV6: + return "ICMPv6"; +#endif default: sprintf(buf, "IP_%d", proto); return buf; @@ -94,18 +102,18 @@ ip_vs_in_stats(struct ip_vs_conn *cp, struct sk_buff *skb) struct ip_vs_dest *dest = cp->dest; if (dest && (dest->flags & IP_VS_DEST_F_AVAILABLE)) { spin_lock(&dest->stats.lock); - dest->stats.inpkts++; - dest->stats.inbytes += skb->len; + dest->stats.ustats.inpkts++; + dest->stats.ustats.inbytes += skb->len; spin_unlock(&dest->stats.lock); spin_lock(&dest->svc->stats.lock); - dest->svc->stats.inpkts++; - dest->svc->stats.inbytes += skb->len; + dest->svc->stats.ustats.inpkts++; + dest->svc->stats.ustats.inbytes += skb->len; spin_unlock(&dest->svc->stats.lock); spin_lock(&ip_vs_stats.lock); - ip_vs_stats.inpkts++; - ip_vs_stats.inbytes += skb->len; + ip_vs_stats.ustats.inpkts++; + ip_vs_stats.ustats.inbytes += skb->len; spin_unlock(&ip_vs_stats.lock); } } @@ -117,18 +125,18 @@ ip_vs_out_stats(struct ip_vs_conn *cp, struct sk_buff *skb) struct ip_vs_dest *dest = cp->dest; if (dest && (dest->flags & IP_VS_DEST_F_AVAILABLE)) { spin_lock(&dest->stats.lock); - dest->stats.outpkts++; - dest->stats.outbytes += skb->len; + dest->stats.ustats.outpkts++; + dest->stats.ustats.outbytes += skb->len; spin_unlock(&dest->stats.lock); spin_lock(&dest->svc->stats.lock); - dest->svc->stats.outpkts++; - dest->svc->stats.outbytes += skb->len; + dest->svc->stats.ustats.outpkts++; + dest->svc->stats.ustats.outbytes += skb->len; spin_unlock(&dest->svc->stats.lock); spin_lock(&ip_vs_stats.lock); - ip_vs_stats.outpkts++; - ip_vs_stats.outbytes += skb->len; + ip_vs_stats.ustats.outpkts++; + ip_vs_stats.ustats.outbytes += skb->len; spin_unlock(&ip_vs_stats.lock); } } @@ -138,15 +146,15 @@ static inline void ip_vs_conn_stats(struct ip_vs_conn *cp, struct ip_vs_service *svc) { spin_lock(&cp->dest->stats.lock); - cp->dest->stats.conns++; + cp->dest->stats.ustats.conns++; spin_unlock(&cp->dest->stats.lock); spin_lock(&svc->stats.lock); - svc->stats.conns++; + svc->stats.ustats.conns++; spin_unlock(&svc->stats.lock); spin_lock(&ip_vs_stats.lock); - ip_vs_stats.conns++; + ip_vs_stats.ustats.conns++; spin_unlock(&ip_vs_stats.lock); } @@ -175,20 +183,28 @@ ip_vs_sched_persist(struct ip_vs_service *svc, __be16 ports[2]) { struct ip_vs_conn *cp = NULL; - struct iphdr *iph = ip_hdr(skb); + struct ip_vs_iphdr iph; struct ip_vs_dest *dest; struct ip_vs_conn *ct; - __be16 dport; /* destination port to forward */ - __be32 snet; /* source network of the client, after masking */ + __be16 dport; /* destination port to forward */ + union nf_inet_addr snet; /* source network of the client, + after masking */ + + ip_vs_fill_iphdr(svc->af, skb_network_header(skb), &iph); /* Mask saddr with the netmask to adjust template granularity */ - snet = iph->saddr & svc->netmask; +#ifdef CONFIG_IP_VS_IPV6 + if (svc->af == AF_INET6) + ipv6_addr_prefix(&snet.in6, &iph.saddr.in6, svc->netmask); + else +#endif + snet.ip = iph.saddr.ip & svc->netmask; - IP_VS_DBG(6, "p-schedule: src %u.%u.%u.%u:%u dest %u.%u.%u.%u:%u " - "mnet %u.%u.%u.%u\n", - NIPQUAD(iph->saddr), ntohs(ports[0]), - NIPQUAD(iph->daddr), ntohs(ports[1]), - NIPQUAD(snet)); + IP_VS_DBG_BUF(6, "p-schedule: src %s:%u dest %s:%u " + "mnet %s\n", + IP_VS_DBG_ADDR(svc->af, &iph.saddr), ntohs(ports[0]), + IP_VS_DBG_ADDR(svc->af, &iph.daddr), ntohs(ports[1]), + IP_VS_DBG_ADDR(svc->af, &snet)); /* * As far as we know, FTP is a very complicated network protocol, and @@ -206,11 +222,11 @@ ip_vs_sched_persist(struct ip_vs_service *svc, if (ports[1] == svc->port) { /* Check if a template already exists */ if (svc->port != FTPPORT) - ct = ip_vs_ct_in_get(iph->protocol, snet, 0, - iph->daddr, ports[1]); + ct = ip_vs_ct_in_get(svc->af, iph.protocol, &snet, 0, + &iph.daddr, ports[1]); else - ct = ip_vs_ct_in_get(iph->protocol, snet, 0, - iph->daddr, 0); + ct = ip_vs_ct_in_get(svc->af, iph.protocol, &snet, 0, + &iph.daddr, 0); if (!ct || !ip_vs_check_template(ct)) { /* @@ -230,18 +246,18 @@ ip_vs_sched_persist(struct ip_vs_service *svc, * for ftp service. */ if (svc->port != FTPPORT) - ct = ip_vs_conn_new(iph->protocol, - snet, 0, - iph->daddr, + ct = ip_vs_conn_new(svc->af, iph.protocol, + &snet, 0, + &iph.daddr, ports[1], - dest->addr, dest->port, + &dest->addr, dest->port, IP_VS_CONN_F_TEMPLATE, dest); else - ct = ip_vs_conn_new(iph->protocol, - snet, 0, - iph->daddr, 0, - dest->addr, 0, + ct = ip_vs_conn_new(svc->af, iph.protocol, + &snet, 0, + &iph.daddr, 0, + &dest->addr, 0, IP_VS_CONN_F_TEMPLATE, dest); if (ct == NULL) @@ -260,12 +276,16 @@ ip_vs_sched_persist(struct ip_vs_service *svc, * fwmark template: <IPPROTO_IP,caddr,0,fwmark,0,daddr,0> * port zero template: <protocol,caddr,0,vaddr,0,daddr,0> */ - if (svc->fwmark) - ct = ip_vs_ct_in_get(IPPROTO_IP, snet, 0, - htonl(svc->fwmark), 0); - else - ct = ip_vs_ct_in_get(iph->protocol, snet, 0, - iph->daddr, 0); + if (svc->fwmark) { + union nf_inet_addr fwmark = { + .all = { 0, 0, 0, htonl(svc->fwmark) } + }; + + ct = ip_vs_ct_in_get(svc->af, IPPROTO_IP, &snet, 0, + &fwmark, 0); + } else + ct = ip_vs_ct_in_get(svc->af, iph.protocol, &snet, 0, + &iph.daddr, 0); if (!ct || !ip_vs_check_template(ct)) { /* @@ -284,18 +304,22 @@ ip_vs_sched_persist(struct ip_vs_service *svc, /* * Create a template according to the service */ - if (svc->fwmark) - ct = ip_vs_conn_new(IPPROTO_IP, - snet, 0, - htonl(svc->fwmark), 0, - dest->addr, 0, + if (svc->fwmark) { + union nf_inet_addr fwmark = { + .all = { 0, 0, 0, htonl(svc->fwmark) } + }; + + ct = ip_vs_conn_new(svc->af, IPPROTO_IP, + &snet, 0, + &fwmark, 0, + &dest->addr, 0, IP_VS_CONN_F_TEMPLATE, dest); - else - ct = ip_vs_conn_new(iph->protocol, - snet, 0, - iph->daddr, 0, - dest->addr, 0, + } else + ct = ip_vs_conn_new(svc->af, iph.protocol, + &snet, 0, + &iph.daddr, 0, + &dest->addr, 0, IP_VS_CONN_F_TEMPLATE, dest); if (ct == NULL) @@ -312,10 +336,10 @@ ip_vs_sched_persist(struct ip_vs_service *svc, /* * Create a new connection according to the template */ - cp = ip_vs_conn_new(iph->protocol, - iph->saddr, ports[0], - iph->daddr, ports[1], - dest->addr, dport, + cp = ip_vs_conn_new(svc->af, iph.protocol, + &iph.saddr, ports[0], + &iph.daddr, ports[1], + &dest->addr, dport, 0, dest); if (cp == NULL) { @@ -344,12 +368,12 @@ struct ip_vs_conn * ip_vs_schedule(struct ip_vs_service *svc, const struct sk_buff *skb) { struct ip_vs_conn *cp = NULL; - struct iphdr *iph = ip_hdr(skb); + struct ip_vs_iphdr iph; struct ip_vs_dest *dest; __be16 _ports[2], *pptr; - pptr = skb_header_pointer(skb, iph->ihl*4, - sizeof(_ports), _ports); + ip_vs_fill_iphdr(svc->af, skb_network_header(skb), &iph); + pptr = skb_header_pointer(skb, iph.len, sizeof(_ports), _ports); if (pptr == NULL) return NULL; @@ -379,22 +403,22 @@ ip_vs_schedule(struct ip_vs_service *svc, const struct sk_buff *skb) /* * Create a connection entry. */ - cp = ip_vs_conn_new(iph->protocol, - iph->saddr, pptr[0], - iph->daddr, pptr[1], - dest->addr, dest->port?dest->port:pptr[1], + cp = ip_vs_conn_new(svc->af, iph.protocol, + &iph.saddr, pptr[0], + &iph.daddr, pptr[1], + &dest->addr, dest->port ? dest->port : pptr[1], 0, dest); if (cp == NULL) return NULL; - IP_VS_DBG(6, "Schedule fwd:%c c:%u.%u.%u.%u:%u v:%u.%u.%u.%u:%u " - "d:%u.%u.%u.%u:%u conn->flags:%X conn->refcnt:%d\n", - ip_vs_fwd_tag(cp), - NIPQUAD(cp->caddr), ntohs(cp->cport), - NIPQUAD(cp->vaddr), ntohs(cp->vport), - NIPQUAD(cp->daddr), ntohs(cp->dport), - cp->flags, atomic_read(&cp->refcnt)); + IP_VS_DBG_BUF(6, "Schedule fwd:%c c:%s:%u v:%s:%u " + "d:%s:%u conn->flags:%X conn->refcnt:%d\n", + ip_vs_fwd_tag(cp), + IP_VS_DBG_ADDR(svc->af, &cp->caddr), ntohs(cp->cport), + IP_VS_DBG_ADDR(svc->af, &cp->vaddr), ntohs(cp->vport), + IP_VS_DBG_ADDR(svc->af, &cp->daddr), ntohs(cp->dport), + cp->flags, atomic_read(&cp->refcnt)); ip_vs_conn_stats(cp, svc); return cp; @@ -410,31 +434,39 @@ int ip_vs_leave(struct ip_vs_service *svc, struct sk_buff *skb, struct ip_vs_protocol *pp) { __be16 _ports[2], *pptr; - struct iphdr *iph = ip_hdr(skb); + struct ip_vs_iphdr iph; + int unicast; + ip_vs_fill_iphdr(svc->af, skb_network_header(skb), &iph); - pptr = skb_header_pointer(skb, iph->ihl*4, - sizeof(_ports), _ports); + pptr = skb_header_pointer(skb, iph.len, sizeof(_ports), _ports); if (pptr == NULL) { ip_vs_service_put(svc); return NF_DROP; } +#ifdef CONFIG_IP_VS_IPV6 + if (svc->af == AF_INET6) + unicast = ipv6_addr_type(&iph.daddr.in6) & IPV6_ADDR_UNICAST; + else +#endif + unicast = (inet_addr_type(&init_net, iph.daddr.ip) == RTN_UNICAST); + /* if it is fwmark-based service, the cache_bypass sysctl is up - and the destination is RTN_UNICAST (and not local), then create + and the destination is a non-local unicast, then create a cache_bypass connection entry */ - if (sysctl_ip_vs_cache_bypass && svc->fwmark - && (inet_addr_type(&init_net, iph->daddr) == RTN_UNICAST)) { + if (sysctl_ip_vs_cache_bypass && svc->fwmark && unicast) { int ret, cs; struct ip_vs_conn *cp; + union nf_inet_addr daddr = { .all = { 0, 0, 0, 0 } }; ip_vs_service_put(svc); /* create a new connection entry */ IP_VS_DBG(6, "ip_vs_leave: create a cache_bypass entry\n"); - cp = ip_vs_conn_new(iph->protocol, - iph->saddr, pptr[0], - iph->daddr, pptr[1], - 0, 0, + cp = ip_vs_conn_new(svc->af, iph.protocol, + &iph.saddr, pptr[0], + &iph.daddr, pptr[1], + &daddr, 0, IP_VS_CONN_F_BYPASS, NULL); if (cp == NULL) @@ -475,7 +507,14 @@ int ip_vs_leave(struct ip_vs_service *svc, struct sk_buff *skb, * created, the TCP RST packet cannot be sent, instead that * ICMP_PORT_UNREACH is sent here no matter it is TCP/UDP. --WZ */ - icmp_send(skb, ICMP_DEST_UNREACH, ICMP_PORT_UNREACH, 0); +#ifdef CONFIG_IP_VS_IPV6 + if (svc->af == AF_INET6) + icmpv6_send(skb, ICMPV6_DEST_UNREACH, ICMPV6_PORT_UNREACH, 0, + skb->dev); + else +#endif + icmp_send(skb, ICMP_DEST_UNREACH, ICMP_PORT_UNREACH, 0); + return NF_DROP; } @@ -514,6 +553,14 @@ static inline int ip_vs_gather_frags(struct sk_buff *skb, u_int32_t user) return err; } +#ifdef CONFIG_IP_VS_IPV6 +static inline int ip_vs_gather_frags_v6(struct sk_buff *skb, u_int32_t user) +{ + /* TODO IPv6: Find out what to do here for IPv6 */ + return 0; +} +#endif + /* * Packet has been made sufficiently writable in caller * - inout: 1=in->out, 0=out->in @@ -528,14 +575,14 @@ void ip_vs_nat_icmp(struct sk_buff *skb, struct ip_vs_protocol *pp, struct iphdr *ciph = (struct iphdr *)(icmph + 1); if (inout) { - iph->saddr = cp->vaddr; + iph->saddr = cp->vaddr.ip; ip_send_check(iph); - ciph->daddr = cp->vaddr; + ciph->daddr = cp->vaddr.ip; ip_send_check(ciph); } else { - iph->daddr = cp->daddr; + iph->daddr = cp->daddr.ip; ip_send_check(iph); - ciph->saddr = cp->daddr; + ciph->saddr = cp->daddr.ip; ip_send_check(ciph); } @@ -562,21 +609,112 @@ void ip_vs_nat_icmp(struct sk_buff *skb, struct ip_vs_protocol *pp, "Forwarding altered incoming ICMP"); } +#ifdef CONFIG_IP_VS_IPV6 +void ip_vs_nat_icmp_v6(struct sk_buff *skb, struct ip_vs_protocol *pp, + struct ip_vs_conn *cp, int inout) +{ + struct ipv6hdr *iph = ipv6_hdr(skb); + unsigned int icmp_offset = sizeof(struct ipv6hdr); + struct icmp6hdr *icmph = (struct icmp6hdr *)(skb_network_header(skb) + + icmp_offset); + struct ipv6hdr *ciph = (struct ipv6hdr *)(icmph + 1); + + if (inout) { + iph->saddr = cp->vaddr.in6; + ciph->daddr = cp->vaddr.in6; + } else { + iph->daddr = cp->daddr.in6; + ciph->saddr = cp->daddr.in6; + } + + /* the TCP/UDP port */ + if (IPPROTO_TCP == ciph->nexthdr || IPPROTO_UDP == ciph->nexthdr) { + __be16 *ports = (void *)ciph + sizeof(struct ipv6hdr); + + if (inout) + ports[1] = cp->vport; + else + ports[0] = cp->dport; + } + + /* And finally the ICMP checksum */ + icmph->icmp6_cksum = 0; + /* TODO IPv6: is this correct for ICMPv6? */ + ip_vs_checksum_complete(skb, icmp_offset); + skb->ip_summed = CHECKSUM_UNNECESSARY; + + if (inout) + IP_VS_DBG_PKT(11, pp, skb, (void *)ciph - (void *)iph, + "Forwarding altered outgoing ICMPv6"); + else + IP_VS_DBG_PKT(11, pp, skb, (void *)ciph - (void *)iph, + "Forwarding altered incoming ICMPv6"); +} +#endif + +/* Handle relevant response ICMP messages - forward to the right + * destination host. Used for NAT and local client. + */ +static int handle_response_icmp(int af, struct sk_buff *skb, + union nf_inet_addr *snet, + __u8 protocol, struct ip_vs_conn *cp, + struct ip_vs_protocol *pp, + unsigned int offset, unsigned int ihl) +{ + unsigned int verdict = NF_DROP; + + if (IP_VS_FWD_METHOD(cp) != 0) { + IP_VS_ERR("shouldn't reach here, because the box is on the " + "half connection in the tun/dr module.\n"); + } + + /* Ensure the checksum is correct */ + if (!skb_csum_unnecessary(skb) && ip_vs_checksum_complete(skb, ihl)) { + /* Failed checksum! */ + IP_VS_DBG_BUF(1, "Forward ICMP: failed checksum from %s!\n", + IP_VS_DBG_ADDR(af, snet)); + goto out; + } + + if (IPPROTO_TCP == protocol || IPPROTO_UDP == protocol) + offset += 2 * sizeof(__u16); + if (!skb_make_writable(skb, offset)) + goto out; + +#ifdef CONFIG_IP_VS_IPV6 + if (af == AF_INET6) + ip_vs_nat_icmp_v6(skb, pp, cp, 1); + else +#endif + ip_vs_nat_icmp(skb, pp, cp, 1); + + /* do the statistics and put it back */ + ip_vs_out_stats(cp, skb); + + skb->ipvs_property = 1; + verdict = NF_ACCEPT; + +out: + __ip_vs_conn_put(cp); + + return verdict; +} + /* * Handle ICMP messages in the inside-to-outside direction (outgoing). - * Find any that might be relevant, check against existing connections, - * forward to the right destination host if relevant. + * Find any that might be relevant, check against existing connections. * Currently handles error types - unreachable, quench, ttl exceeded. - * (Only used in VS/NAT) */ static int ip_vs_out_icmp(struct sk_buff *skb, int *related) { struct iphdr *iph; struct icmphdr _icmph, *ic; struct iphdr _ciph, *cih; /* The ip header contained within the ICMP */ + struct ip_vs_iphdr ciph; struct ip_vs_conn *cp; struct ip_vs_protocol *pp; - unsigned int offset, ihl, verdict; + unsigned int offset, ihl; + union nf_inet_addr snet; *related = 1; @@ -629,102 +767,231 @@ static int ip_vs_out_icmp(struct sk_buff *skb, int *related) offset += cih->ihl * 4; + ip_vs_fill_iphdr(AF_INET, cih, &ciph); /* The embedded headers contain source and dest in reverse order */ - cp = pp->conn_out_get(skb, pp, cih, offset, 1); + cp = pp->conn_out_get(AF_INET, skb, pp, &ciph, offset, 1); if (!cp) return NF_ACCEPT; - verdict = NF_DROP; + snet.ip = iph->saddr; + return handle_response_icmp(AF_INET, skb, &snet, cih->protocol, cp, + pp, offset, ihl); +} - if (IP_VS_FWD_METHOD(cp) != 0) { - IP_VS_ERR("shouldn't reach here, because the box is on the " - "half connection in the tun/dr module.\n"); +#ifdef CONFIG_IP_VS_IPV6 +static int ip_vs_out_icmp_v6(struct sk_buff *skb, int *related) +{ + struct ipv6hdr *iph; + struct icmp6hdr _icmph, *ic; + struct ipv6hdr _ciph, *cih; /* The ip header contained + within the ICMP */ + struct ip_vs_iphdr ciph; + struct ip_vs_conn *cp; + struct ip_vs_protocol *pp; + unsigned int offset; + union nf_inet_addr snet; + + *related = 1; + + /* reassemble IP fragments */ + if (ipv6_hdr(skb)->nexthdr == IPPROTO_FRAGMENT) { + if (ip_vs_gather_frags_v6(skb, IP_DEFRAG_VS_OUT)) + return NF_STOLEN; } - /* Ensure the checksum is correct */ - if (!skb_csum_unnecessary(skb) && ip_vs_checksum_complete(skb, ihl)) { - /* Failed checksum! */ - IP_VS_DBG(1, "Forward ICMP: failed checksum from %d.%d.%d.%d!\n", - NIPQUAD(iph->saddr)); - goto out; + iph = ipv6_hdr(skb); + offset = sizeof(struct ipv6hdr); + ic = skb_header_pointer(skb, offset, sizeof(_icmph), &_icmph); + if (ic == NULL) + return NF_DROP; + + IP_VS_DBG(12, "Outgoing ICMPv6 (%d,%d) " NIP6_FMT "->" NIP6_FMT "\n", + ic->icmp6_type, ntohs(icmpv6_id(ic)), + NIP6(iph->saddr), NIP6(iph->daddr)); + + /* + * Work through seeing if this is for us. + * These checks are supposed to be in an order that means easy + * things are checked first to speed up processing.... however + * this means that some packets will manage to get a long way + * down this stack and then be rejected, but that's life. + */ + if ((ic->icmp6_type != ICMPV6_DEST_UNREACH) && + (ic->icmp6_type != ICMPV6_PKT_TOOBIG) && + (ic->icmp6_type != ICMPV6_TIME_EXCEED)) { + *related = 0; + return NF_ACCEPT; } - if (IPPROTO_TCP == cih->protocol || IPPROTO_UDP == cih->protocol) - offset += 2 * sizeof(__u16); - if (!skb_make_writable(skb, offset)) - goto out; + /* Now find the contained IP header */ + offset += sizeof(_icmph); + cih = skb_header_pointer(skb, offset, sizeof(_ciph), &_ciph); + if (cih == NULL) + return NF_ACCEPT; /* The packet looks wrong, ignore */ - ip_vs_nat_icmp(skb, pp, cp, 1); + pp = ip_vs_proto_get(cih->nexthdr); + if (!pp) + return NF_ACCEPT; - /* do the statistics and put it back */ - ip_vs_out_stats(cp, skb); + /* Is the embedded protocol header present? */ + /* TODO: we don't support fragmentation at the moment anyways */ + if (unlikely(cih->nexthdr == IPPROTO_FRAGMENT && pp->dont_defrag)) + return NF_ACCEPT; - skb->ipvs_property = 1; - verdict = NF_ACCEPT; + IP_VS_DBG_PKT(11, pp, skb, offset, "Checking outgoing ICMPv6 for"); - out: - __ip_vs_conn_put(cp); + offset += sizeof(struct ipv6hdr); - return verdict; + ip_vs_fill_iphdr(AF_INET6, cih, &ciph); + /* The embedded headers contain source and dest in reverse order */ + cp = pp->conn_out_get(AF_INET6, skb, pp, &ciph, offset, 1); + if (!cp) + return NF_ACCEPT; + + ipv6_addr_copy(&snet.in6, &iph->saddr); + return handle_response_icmp(AF_INET6, skb, &snet, cih->nexthdr, cp, + pp, offset, sizeof(struct ipv6hdr)); } +#endif -static inline int is_tcp_reset(const struct sk_buff *skb) +static inline int is_tcp_reset(const struct sk_buff *skb, int nh_len) { struct tcphdr _tcph, *th; - th = skb_header_pointer(skb, ip_hdrlen(skb), sizeof(_tcph), &_tcph); + th = skb_header_pointer(skb, nh_len, sizeof(_tcph), &_tcph); if (th == NULL) return 0; return th->rst; } +/* Handle response packets: rewrite addresses and send away... + * Used for NAT and local client. + */ +static unsigned int +handle_response(int af, struct sk_buff *skb, struct ip_vs_protocol *pp, + struct ip_vs_conn *cp, int ihl) +{ + IP_VS_DBG_PKT(11, pp, skb, 0, "Outgoing packet"); + + if (!skb_make_writable(skb, ihl)) + goto drop; + + /* mangle the packet */ + if (pp->snat_handler && !pp->snat_handler(skb, pp, cp)) + goto drop; + +#ifdef CONFIG_IP_VS_IPV6 + if (af == AF_INET6) + ipv6_hdr(skb)->saddr = cp->vaddr.in6; + else +#endif + { + ip_hdr(skb)->saddr = cp->vaddr.ip; + ip_send_check(ip_hdr(skb)); + } + + /* For policy routing, packets originating from this + * machine itself may be routed differently to packets + * passing through. We want this packet to be routed as + * if it came from this machine itself. So re-compute + * the routing information. + */ +#ifdef CONFIG_IP_VS_IPV6 + if (af == AF_INET6) { + if (ip6_route_me_harder(skb) != 0) + goto drop; + } else +#endif + if (ip_route_me_harder(skb, RTN_LOCAL) != 0) + goto drop; + + IP_VS_DBG_PKT(10, pp, skb, 0, "After SNAT"); + + ip_vs_out_stats(cp, skb); + ip_vs_set_state(cp, IP_VS_DIR_OUTPUT, skb, pp); + ip_vs_conn_put(cp); + + skb->ipvs_property = 1; + + LeaveFunction(11); + return NF_ACCEPT; + +drop: + ip_vs_conn_put(cp); + kfree_skb(skb); + return NF_STOLEN; +} + /* * It is hooked at the NF_INET_FORWARD chain, used only for VS/NAT. - * Check if outgoing packet belongs to the established ip_vs_conn, - * rewrite addresses of the packet and send it on its way... + * Check if outgoing packet belongs to the established ip_vs_conn. */ static unsigned int ip_vs_out(unsigned int hooknum, struct sk_buff *skb, const struct net_device *in, const struct net_device *out, int (*okfn)(struct sk_buff *)) { - struct iphdr *iph; + struct ip_vs_iphdr iph; struct ip_vs_protocol *pp; struct ip_vs_conn *cp; - int ihl; + int af; EnterFunction(11); + af = (skb->protocol == htons(ETH_P_IP)) ? AF_INET : AF_INET6; + if (skb->ipvs_property) return NF_ACCEPT; - iph = ip_hdr(skb); - if (unlikely(iph->protocol == IPPROTO_ICMP)) { - int related, verdict = ip_vs_out_icmp(skb, &related); + ip_vs_fill_iphdr(af, skb_network_header(skb), &iph); +#ifdef CONFIG_IP_VS_IPV6 + if (af == AF_INET6) { + if (unlikely(iph.protocol == IPPROTO_ICMPV6)) { + int related, verdict = ip_vs_out_icmp_v6(skb, &related); - if (related) - return verdict; - iph = ip_hdr(skb); - } + if (related) + return verdict; + ip_vs_fill_iphdr(af, skb_network_header(skb), &iph); + } + } else +#endif + if (unlikely(iph.protocol == IPPROTO_ICMP)) { + int related, verdict = ip_vs_out_icmp(skb, &related); - pp = ip_vs_proto_get(iph->protocol); + if (related) + return verdict; + ip_vs_fill_iphdr(af, skb_network_header(skb), &iph); + } + + pp = ip_vs_proto_get(iph.protocol); if (unlikely(!pp)) return NF_ACCEPT; /* reassemble IP fragments */ - if (unlikely(iph->frag_off & htons(IP_MF|IP_OFFSET) && - !pp->dont_defrag)) { - if (ip_vs_gather_frags(skb, IP_DEFRAG_VS_OUT)) - return NF_STOLEN; - iph = ip_hdr(skb); - } +#ifdef CONFIG_IP_VS_IPV6 + if (af == AF_INET6) { + if (unlikely(iph.protocol == IPPROTO_ICMPV6)) { + int related, verdict = ip_vs_out_icmp_v6(skb, &related); + + if (related) + return verdict; - ihl = iph->ihl << 2; + ip_vs_fill_iphdr(af, skb_network_header(skb), &iph); + } + } else +#endif + if (unlikely(ip_hdr(skb)->frag_off & htons(IP_MF|IP_OFFSET) && + !pp->dont_defrag)) { + if (ip_vs_gather_frags(skb, IP_DEFRAG_VS_OUT)) + return NF_STOLEN; + + ip_vs_fill_iphdr(af, skb_network_header(skb), &iph); + } /* * Check if the packet belongs to an existing entry */ - cp = pp->conn_out_get(skb, pp, iph, ihl, 0); + cp = pp->conn_out_get(af, skb, pp, &iph, iph.len, 0); if (unlikely(!cp)) { if (sysctl_ip_vs_nat_icmp_send && @@ -732,21 +999,31 @@ ip_vs_out(unsigned int hooknum, struct sk_buff *skb, pp->protocol == IPPROTO_UDP)) { __be16 _ports[2], *pptr; - pptr = skb_header_pointer(skb, ihl, + pptr = skb_header_pointer(skb, iph.len, sizeof(_ports), _ports); if (pptr == NULL) return NF_ACCEPT; /* Not for me */ - if (ip_vs_lookup_real_service(iph->protocol, - iph->saddr, pptr[0])) { + if (ip_vs_lookup_real_service(af, iph.protocol, + &iph.saddr, + pptr[0])) { /* * Notify the real server: there is no * existing entry if it is not RST * packet or not TCP packet. */ - if (iph->protocol != IPPROTO_TCP - || !is_tcp_reset(skb)) { - icmp_send(skb,ICMP_DEST_UNREACH, - ICMP_PORT_UNREACH, 0); + if (iph.protocol != IPPROTO_TCP + || !is_tcp_reset(skb, iph.len)) { +#ifdef CONFIG_IP_VS_IPV6 + if (af == AF_INET6) + icmpv6_send(skb, + ICMPV6_DEST_UNREACH, + ICMPV6_PORT_UNREACH, + 0, skb->dev); + else +#endif + icmp_send(skb, + ICMP_DEST_UNREACH, + ICMP_PORT_UNREACH, 0); return NF_DROP; } } @@ -756,41 +1033,7 @@ ip_vs_out(unsigned int hooknum, struct sk_buff *skb, return NF_ACCEPT; } - IP_VS_DBG_PKT(11, pp, skb, 0, "Outgoing packet"); - - if (!skb_make_writable(skb, ihl)) - goto drop; - - /* mangle the packet */ - if (pp->snat_handler && !pp->snat_handler(skb, pp, cp)) - goto drop; - ip_hdr(skb)->saddr = cp->vaddr; - ip_send_check(ip_hdr(skb)); - - /* For policy routing, packets originating from this - * machine itself may be routed differently to packets - * passing through. We want this packet to be routed as - * if it came from this machine itself. So re-compute - * the routing information. - */ - if (ip_route_me_harder(skb, RTN_LOCAL) != 0) - goto drop; - - IP_VS_DBG_PKT(10, pp, skb, 0, "After SNAT"); - - ip_vs_out_stats(cp, skb); - ip_vs_set_state(cp, IP_VS_DIR_OUTPUT, skb, pp); - ip_vs_conn_put(cp); - - skb->ipvs_property = 1; - - LeaveFunction(11); - return NF_ACCEPT; - - drop: - ip_vs_conn_put(cp); - kfree_skb(skb); - return NF_STOLEN; + return handle_response(af, skb, pp, cp, iph.len); } @@ -806,9 +1049,11 @@ ip_vs_in_icmp(struct sk_buff *skb, int *related, unsigned int hooknum) struct iphdr *iph; struct icmphdr _icmph, *ic; struct iphdr _ciph, *cih; /* The ip header contained within the ICMP */ + struct ip_vs_iphdr ciph; struct ip_vs_conn *cp; struct ip_vs_protocol *pp; unsigned int offset, ihl, verdict; + union nf_inet_addr snet; *related = 1; @@ -862,10 +1107,20 @@ ip_vs_in_icmp(struct sk_buff *skb, int *related, unsigned int hooknum) offset += cih->ihl * 4; + ip_vs_fill_iphdr(AF_INET, cih, &ciph); /* The embedded headers contain source and dest in reverse order */ - cp = pp->conn_in_get(skb, pp, cih, offset, 1); - if (!cp) + cp = pp->conn_in_get(AF_INET, skb, pp, &ciph, offset, 1); + if (!cp) { + /* The packet could also belong to a local client */ + cp = pp->conn_out_get(AF_INET, skb, pp, &ciph, offset, 1); + if (cp) { + snet.ip = iph->saddr; + return handle_response_icmp(AF_INET, skb, &snet, + cih->protocol, cp, pp, + offset, ihl); + } return NF_ACCEPT; + } verdict = NF_DROP; @@ -890,6 +1145,105 @@ ip_vs_in_icmp(struct sk_buff *skb, int *related, unsigned int hooknum) return verdict; } +#ifdef CONFIG_IP_VS_IPV6 +static int +ip_vs_in_icmp_v6(struct sk_buff *skb, int *related, unsigned int hooknum) +{ + struct ipv6hdr *iph; + struct icmp6hdr _icmph, *ic; + struct ipv6hdr _ciph, *cih; /* The ip header contained + within the ICMP */ + struct ip_vs_iphdr ciph; + struct ip_vs_conn *cp; + struct ip_vs_protocol *pp; + unsigned int offset, verdict; + union nf_inet_addr snet; + + *related = 1; + + /* reassemble IP fragments */ + if (ipv6_hdr(skb)->nexthdr == IPPROTO_FRAGMENT) { + if (ip_vs_gather_frags_v6(skb, hooknum == NF_INET_LOCAL_IN ? + IP_DEFRAG_VS_IN : + IP_DEFRAG_VS_FWD)) + return NF_STOLEN; + } + + iph = ipv6_hdr(skb); + offset = sizeof(struct ipv6hdr); + ic = skb_header_pointer(skb, offset, sizeof(_icmph), &_icmph); + if (ic == NULL) + return NF_DROP; + + IP_VS_DBG(12, "Incoming ICMPv6 (%d,%d) " NIP6_FMT "->" NIP6_FMT "\n", + ic->icmp6_type, ntohs(icmpv6_id(ic)), + NIP6(iph->saddr), NIP6(iph->daddr)); + + /* + * Work through seeing if this is for us. + * These checks are supposed to be in an order that means easy + * things are checked first to speed up processing.... however + * this means that some packets will manage to get a long way + * down this stack and then be rejected, but that's life. + */ + if ((ic->icmp6_type != ICMPV6_DEST_UNREACH) && + (ic->icmp6_type != ICMPV6_PKT_TOOBIG) && + (ic->icmp6_type != ICMPV6_TIME_EXCEED)) { + *related = 0; + return NF_ACCEPT; + } + + /* Now find the contained IP header */ + offset += sizeof(_icmph); + cih = skb_header_pointer(skb, offset, sizeof(_ciph), &_ciph); + if (cih == NULL) + return NF_ACCEPT; /* The packet looks wrong, ignore */ + + pp = ip_vs_proto_get(cih->nexthdr); + if (!pp) + return NF_ACCEPT; + + /* Is the embedded protocol header present? */ + /* TODO: we don't support fragmentation at the moment anyways */ + if (unlikely(cih->nexthdr == IPPROTO_FRAGMENT && pp->dont_defrag)) + return NF_ACCEPT; + + IP_VS_DBG_PKT(11, pp, skb, offset, "Checking incoming ICMPv6 for"); + + offset += sizeof(struct ipv6hdr); + + ip_vs_fill_iphdr(AF_INET6, cih, &ciph); + /* The embedded headers contain source and dest in reverse order */ + cp = pp->conn_in_get(AF_INET6, skb, pp, &ciph, offset, 1); + if (!cp) { + /* The packet could also belong to a local client */ + cp = pp->conn_out_get(AF_INET6, skb, pp, &ciph, offset, 1); + if (cp) { + ipv6_addr_copy(&snet.in6, &iph->saddr); + return handle_response_icmp(AF_INET6, skb, &snet, + cih->nexthdr, + cp, pp, offset, + sizeof(struct ipv6hdr)); + } + return NF_ACCEPT; + } + + verdict = NF_DROP; + + /* do the statistics and put it back */ + ip_vs_in_stats(cp, skb); + if (IPPROTO_TCP == cih->nexthdr || IPPROTO_UDP == cih->nexthdr) + offset += 2 * sizeof(__u16); + verdict = ip_vs_icmp_xmit_v6(skb, cp, pp, offset); + /* do not touch skb anymore */ + + __ip_vs_conn_put(cp); + + return verdict; +} +#endif + + /* * Check if it's for virtual services, look it up, * and send it on its way... @@ -899,50 +1253,54 @@ ip_vs_in(unsigned int hooknum, struct sk_buff *skb, const struct net_device *in, const struct net_device *out, int (*okfn)(struct sk_buff *)) { - struct iphdr *iph; + struct ip_vs_iphdr iph; struct ip_vs_protocol *pp; struct ip_vs_conn *cp; - int ret, restart; - int ihl; + int ret, restart, af; + + af = (skb->protocol == htons(ETH_P_IP)) ? AF_INET : AF_INET6; + + ip_vs_fill_iphdr(af, skb_network_header(skb), &iph); /* - * Big tappo: only PACKET_HOST (neither loopback nor mcasts) - * ... don't know why 1st test DOES NOT include 2nd (?) + * Big tappo: only PACKET_HOST, including loopback for local client + * Don't handle local packets on IPv6 for now */ - if (unlikely(skb->pkt_type != PACKET_HOST - || skb->dev->flags & IFF_LOOPBACK || skb->sk)) { - IP_VS_DBG(12, "packet type=%d proto=%d daddr=%d.%d.%d.%d ignored\n", - skb->pkt_type, - ip_hdr(skb)->protocol, - NIPQUAD(ip_hdr(skb)->daddr)); + if (unlikely(skb->pkt_type != PACKET_HOST)) { + IP_VS_DBG_BUF(12, "packet type=%d proto=%d daddr=%s ignored\n", + skb->pkt_type, + iph.protocol, + IP_VS_DBG_ADDR(af, &iph.daddr)); return NF_ACCEPT; } - iph = ip_hdr(skb); - if (unlikely(iph->protocol == IPPROTO_ICMP)) { + if (unlikely(iph.protocol == IPPROTO_ICMP)) { int related, verdict = ip_vs_in_icmp(skb, &related, hooknum); if (related) return verdict; - iph = ip_hdr(skb); + ip_vs_fill_iphdr(af, skb_network_header(skb), &iph); } /* Protocol supported? */ - pp = ip_vs_proto_get(iph->protocol); + pp = ip_vs_proto_get(iph.protocol); if (unlikely(!pp)) return NF_ACCEPT; - ihl = iph->ihl << 2; - /* * Check if the packet belongs to an existing connection entry */ - cp = pp->conn_in_get(skb, pp, iph, ihl, 0); + cp = pp->conn_in_get(af, skb, pp, &iph, iph.len, 0); if (unlikely(!cp)) { int v; - if (!pp->conn_schedule(skb, pp, &v, &cp)) + /* For local client packets, it could be a response */ + cp = pp->conn_out_get(af, skb, pp, &iph, iph.len, 0); + if (cp) + return handle_response(af, skb, pp, cp, iph.len); + + if (!pp->conn_schedule(af, skb, pp, &v, &cp)) return v; } @@ -986,14 +1344,16 @@ ip_vs_in(unsigned int hooknum, struct sk_buff *skb, * encorage the standby servers to update the connections timeout */ atomic_inc(&cp->in_pkts); - if ((ip_vs_sync_state & IP_VS_STATE_MASTER) && + if (af == AF_INET && + (ip_vs_sync_state & IP_VS_STATE_MASTER) && (((cp->protocol != IPPROTO_TCP || cp->state == IP_VS_TCP_S_ESTABLISHED) && (atomic_read(&cp->in_pkts) % sysctl_ip_vs_sync_threshold[1] == sysctl_ip_vs_sync_threshold[0])) || ((cp->protocol == IPPROTO_TCP) && (cp->old_state != cp->state) && ((cp->state == IP_VS_TCP_S_FIN_WAIT) || - (cp->state == IP_VS_TCP_S_CLOSE))))) + (cp->state == IP_VS_TCP_S_CLOSE_WAIT) || + (cp->state == IP_VS_TCP_S_TIME_WAIT))))) ip_vs_sync_conn(cp); cp->old_state = cp->state; @@ -1024,6 +1384,21 @@ ip_vs_forward_icmp(unsigned int hooknum, struct sk_buff *skb, return ip_vs_in_icmp(skb, &r, hooknum); } +#ifdef CONFIG_IP_VS_IPV6 +static unsigned int +ip_vs_forward_icmp_v6(unsigned int hooknum, struct sk_buff *skb, + const struct net_device *in, const struct net_device *out, + int (*okfn)(struct sk_buff *)) +{ + int r; + + if (ipv6_hdr(skb)->nexthdr != IPPROTO_ICMPV6) + return NF_ACCEPT; + + return ip_vs_in_icmp_v6(skb, &r, hooknum); +} +#endif + static struct nf_hook_ops ip_vs_ops[] __read_mostly = { /* After packet filtering, forward packet through VS/DR, VS/TUN, @@ -1061,6 +1436,43 @@ static struct nf_hook_ops ip_vs_ops[] __read_mostly = { .hooknum = NF_INET_POST_ROUTING, .priority = NF_IP_PRI_NAT_SRC-1, }, +#ifdef CONFIG_IP_VS_IPV6 + /* After packet filtering, forward packet through VS/DR, VS/TUN, + * or VS/NAT(change destination), so that filtering rules can be + * applied to IPVS. */ + { + .hook = ip_vs_in, + .owner = THIS_MODULE, + .pf = PF_INET6, + .hooknum = NF_INET_LOCAL_IN, + .priority = 100, + }, + /* After packet filtering, change source only for VS/NAT */ + { + .hook = ip_vs_out, + .owner = THIS_MODULE, + .pf = PF_INET6, + .hooknum = NF_INET_FORWARD, + .priority = 100, + }, + /* After packet filtering (but before ip_vs_out_icmp), catch icmp + * destined for 0.0.0.0/0, which is for incoming IPVS connections */ + { + .hook = ip_vs_forward_icmp_v6, + .owner = THIS_MODULE, + .pf = PF_INET6, + .hooknum = NF_INET_FORWARD, + .priority = 99, + }, + /* Before the netfilter connection tracking, exit from POST_ROUTING */ + { + .hook = ip_vs_post_routing, + .owner = THIS_MODULE, + .pf = PF_INET6, + .hooknum = NF_INET_POST_ROUTING, + .priority = NF_IP6_PRI_NAT_SRC-1, + }, +#endif }; @@ -1071,10 +1483,12 @@ static int __init ip_vs_init(void) { int ret; + ip_vs_estimator_init(); + ret = ip_vs_control_init(); if (ret < 0) { IP_VS_ERR("can't setup control.\n"); - goto cleanup_nothing; + goto cleanup_estimator; } ip_vs_protocol_init(); @@ -1107,7 +1521,8 @@ static int __init ip_vs_init(void) cleanup_protocol: ip_vs_protocol_cleanup(); ip_vs_control_cleanup(); - cleanup_nothing: + cleanup_estimator: + ip_vs_estimator_cleanup(); return ret; } @@ -1118,6 +1533,7 @@ static void __exit ip_vs_cleanup(void) ip_vs_app_cleanup(); ip_vs_protocol_cleanup(); ip_vs_control_cleanup(); + ip_vs_estimator_cleanup(); IP_VS_INFO("ipvs unloaded.\n"); } diff --git a/net/ipv4/ipvs/ip_vs_ctl.c b/net/netfilter/ipvs/ip_vs_ctl.c index 94c5767c8e01..0302cf3e5039 100644 --- a/net/ipv4/ipvs/ip_vs_ctl.c +++ b/net/netfilter/ipvs/ip_vs_ctl.c @@ -5,8 +5,6 @@ * high-performance and highly available server based on a * cluster of servers. * - * Version: $Id: ip_vs_ctl.c,v 1.36 2003/06/08 09:31:19 wensong Exp $ - * * Authors: Wensong Zhang <wensong@linuxvirtualserver.org> * Peter Kese <peter.kese@ijs.si> * Julian Anastasov <ja@ssi.bg> @@ -37,8 +35,13 @@ #include <net/net_namespace.h> #include <net/ip.h> +#ifdef CONFIG_IP_VS_IPV6 +#include <net/ipv6.h> +#include <net/ip6_route.h> +#endif #include <net/route.h> #include <net/sock.h> +#include <net/genetlink.h> #include <asm/uaccess.h> @@ -92,6 +95,26 @@ int ip_vs_get_debug_level(void) } #endif +#ifdef CONFIG_IP_VS_IPV6 +/* Taken from rt6_fill_node() in net/ipv6/route.c, is there a better way? */ +static int __ip_vs_addr_is_local_v6(const struct in6_addr *addr) +{ + struct rt6_info *rt; + struct flowi fl = { + .oif = 0, + .nl_u = { + .ip6_u = { + .daddr = *addr, + .saddr = { .s6_addr32 = {0, 0, 0, 0} }, } }, + }; + + rt = (struct rt6_info *)ip6_route_output(&init_net, NULL, &fl); + if (rt && rt->rt6i_dev && (rt->rt6i_dev->flags & IFF_LOOPBACK)) + return 1; + + return 0; +} +#endif /* * update_defense_level is called from keventd and from sysctl, * so it needs to protect itself from softirqs @@ -283,11 +306,19 @@ static atomic_t ip_vs_nullsvc_counter = ATOMIC_INIT(0); * Returns hash value for virtual service */ static __inline__ unsigned -ip_vs_svc_hashkey(unsigned proto, __be32 addr, __be16 port) +ip_vs_svc_hashkey(int af, unsigned proto, const union nf_inet_addr *addr, + __be16 port) { register unsigned porth = ntohs(port); + __be32 addr_fold = addr->ip; - return (proto^ntohl(addr)^(porth>>IP_VS_SVC_TAB_BITS)^porth) +#ifdef CONFIG_IP_VS_IPV6 + if (af == AF_INET6) + addr_fold = addr->ip6[0]^addr->ip6[1]^ + addr->ip6[2]^addr->ip6[3]; +#endif + + return (proto^ntohl(addr_fold)^(porth>>IP_VS_SVC_TAB_BITS)^porth) & IP_VS_SVC_TAB_MASK; } @@ -318,7 +349,8 @@ static int ip_vs_svc_hash(struct ip_vs_service *svc) /* * Hash it by <protocol,addr,port> in ip_vs_svc_table */ - hash = ip_vs_svc_hashkey(svc->protocol, svc->addr, svc->port); + hash = ip_vs_svc_hashkey(svc->af, svc->protocol, &svc->addr, + svc->port); list_add(&svc->s_list, &ip_vs_svc_table[hash]); } else { /* @@ -364,17 +396,19 @@ static int ip_vs_svc_unhash(struct ip_vs_service *svc) /* * Get service by {proto,addr,port} in the service table. */ -static __inline__ struct ip_vs_service * -__ip_vs_service_get(__u16 protocol, __be32 vaddr, __be16 vport) +static inline struct ip_vs_service * +__ip_vs_service_get(int af, __u16 protocol, const union nf_inet_addr *vaddr, + __be16 vport) { unsigned hash; struct ip_vs_service *svc; /* Check for "full" addressed entries */ - hash = ip_vs_svc_hashkey(protocol, vaddr, vport); + hash = ip_vs_svc_hashkey(af, protocol, vaddr, vport); list_for_each_entry(svc, &ip_vs_svc_table[hash], s_list){ - if ((svc->addr == vaddr) + if ((svc->af == af) + && ip_vs_addr_equal(af, &svc->addr, vaddr) && (svc->port == vport) && (svc->protocol == protocol)) { /* HIT */ @@ -390,7 +424,8 @@ __ip_vs_service_get(__u16 protocol, __be32 vaddr, __be16 vport) /* * Get service by {fwmark} in the service table. */ -static __inline__ struct ip_vs_service *__ip_vs_svc_fwm_get(__u32 fwmark) +static inline struct ip_vs_service * +__ip_vs_svc_fwm_get(int af, __u32 fwmark) { unsigned hash; struct ip_vs_service *svc; @@ -399,7 +434,7 @@ static __inline__ struct ip_vs_service *__ip_vs_svc_fwm_get(__u32 fwmark) hash = ip_vs_svc_fwm_hashkey(fwmark); list_for_each_entry(svc, &ip_vs_svc_fwm_table[hash], f_list) { - if (svc->fwmark == fwmark) { + if (svc->fwmark == fwmark && svc->af == af) { /* HIT */ atomic_inc(&svc->usecnt); return svc; @@ -410,7 +445,8 @@ static __inline__ struct ip_vs_service *__ip_vs_svc_fwm_get(__u32 fwmark) } struct ip_vs_service * -ip_vs_service_get(__u32 fwmark, __u16 protocol, __be32 vaddr, __be16 vport) +ip_vs_service_get(int af, __u32 fwmark, __u16 protocol, + const union nf_inet_addr *vaddr, __be16 vport) { struct ip_vs_service *svc; @@ -419,14 +455,14 @@ ip_vs_service_get(__u32 fwmark, __u16 protocol, __be32 vaddr, __be16 vport) /* * Check the table hashed by fwmark first */ - if (fwmark && (svc = __ip_vs_svc_fwm_get(fwmark))) + if (fwmark && (svc = __ip_vs_svc_fwm_get(af, fwmark))) goto out; /* * Check the table hashed by <protocol,addr,port> * for "full" addressed entries */ - svc = __ip_vs_service_get(protocol, vaddr, vport); + svc = __ip_vs_service_get(af, protocol, vaddr, vport); if (svc == NULL && protocol == IPPROTO_TCP @@ -436,7 +472,7 @@ ip_vs_service_get(__u32 fwmark, __u16 protocol, __be32 vaddr, __be16 vport) * Check if ftp service entry exists, the packet * might belong to FTP data connections. */ - svc = __ip_vs_service_get(protocol, vaddr, FTPPORT); + svc = __ip_vs_service_get(af, protocol, vaddr, FTPPORT); } if (svc == NULL @@ -444,16 +480,16 @@ ip_vs_service_get(__u32 fwmark, __u16 protocol, __be32 vaddr, __be16 vport) /* * Check if the catch-all port (port zero) exists */ - svc = __ip_vs_service_get(protocol, vaddr, 0); + svc = __ip_vs_service_get(af, protocol, vaddr, 0); } out: read_unlock(&__ip_vs_svc_lock); - IP_VS_DBG(9, "lookup service: fwm %u %s %u.%u.%u.%u:%u %s\n", - fwmark, ip_vs_proto_name(protocol), - NIPQUAD(vaddr), ntohs(vport), - svc?"hit":"not hit"); + IP_VS_DBG_BUF(9, "lookup service: fwm %u %s %s:%u %s\n", + fwmark, ip_vs_proto_name(protocol), + IP_VS_DBG_ADDR(af, vaddr), ntohs(vport), + svc ? "hit" : "not hit"); return svc; } @@ -480,11 +516,20 @@ __ip_vs_unbind_svc(struct ip_vs_dest *dest) /* * Returns hash value for real service */ -static __inline__ unsigned ip_vs_rs_hashkey(__be32 addr, __be16 port) +static inline unsigned ip_vs_rs_hashkey(int af, + const union nf_inet_addr *addr, + __be16 port) { register unsigned porth = ntohs(port); + __be32 addr_fold = addr->ip; + +#ifdef CONFIG_IP_VS_IPV6 + if (af == AF_INET6) + addr_fold = addr->ip6[0]^addr->ip6[1]^ + addr->ip6[2]^addr->ip6[3]; +#endif - return (ntohl(addr)^(porth>>IP_VS_RTAB_BITS)^porth) + return (ntohl(addr_fold)^(porth>>IP_VS_RTAB_BITS)^porth) & IP_VS_RTAB_MASK; } @@ -504,7 +549,8 @@ static int ip_vs_rs_hash(struct ip_vs_dest *dest) * Hash by proto,addr,port, * which are the parameters of the real service. */ - hash = ip_vs_rs_hashkey(dest->addr, dest->port); + hash = ip_vs_rs_hashkey(dest->af, &dest->addr, dest->port); + list_add(&dest->d_list, &ip_vs_rtable[hash]); return 1; @@ -531,7 +577,9 @@ static int ip_vs_rs_unhash(struct ip_vs_dest *dest) * Lookup real service by <proto,addr,port> in the real service table. */ struct ip_vs_dest * -ip_vs_lookup_real_service(__u16 protocol, __be32 daddr, __be16 dport) +ip_vs_lookup_real_service(int af, __u16 protocol, + const union nf_inet_addr *daddr, + __be16 dport) { unsigned hash; struct ip_vs_dest *dest; @@ -540,11 +588,12 @@ ip_vs_lookup_real_service(__u16 protocol, __be32 daddr, __be16 dport) * Check for "full" addressed entries * Return the first found entry */ - hash = ip_vs_rs_hashkey(daddr, dport); + hash = ip_vs_rs_hashkey(af, daddr, dport); read_lock(&__ip_vs_rs_lock); list_for_each_entry(dest, &ip_vs_rtable[hash], d_list) { - if ((dest->addr == daddr) + if ((dest->af == af) + && ip_vs_addr_equal(af, &dest->addr, daddr) && (dest->port == dport) && ((dest->protocol == protocol) || dest->vfwmark)) { @@ -562,7 +611,8 @@ ip_vs_lookup_real_service(__u16 protocol, __be32 daddr, __be16 dport) * Lookup destination by {addr,port} in the given service */ static struct ip_vs_dest * -ip_vs_lookup_dest(struct ip_vs_service *svc, __be32 daddr, __be16 dport) +ip_vs_lookup_dest(struct ip_vs_service *svc, const union nf_inet_addr *daddr, + __be16 dport) { struct ip_vs_dest *dest; @@ -570,7 +620,9 @@ ip_vs_lookup_dest(struct ip_vs_service *svc, __be32 daddr, __be16 dport) * Find the destination for the given service */ list_for_each_entry(dest, &svc->destinations, n_list) { - if ((dest->addr == daddr) && (dest->port == dport)) { + if ((dest->af == svc->af) + && ip_vs_addr_equal(svc->af, &dest->addr, daddr) + && (dest->port == dport)) { /* HIT */ return dest; } @@ -589,13 +641,15 @@ ip_vs_lookup_dest(struct ip_vs_service *svc, __be32 daddr, __be16 dport) * ip_vs_lookup_real_service() looked promissing, but * seems not working as expected. */ -struct ip_vs_dest *ip_vs_find_dest(__be32 daddr, __be16 dport, - __be32 vaddr, __be16 vport, __u16 protocol) +struct ip_vs_dest *ip_vs_find_dest(int af, const union nf_inet_addr *daddr, + __be16 dport, + const union nf_inet_addr *vaddr, + __be16 vport, __u16 protocol) { struct ip_vs_dest *dest; struct ip_vs_service *svc; - svc = ip_vs_service_get(0, protocol, vaddr, vport); + svc = ip_vs_service_get(af, 0, protocol, vaddr, vport); if (!svc) return NULL; dest = ip_vs_lookup_dest(svc, daddr, dport); @@ -616,7 +670,8 @@ struct ip_vs_dest *ip_vs_find_dest(__be32 daddr, __be16 dport, * scheduling. */ static struct ip_vs_dest * -ip_vs_trash_get_dest(struct ip_vs_service *svc, __be32 daddr, __be16 dport) +ip_vs_trash_get_dest(struct ip_vs_service *svc, const union nf_inet_addr *daddr, + __be16 dport) { struct ip_vs_dest *dest, *nxt; @@ -624,17 +679,19 @@ ip_vs_trash_get_dest(struct ip_vs_service *svc, __be32 daddr, __be16 dport) * Find the destination in trash */ list_for_each_entry_safe(dest, nxt, &ip_vs_dest_trash, n_list) { - IP_VS_DBG(3, "Destination %u/%u.%u.%u.%u:%u still in trash, " - "dest->refcnt=%d\n", - dest->vfwmark, - NIPQUAD(dest->addr), ntohs(dest->port), - atomic_read(&dest->refcnt)); - if (dest->addr == daddr && + IP_VS_DBG_BUF(3, "Destination %u/%s:%u still in trash, " + "dest->refcnt=%d\n", + dest->vfwmark, + IP_VS_DBG_ADDR(svc->af, &dest->addr), + ntohs(dest->port), + atomic_read(&dest->refcnt)); + if (dest->af == svc->af && + ip_vs_addr_equal(svc->af, &dest->addr, daddr) && dest->port == dport && dest->vfwmark == svc->fwmark && dest->protocol == svc->protocol && (svc->fwmark || - (dest->vaddr == svc->addr && + (ip_vs_addr_equal(svc->af, &dest->vaddr, &svc->addr) && dest->vport == svc->port))) { /* HIT */ return dest; @@ -644,10 +701,11 @@ ip_vs_trash_get_dest(struct ip_vs_service *svc, __be32 daddr, __be16 dport) * Try to purge the destination from trash if not referenced */ if (atomic_read(&dest->refcnt) == 1) { - IP_VS_DBG(3, "Removing destination %u/%u.%u.%u.%u:%u " - "from trash\n", - dest->vfwmark, - NIPQUAD(dest->addr), ntohs(dest->port)); + IP_VS_DBG_BUF(3, "Removing destination %u/%s:%u " + "from trash\n", + dest->vfwmark, + IP_VS_DBG_ADDR(svc->af, &dest->addr), + ntohs(dest->port)); list_del(&dest->n_list); ip_vs_dst_reset(dest); __ip_vs_unbind_svc(dest); @@ -685,9 +743,11 @@ static void ip_vs_zero_stats(struct ip_vs_stats *stats) { spin_lock_bh(&stats->lock); - memset(stats, 0, (char *)&stats->lock - (char *)stats); - spin_unlock_bh(&stats->lock); + + memset(&stats->ustats, 0, sizeof(stats->ustats)); ip_vs_zero_estimator(stats); + + spin_unlock_bh(&stats->lock); } /* @@ -695,7 +755,7 @@ ip_vs_zero_stats(struct ip_vs_stats *stats) */ static void __ip_vs_update_dest(struct ip_vs_service *svc, - struct ip_vs_dest *dest, struct ip_vs_dest_user *udest) + struct ip_vs_dest *dest, struct ip_vs_dest_user_kern *udest) { int conn_flags; @@ -704,10 +764,18 @@ __ip_vs_update_dest(struct ip_vs_service *svc, conn_flags = udest->conn_flags | IP_VS_CONN_F_INACTIVE; /* check if local node and update the flags */ - if (inet_addr_type(&init_net, udest->addr) == RTN_LOCAL) { - conn_flags = (conn_flags & ~IP_VS_CONN_F_FWD_MASK) - | IP_VS_CONN_F_LOCALNODE; - } +#ifdef CONFIG_IP_VS_IPV6 + if (svc->af == AF_INET6) { + if (__ip_vs_addr_is_local_v6(&udest->addr.in6)) { + conn_flags = (conn_flags & ~IP_VS_CONN_F_FWD_MASK) + | IP_VS_CONN_F_LOCALNODE; + } + } else +#endif + if (inet_addr_type(&init_net, udest->addr.ip) == RTN_LOCAL) { + conn_flags = (conn_flags & ~IP_VS_CONN_F_FWD_MASK) + | IP_VS_CONN_F_LOCALNODE; + } /* set the IP_VS_CONN_F_NOOUTPUT flag if not masquerading/NAT */ if ((conn_flags & IP_VS_CONN_F_FWD_MASK) != 0) { @@ -748,7 +816,7 @@ __ip_vs_update_dest(struct ip_vs_service *svc, * Create a destination for the given service */ static int -ip_vs_new_dest(struct ip_vs_service *svc, struct ip_vs_dest_user *udest, +ip_vs_new_dest(struct ip_vs_service *svc, struct ip_vs_dest_user_kern *udest, struct ip_vs_dest **dest_p) { struct ip_vs_dest *dest; @@ -756,9 +824,20 @@ ip_vs_new_dest(struct ip_vs_service *svc, struct ip_vs_dest_user *udest, EnterFunction(2); - atype = inet_addr_type(&init_net, udest->addr); - if (atype != RTN_LOCAL && atype != RTN_UNICAST) - return -EINVAL; +#ifdef CONFIG_IP_VS_IPV6 + if (svc->af == AF_INET6) { + atype = ipv6_addr_type(&udest->addr.in6); + if ((!(atype & IPV6_ADDR_UNICAST) || + atype & IPV6_ADDR_LINKLOCAL) && + !__ip_vs_addr_is_local_v6(&udest->addr.in6)) + return -EINVAL; + } else +#endif + { + atype = inet_addr_type(&init_net, udest->addr.ip); + if (atype != RTN_LOCAL && atype != RTN_UNICAST) + return -EINVAL; + } dest = kzalloc(sizeof(struct ip_vs_dest), GFP_ATOMIC); if (dest == NULL) { @@ -766,11 +845,12 @@ ip_vs_new_dest(struct ip_vs_service *svc, struct ip_vs_dest_user *udest, return -ENOMEM; } + dest->af = svc->af; dest->protocol = svc->protocol; dest->vaddr = svc->addr; dest->vport = svc->port; dest->vfwmark = svc->fwmark; - dest->addr = udest->addr; + ip_vs_addr_copy(svc->af, &dest->addr, &udest->addr); dest->port = udest->port; atomic_set(&dest->activeconns, 0); @@ -795,10 +875,10 @@ ip_vs_new_dest(struct ip_vs_service *svc, struct ip_vs_dest_user *udest, * Add a destination into an existing service */ static int -ip_vs_add_dest(struct ip_vs_service *svc, struct ip_vs_dest_user *udest) +ip_vs_add_dest(struct ip_vs_service *svc, struct ip_vs_dest_user_kern *udest) { struct ip_vs_dest *dest; - __be32 daddr = udest->addr; + union nf_inet_addr daddr; __be16 dport = udest->port; int ret; @@ -815,10 +895,13 @@ ip_vs_add_dest(struct ip_vs_service *svc, struct ip_vs_dest_user *udest) return -ERANGE; } + ip_vs_addr_copy(svc->af, &daddr, &udest->addr); + /* * Check if the dest already exists in the list */ - dest = ip_vs_lookup_dest(svc, daddr, dport); + dest = ip_vs_lookup_dest(svc, &daddr, dport); + if (dest != NULL) { IP_VS_DBG(1, "ip_vs_add_dest(): dest already exists\n"); return -EEXIST; @@ -828,15 +911,17 @@ ip_vs_add_dest(struct ip_vs_service *svc, struct ip_vs_dest_user *udest) * Check if the dest already exists in the trash and * is from the same service */ - dest = ip_vs_trash_get_dest(svc, daddr, dport); + dest = ip_vs_trash_get_dest(svc, &daddr, dport); + if (dest != NULL) { - IP_VS_DBG(3, "Get destination %u.%u.%u.%u:%u from trash, " - "dest->refcnt=%d, service %u/%u.%u.%u.%u:%u\n", - NIPQUAD(daddr), ntohs(dport), - atomic_read(&dest->refcnt), - dest->vfwmark, - NIPQUAD(dest->vaddr), - ntohs(dest->vport)); + IP_VS_DBG_BUF(3, "Get destination %s:%u from trash, " + "dest->refcnt=%d, service %u/%s:%u\n", + IP_VS_DBG_ADDR(svc->af, &daddr), ntohs(dport), + atomic_read(&dest->refcnt), + dest->vfwmark, + IP_VS_DBG_ADDR(svc->af, &dest->vaddr), + ntohs(dest->vport)); + __ip_vs_update_dest(svc, dest, udest); /* @@ -857,7 +942,8 @@ ip_vs_add_dest(struct ip_vs_service *svc, struct ip_vs_dest_user *udest) svc->num_dests++; /* call the update_service function of its scheduler */ - svc->scheduler->update_service(svc); + if (svc->scheduler->update_service) + svc->scheduler->update_service(svc); write_unlock_bh(&__ip_vs_svc_lock); return 0; @@ -887,7 +973,8 @@ ip_vs_add_dest(struct ip_vs_service *svc, struct ip_vs_dest_user *udest) svc->num_dests++; /* call the update_service function of its scheduler */ - svc->scheduler->update_service(svc); + if (svc->scheduler->update_service) + svc->scheduler->update_service(svc); write_unlock_bh(&__ip_vs_svc_lock); @@ -901,10 +988,10 @@ ip_vs_add_dest(struct ip_vs_service *svc, struct ip_vs_dest_user *udest) * Edit a destination in the given service */ static int -ip_vs_edit_dest(struct ip_vs_service *svc, struct ip_vs_dest_user *udest) +ip_vs_edit_dest(struct ip_vs_service *svc, struct ip_vs_dest_user_kern *udest) { struct ip_vs_dest *dest; - __be32 daddr = udest->addr; + union nf_inet_addr daddr; __be16 dport = udest->port; EnterFunction(2); @@ -920,10 +1007,13 @@ ip_vs_edit_dest(struct ip_vs_service *svc, struct ip_vs_dest_user *udest) return -ERANGE; } + ip_vs_addr_copy(svc->af, &daddr, &udest->addr); + /* * Lookup the destination list */ - dest = ip_vs_lookup_dest(svc, daddr, dport); + dest = ip_vs_lookup_dest(svc, &daddr, dport); + if (dest == NULL) { IP_VS_DBG(1, "ip_vs_edit_dest(): dest doesn't exist\n"); return -ENOENT; @@ -937,7 +1027,8 @@ ip_vs_edit_dest(struct ip_vs_service *svc, struct ip_vs_dest_user *udest) IP_VS_WAIT_WHILE(atomic_read(&svc->usecnt) > 1); /* call the update_service, because server weight may be changed */ - svc->scheduler->update_service(svc); + if (svc->scheduler->update_service) + svc->scheduler->update_service(svc); write_unlock_bh(&__ip_vs_svc_lock); @@ -976,10 +1067,11 @@ static void __ip_vs_del_dest(struct ip_vs_dest *dest) atomic_dec(&dest->svc->refcnt); kfree(dest); } else { - IP_VS_DBG(3, "Moving dest %u.%u.%u.%u:%u into trash, " - "dest->refcnt=%d\n", - NIPQUAD(dest->addr), ntohs(dest->port), - atomic_read(&dest->refcnt)); + IP_VS_DBG_BUF(3, "Moving dest %s:%u into trash, " + "dest->refcnt=%d\n", + IP_VS_DBG_ADDR(dest->af, &dest->addr), + ntohs(dest->port), + atomic_read(&dest->refcnt)); list_add(&dest->n_list, &ip_vs_dest_trash); atomic_inc(&dest->refcnt); } @@ -1000,12 +1092,12 @@ static void __ip_vs_unlink_dest(struct ip_vs_service *svc, */ list_del(&dest->n_list); svc->num_dests--; - if (svcupd) { - /* - * Call the update_service function of its scheduler - */ - svc->scheduler->update_service(svc); - } + + /* + * Call the update_service function of its scheduler + */ + if (svcupd && svc->scheduler->update_service) + svc->scheduler->update_service(svc); } @@ -1013,15 +1105,15 @@ static void __ip_vs_unlink_dest(struct ip_vs_service *svc, * Delete a destination server in the given service */ static int -ip_vs_del_dest(struct ip_vs_service *svc,struct ip_vs_dest_user *udest) +ip_vs_del_dest(struct ip_vs_service *svc, struct ip_vs_dest_user_kern *udest) { struct ip_vs_dest *dest; - __be32 daddr = udest->addr; __be16 dport = udest->port; EnterFunction(2); - dest = ip_vs_lookup_dest(svc, daddr, dport); + dest = ip_vs_lookup_dest(svc, &udest->addr, dport); + if (dest == NULL) { IP_VS_DBG(1, "ip_vs_del_dest(): destination not found!\n"); return -ENOENT; @@ -1056,7 +1148,8 @@ ip_vs_del_dest(struct ip_vs_service *svc,struct ip_vs_dest_user *udest) * Add a service into the service hash table */ static int -ip_vs_add_service(struct ip_vs_service_user *u, struct ip_vs_service **svc_p) +ip_vs_add_service(struct ip_vs_service_user_kern *u, + struct ip_vs_service **svc_p) { int ret = 0; struct ip_vs_scheduler *sched = NULL; @@ -1074,6 +1167,19 @@ ip_vs_add_service(struct ip_vs_service_user *u, struct ip_vs_service **svc_p) goto out_mod_dec; } +#ifdef CONFIG_IP_VS_IPV6 + if (u->af == AF_INET6) { + if (!sched->supports_ipv6) { + ret = -EAFNOSUPPORT; + goto out_err; + } + if ((u->netmask < 1) || (u->netmask > 128)) { + ret = -EINVAL; + goto out_err; + } + } +#endif + svc = kzalloc(sizeof(struct ip_vs_service), GFP_ATOMIC); if (svc == NULL) { IP_VS_DBG(1, "ip_vs_add_service: kmalloc failed.\n"); @@ -1085,8 +1191,9 @@ ip_vs_add_service(struct ip_vs_service_user *u, struct ip_vs_service **svc_p) atomic_set(&svc->usecnt, 1); atomic_set(&svc->refcnt, 0); + svc->af = u->af; svc->protocol = u->protocol; - svc->addr = u->addr; + ip_vs_addr_copy(svc->af, &svc->addr, &u->addr); svc->port = u->port; svc->fwmark = u->fwmark; svc->flags = u->flags; @@ -1110,7 +1217,10 @@ ip_vs_add_service(struct ip_vs_service_user *u, struct ip_vs_service **svc_p) atomic_inc(&ip_vs_nullsvc_counter); ip_vs_new_estimator(&svc->stats); - ip_vs_num_services++; + + /* Count only IPv4 services for old get/setsockopt interface */ + if (svc->af == AF_INET) + ip_vs_num_services++; /* Hash the service into the service table */ write_lock_bh(&__ip_vs_svc_lock); @@ -1145,7 +1255,7 @@ ip_vs_add_service(struct ip_vs_service_user *u, struct ip_vs_service **svc_p) * Edit a service and bind it with a new scheduler */ static int -ip_vs_edit_service(struct ip_vs_service *svc, struct ip_vs_service_user *u) +ip_vs_edit_service(struct ip_vs_service *svc, struct ip_vs_service_user_kern *u) { struct ip_vs_scheduler *sched, *old_sched; int ret = 0; @@ -1161,6 +1271,19 @@ ip_vs_edit_service(struct ip_vs_service *svc, struct ip_vs_service_user *u) } old_sched = sched; +#ifdef CONFIG_IP_VS_IPV6 + if (u->af == AF_INET6) { + if (!sched->supports_ipv6) { + ret = -EAFNOSUPPORT; + goto out; + } + if ((u->netmask < 1) || (u->netmask > 128)) { + ret = -EINVAL; + goto out; + } + } +#endif + write_lock_bh(&__ip_vs_svc_lock); /* @@ -1182,7 +1305,7 @@ ip_vs_edit_service(struct ip_vs_service *svc, struct ip_vs_service_user *u) */ if ((ret = ip_vs_unbind_scheduler(svc))) { old_sched = sched; - goto out; + goto out_unlock; } /* @@ -1201,12 +1324,15 @@ ip_vs_edit_service(struct ip_vs_service *svc, struct ip_vs_service_user *u) */ ip_vs_bind_scheduler(svc, old_sched); old_sched = sched; - goto out; + goto out_unlock; } } - out: + out_unlock: write_unlock_bh(&__ip_vs_svc_lock); +#ifdef CONFIG_IP_VS_IPV6 + out: +#endif if (old_sched) ip_vs_scheduler_put(old_sched); @@ -1225,7 +1351,10 @@ static void __ip_vs_del_service(struct ip_vs_service *svc) struct ip_vs_dest *dest, *nxt; struct ip_vs_scheduler *old_sched; - ip_vs_num_services--; + /* Count only IPv4 services for old get/setsockopt interface */ + if (svc->af == AF_INET) + ip_vs_num_services--; + ip_vs_kill_estimator(&svc->stats); /* Unbind scheduler */ @@ -1591,7 +1720,7 @@ static struct ctl_table vs_vars[] = { { .ctl_name = 0 } }; -struct ctl_path net_vs_ctl_path[] = { +const struct ctl_path net_vs_ctl_path[] = { { .procname = "net", .ctl_name = CTL_NET, }, { .procname = "ipv4", .ctl_name = NET_IPV4, }, { .procname = "vs", }, @@ -1660,6 +1789,7 @@ static struct ip_vs_service *ip_vs_info_array(struct seq_file *seq, loff_t pos) } static void *ip_vs_info_seq_start(struct seq_file *seq, loff_t *pos) +__acquires(__ip_vs_svc_lock) { read_lock_bh(&__ip_vs_svc_lock); @@ -1713,6 +1843,7 @@ static void *ip_vs_info_seq_next(struct seq_file *seq, void *v, loff_t *pos) } static void ip_vs_info_seq_stop(struct seq_file *seq, void *v) +__releases(__ip_vs_svc_lock) { read_unlock_bh(&__ip_vs_svc_lock); } @@ -1733,15 +1864,25 @@ static int ip_vs_info_seq_show(struct seq_file *seq, void *v) const struct ip_vs_iter *iter = seq->private; const struct ip_vs_dest *dest; - if (iter->table == ip_vs_svc_table) - seq_printf(seq, "%s %08X:%04X %s ", - ip_vs_proto_name(svc->protocol), - ntohl(svc->addr), - ntohs(svc->port), - svc->scheduler->name); - else + if (iter->table == ip_vs_svc_table) { +#ifdef CONFIG_IP_VS_IPV6 + if (svc->af == AF_INET6) + seq_printf(seq, "%s [" NIP6_FMT "]:%04X %s ", + ip_vs_proto_name(svc->protocol), + NIP6(svc->addr.in6), + ntohs(svc->port), + svc->scheduler->name); + else +#endif + seq_printf(seq, "%s %08X:%04X %s ", + ip_vs_proto_name(svc->protocol), + ntohl(svc->addr.ip), + ntohs(svc->port), + svc->scheduler->name); + } else { seq_printf(seq, "FWM %08X %s ", svc->fwmark, svc->scheduler->name); + } if (svc->flags & IP_VS_SVC_F_PERSISTENT) seq_printf(seq, "persistent %d %08X\n", @@ -1751,13 +1892,29 @@ static int ip_vs_info_seq_show(struct seq_file *seq, void *v) seq_putc(seq, '\n'); list_for_each_entry(dest, &svc->destinations, n_list) { - seq_printf(seq, - " -> %08X:%04X %-7s %-6d %-10d %-10d\n", - ntohl(dest->addr), ntohs(dest->port), - ip_vs_fwd_name(atomic_read(&dest->conn_flags)), - atomic_read(&dest->weight), - atomic_read(&dest->activeconns), - atomic_read(&dest->inactconns)); +#ifdef CONFIG_IP_VS_IPV6 + if (dest->af == AF_INET6) + seq_printf(seq, + " -> [" NIP6_FMT "]:%04X" + " %-7s %-6d %-10d %-10d\n", + NIP6(dest->addr.in6), + ntohs(dest->port), + ip_vs_fwd_name(atomic_read(&dest->conn_flags)), + atomic_read(&dest->weight), + atomic_read(&dest->activeconns), + atomic_read(&dest->inactconns)); + else +#endif + seq_printf(seq, + " -> %08X:%04X " + "%-7s %-6d %-10d %-10d\n", + ntohl(dest->addr.ip), + ntohs(dest->port), + ip_vs_fwd_name(atomic_read(&dest->conn_flags)), + atomic_read(&dest->weight), + atomic_read(&dest->activeconns), + atomic_read(&dest->inactconns)); + } } return 0; @@ -1786,7 +1943,9 @@ static const struct file_operations ip_vs_info_fops = { #endif -struct ip_vs_stats ip_vs_stats; +struct ip_vs_stats ip_vs_stats = { + .lock = __SPIN_LOCK_UNLOCKED(ip_vs_stats.lock), +}; #ifdef CONFIG_PROC_FS static int ip_vs_stats_show(struct seq_file *seq, void *v) @@ -1799,20 +1958,20 @@ static int ip_vs_stats_show(struct seq_file *seq, void *v) " Conns Packets Packets Bytes Bytes\n"); spin_lock_bh(&ip_vs_stats.lock); - seq_printf(seq, "%8X %8X %8X %16LX %16LX\n\n", ip_vs_stats.conns, - ip_vs_stats.inpkts, ip_vs_stats.outpkts, - (unsigned long long) ip_vs_stats.inbytes, - (unsigned long long) ip_vs_stats.outbytes); + seq_printf(seq, "%8X %8X %8X %16LX %16LX\n\n", ip_vs_stats.ustats.conns, + ip_vs_stats.ustats.inpkts, ip_vs_stats.ustats.outpkts, + (unsigned long long) ip_vs_stats.ustats.inbytes, + (unsigned long long) ip_vs_stats.ustats.outbytes); /* 01234567 01234567 01234567 0123456701234567 0123456701234567 */ seq_puts(seq, " Conns/s Pkts/s Pkts/s Bytes/s Bytes/s\n"); seq_printf(seq,"%8X %8X %8X %16X %16X\n", - ip_vs_stats.cps, - ip_vs_stats.inpps, - ip_vs_stats.outpps, - ip_vs_stats.inbps, - ip_vs_stats.outbps); + ip_vs_stats.ustats.cps, + ip_vs_stats.ustats.inpps, + ip_vs_stats.ustats.outpps, + ip_vs_stats.ustats.inbps, + ip_vs_stats.ustats.outbps); spin_unlock_bh(&ip_vs_stats.lock); return 0; @@ -1887,14 +2046,44 @@ static const unsigned char set_arglen[SET_CMDID(IP_VS_SO_SET_MAX)+1] = { [SET_CMDID(IP_VS_SO_SET_ZERO)] = SERVICE_ARG_LEN, }; +static void ip_vs_copy_usvc_compat(struct ip_vs_service_user_kern *usvc, + struct ip_vs_service_user *usvc_compat) +{ + usvc->af = AF_INET; + usvc->protocol = usvc_compat->protocol; + usvc->addr.ip = usvc_compat->addr; + usvc->port = usvc_compat->port; + usvc->fwmark = usvc_compat->fwmark; + + /* Deep copy of sched_name is not needed here */ + usvc->sched_name = usvc_compat->sched_name; + + usvc->flags = usvc_compat->flags; + usvc->timeout = usvc_compat->timeout; + usvc->netmask = usvc_compat->netmask; +} + +static void ip_vs_copy_udest_compat(struct ip_vs_dest_user_kern *udest, + struct ip_vs_dest_user *udest_compat) +{ + udest->addr.ip = udest_compat->addr; + udest->port = udest_compat->port; + udest->conn_flags = udest_compat->conn_flags; + udest->weight = udest_compat->weight; + udest->u_threshold = udest_compat->u_threshold; + udest->l_threshold = udest_compat->l_threshold; +} + static int do_ip_vs_set_ctl(struct sock *sk, int cmd, void __user *user, unsigned int len) { int ret; unsigned char arg[MAX_ARG_LEN]; - struct ip_vs_service_user *usvc; + struct ip_vs_service_user *usvc_compat; + struct ip_vs_service_user_kern usvc; struct ip_vs_service *svc; - struct ip_vs_dest_user *udest; + struct ip_vs_dest_user *udest_compat; + struct ip_vs_dest_user_kern udest; if (!capable(CAP_NET_ADMIN)) return -EPERM; @@ -1934,35 +2123,40 @@ do_ip_vs_set_ctl(struct sock *sk, int cmd, void __user *user, unsigned int len) goto out_unlock; } - usvc = (struct ip_vs_service_user *)arg; - udest = (struct ip_vs_dest_user *)(usvc + 1); + usvc_compat = (struct ip_vs_service_user *)arg; + udest_compat = (struct ip_vs_dest_user *)(usvc_compat + 1); + + /* We only use the new structs internally, so copy userspace compat + * structs to extended internal versions */ + ip_vs_copy_usvc_compat(&usvc, usvc_compat); + ip_vs_copy_udest_compat(&udest, udest_compat); if (cmd == IP_VS_SO_SET_ZERO) { /* if no service address is set, zero counters in all */ - if (!usvc->fwmark && !usvc->addr && !usvc->port) { + if (!usvc.fwmark && !usvc.addr.ip && !usvc.port) { ret = ip_vs_zero_all(); goto out_unlock; } } /* Check for valid protocol: TCP or UDP, even for fwmark!=0 */ - if (usvc->protocol!=IPPROTO_TCP && usvc->protocol!=IPPROTO_UDP) { + if (usvc.protocol != IPPROTO_TCP && usvc.protocol != IPPROTO_UDP) { IP_VS_ERR("set_ctl: invalid protocol: %d %d.%d.%d.%d:%d %s\n", - usvc->protocol, NIPQUAD(usvc->addr), - ntohs(usvc->port), usvc->sched_name); + usvc.protocol, NIPQUAD(usvc.addr.ip), + ntohs(usvc.port), usvc.sched_name); ret = -EFAULT; goto out_unlock; } /* Lookup the exact service by <protocol, addr, port> or fwmark */ - if (usvc->fwmark == 0) - svc = __ip_vs_service_get(usvc->protocol, - usvc->addr, usvc->port); + if (usvc.fwmark == 0) + svc = __ip_vs_service_get(usvc.af, usvc.protocol, + &usvc.addr, usvc.port); else - svc = __ip_vs_svc_fwm_get(usvc->fwmark); + svc = __ip_vs_svc_fwm_get(usvc.af, usvc.fwmark); if (cmd != IP_VS_SO_SET_ADD - && (svc == NULL || svc->protocol != usvc->protocol)) { + && (svc == NULL || svc->protocol != usvc.protocol)) { ret = -ESRCH; goto out_unlock; } @@ -1972,10 +2166,10 @@ do_ip_vs_set_ctl(struct sock *sk, int cmd, void __user *user, unsigned int len) if (svc != NULL) ret = -EEXIST; else - ret = ip_vs_add_service(usvc, &svc); + ret = ip_vs_add_service(&usvc, &svc); break; case IP_VS_SO_SET_EDIT: - ret = ip_vs_edit_service(svc, usvc); + ret = ip_vs_edit_service(svc, &usvc); break; case IP_VS_SO_SET_DEL: ret = ip_vs_del_service(svc); @@ -1986,13 +2180,13 @@ do_ip_vs_set_ctl(struct sock *sk, int cmd, void __user *user, unsigned int len) ret = ip_vs_zero_service(svc); break; case IP_VS_SO_SET_ADDDEST: - ret = ip_vs_add_dest(svc, udest); + ret = ip_vs_add_dest(svc, &udest); break; case IP_VS_SO_SET_EDITDEST: - ret = ip_vs_edit_dest(svc, udest); + ret = ip_vs_edit_dest(svc, &udest); break; case IP_VS_SO_SET_DELDEST: - ret = ip_vs_del_dest(svc, udest); + ret = ip_vs_del_dest(svc, &udest); break; default: ret = -EINVAL; @@ -2015,7 +2209,7 @@ static void ip_vs_copy_stats(struct ip_vs_stats_user *dst, struct ip_vs_stats *src) { spin_lock_bh(&src->lock); - memcpy(dst, src, (char*)&src->lock - (char*)src); + memcpy(dst, &src->ustats, sizeof(*dst)); spin_unlock_bh(&src->lock); } @@ -2023,7 +2217,7 @@ static void ip_vs_copy_service(struct ip_vs_service_entry *dst, struct ip_vs_service *src) { dst->protocol = src->protocol; - dst->addr = src->addr; + dst->addr = src->addr.ip; dst->port = src->port; dst->fwmark = src->fwmark; strlcpy(dst->sched_name, src->scheduler->name, sizeof(dst->sched_name)); @@ -2045,6 +2239,10 @@ __ip_vs_get_service_entries(const struct ip_vs_get_services *get, for (idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) { list_for_each_entry(svc, &ip_vs_svc_table[idx], s_list) { + /* Only expose IPv4 entries to old interface */ + if (svc->af != AF_INET) + continue; + if (count >= get->num_services) goto out; memset(&entry, 0, sizeof(entry)); @@ -2060,6 +2258,10 @@ __ip_vs_get_service_entries(const struct ip_vs_get_services *get, for (idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) { list_for_each_entry(svc, &ip_vs_svc_fwm_table[idx], f_list) { + /* Only expose IPv4 entries to old interface */ + if (svc->af != AF_INET) + continue; + if (count >= get->num_services) goto out; memset(&entry, 0, sizeof(entry)); @@ -2081,13 +2283,15 @@ __ip_vs_get_dest_entries(const struct ip_vs_get_dests *get, struct ip_vs_get_dests __user *uptr) { struct ip_vs_service *svc; + union nf_inet_addr addr = { .ip = get->addr }; int ret = 0; if (get->fwmark) - svc = __ip_vs_svc_fwm_get(get->fwmark); + svc = __ip_vs_svc_fwm_get(AF_INET, get->fwmark); else - svc = __ip_vs_service_get(get->protocol, - get->addr, get->port); + svc = __ip_vs_service_get(AF_INET, get->protocol, &addr, + get->port); + if (svc) { int count = 0; struct ip_vs_dest *dest; @@ -2097,7 +2301,7 @@ __ip_vs_get_dest_entries(const struct ip_vs_get_dests *get, if (count >= get->num_dests) break; - entry.addr = dest->addr; + entry.addr = dest->addr.ip; entry.port = dest->port; entry.conn_flags = atomic_read(&dest->conn_flags); entry.weight = atomic_read(&dest->weight); @@ -2222,13 +2426,15 @@ do_ip_vs_get_ctl(struct sock *sk, int cmd, void __user *user, int *len) { struct ip_vs_service_entry *entry; struct ip_vs_service *svc; + union nf_inet_addr addr; entry = (struct ip_vs_service_entry *)arg; + addr.ip = entry->addr; if (entry->fwmark) - svc = __ip_vs_svc_fwm_get(entry->fwmark); + svc = __ip_vs_svc_fwm_get(AF_INET, entry->fwmark); else - svc = __ip_vs_service_get(entry->protocol, - entry->addr, entry->port); + svc = __ip_vs_service_get(AF_INET, entry->protocol, + &addr, entry->port); if (svc) { ip_vs_copy_service(entry, svc); if (copy_to_user(user, entry, sizeof(*entry)) != 0) @@ -2307,8 +2513,877 @@ static struct nf_sockopt_ops ip_vs_sockopts = { .owner = THIS_MODULE, }; +/* + * Generic Netlink interface + */ + +/* IPVS genetlink family */ +static struct genl_family ip_vs_genl_family = { + .id = GENL_ID_GENERATE, + .hdrsize = 0, + .name = IPVS_GENL_NAME, + .version = IPVS_GENL_VERSION, + .maxattr = IPVS_CMD_MAX, +}; + +/* Policy used for first-level command attributes */ +static const struct nla_policy ip_vs_cmd_policy[IPVS_CMD_ATTR_MAX + 1] = { + [IPVS_CMD_ATTR_SERVICE] = { .type = NLA_NESTED }, + [IPVS_CMD_ATTR_DEST] = { .type = NLA_NESTED }, + [IPVS_CMD_ATTR_DAEMON] = { .type = NLA_NESTED }, + [IPVS_CMD_ATTR_TIMEOUT_TCP] = { .type = NLA_U32 }, + [IPVS_CMD_ATTR_TIMEOUT_TCP_FIN] = { .type = NLA_U32 }, + [IPVS_CMD_ATTR_TIMEOUT_UDP] = { .type = NLA_U32 }, +}; + +/* Policy used for attributes in nested attribute IPVS_CMD_ATTR_DAEMON */ +static const struct nla_policy ip_vs_daemon_policy[IPVS_DAEMON_ATTR_MAX + 1] = { + [IPVS_DAEMON_ATTR_STATE] = { .type = NLA_U32 }, + [IPVS_DAEMON_ATTR_MCAST_IFN] = { .type = NLA_NUL_STRING, + .len = IP_VS_IFNAME_MAXLEN }, + [IPVS_DAEMON_ATTR_SYNC_ID] = { .type = NLA_U32 }, +}; + +/* Policy used for attributes in nested attribute IPVS_CMD_ATTR_SERVICE */ +static const struct nla_policy ip_vs_svc_policy[IPVS_SVC_ATTR_MAX + 1] = { + [IPVS_SVC_ATTR_AF] = { .type = NLA_U16 }, + [IPVS_SVC_ATTR_PROTOCOL] = { .type = NLA_U16 }, + [IPVS_SVC_ATTR_ADDR] = { .type = NLA_BINARY, + .len = sizeof(union nf_inet_addr) }, + [IPVS_SVC_ATTR_PORT] = { .type = NLA_U16 }, + [IPVS_SVC_ATTR_FWMARK] = { .type = NLA_U32 }, + [IPVS_SVC_ATTR_SCHED_NAME] = { .type = NLA_NUL_STRING, + .len = IP_VS_SCHEDNAME_MAXLEN }, + [IPVS_SVC_ATTR_FLAGS] = { .type = NLA_BINARY, + .len = sizeof(struct ip_vs_flags) }, + [IPVS_SVC_ATTR_TIMEOUT] = { .type = NLA_U32 }, + [IPVS_SVC_ATTR_NETMASK] = { .type = NLA_U32 }, + [IPVS_SVC_ATTR_STATS] = { .type = NLA_NESTED }, +}; + +/* Policy used for attributes in nested attribute IPVS_CMD_ATTR_DEST */ +static const struct nla_policy ip_vs_dest_policy[IPVS_DEST_ATTR_MAX + 1] = { + [IPVS_DEST_ATTR_ADDR] = { .type = NLA_BINARY, + .len = sizeof(union nf_inet_addr) }, + [IPVS_DEST_ATTR_PORT] = { .type = NLA_U16 }, + [IPVS_DEST_ATTR_FWD_METHOD] = { .type = NLA_U32 }, + [IPVS_DEST_ATTR_WEIGHT] = { .type = NLA_U32 }, + [IPVS_DEST_ATTR_U_THRESH] = { .type = NLA_U32 }, + [IPVS_DEST_ATTR_L_THRESH] = { .type = NLA_U32 }, + [IPVS_DEST_ATTR_ACTIVE_CONNS] = { .type = NLA_U32 }, + [IPVS_DEST_ATTR_INACT_CONNS] = { .type = NLA_U32 }, + [IPVS_DEST_ATTR_PERSIST_CONNS] = { .type = NLA_U32 }, + [IPVS_DEST_ATTR_STATS] = { .type = NLA_NESTED }, +}; + +static int ip_vs_genl_fill_stats(struct sk_buff *skb, int container_type, + struct ip_vs_stats *stats) +{ + struct nlattr *nl_stats = nla_nest_start(skb, container_type); + if (!nl_stats) + return -EMSGSIZE; + + spin_lock_bh(&stats->lock); + + NLA_PUT_U32(skb, IPVS_STATS_ATTR_CONNS, stats->ustats.conns); + NLA_PUT_U32(skb, IPVS_STATS_ATTR_INPKTS, stats->ustats.inpkts); + NLA_PUT_U32(skb, IPVS_STATS_ATTR_OUTPKTS, stats->ustats.outpkts); + NLA_PUT_U64(skb, IPVS_STATS_ATTR_INBYTES, stats->ustats.inbytes); + NLA_PUT_U64(skb, IPVS_STATS_ATTR_OUTBYTES, stats->ustats.outbytes); + NLA_PUT_U32(skb, IPVS_STATS_ATTR_CPS, stats->ustats.cps); + NLA_PUT_U32(skb, IPVS_STATS_ATTR_INPPS, stats->ustats.inpps); + NLA_PUT_U32(skb, IPVS_STATS_ATTR_OUTPPS, stats->ustats.outpps); + NLA_PUT_U32(skb, IPVS_STATS_ATTR_INBPS, stats->ustats.inbps); + NLA_PUT_U32(skb, IPVS_STATS_ATTR_OUTBPS, stats->ustats.outbps); + + spin_unlock_bh(&stats->lock); + + nla_nest_end(skb, nl_stats); + + return 0; + +nla_put_failure: + spin_unlock_bh(&stats->lock); + nla_nest_cancel(skb, nl_stats); + return -EMSGSIZE; +} + +static int ip_vs_genl_fill_service(struct sk_buff *skb, + struct ip_vs_service *svc) +{ + struct nlattr *nl_service; + struct ip_vs_flags flags = { .flags = svc->flags, + .mask = ~0 }; + + nl_service = nla_nest_start(skb, IPVS_CMD_ATTR_SERVICE); + if (!nl_service) + return -EMSGSIZE; + + NLA_PUT_U16(skb, IPVS_SVC_ATTR_AF, svc->af); + + if (svc->fwmark) { + NLA_PUT_U32(skb, IPVS_SVC_ATTR_FWMARK, svc->fwmark); + } else { + NLA_PUT_U16(skb, IPVS_SVC_ATTR_PROTOCOL, svc->protocol); + NLA_PUT(skb, IPVS_SVC_ATTR_ADDR, sizeof(svc->addr), &svc->addr); + NLA_PUT_U16(skb, IPVS_SVC_ATTR_PORT, svc->port); + } + + NLA_PUT_STRING(skb, IPVS_SVC_ATTR_SCHED_NAME, svc->scheduler->name); + NLA_PUT(skb, IPVS_SVC_ATTR_FLAGS, sizeof(flags), &flags); + NLA_PUT_U32(skb, IPVS_SVC_ATTR_TIMEOUT, svc->timeout / HZ); + NLA_PUT_U32(skb, IPVS_SVC_ATTR_NETMASK, svc->netmask); + + if (ip_vs_genl_fill_stats(skb, IPVS_SVC_ATTR_STATS, &svc->stats)) + goto nla_put_failure; + + nla_nest_end(skb, nl_service); + + return 0; + +nla_put_failure: + nla_nest_cancel(skb, nl_service); + return -EMSGSIZE; +} + +static int ip_vs_genl_dump_service(struct sk_buff *skb, + struct ip_vs_service *svc, + struct netlink_callback *cb) +{ + void *hdr; + + hdr = genlmsg_put(skb, NETLINK_CB(cb->skb).pid, cb->nlh->nlmsg_seq, + &ip_vs_genl_family, NLM_F_MULTI, + IPVS_CMD_NEW_SERVICE); + if (!hdr) + return -EMSGSIZE; + + if (ip_vs_genl_fill_service(skb, svc) < 0) + goto nla_put_failure; + + return genlmsg_end(skb, hdr); + +nla_put_failure: + genlmsg_cancel(skb, hdr); + return -EMSGSIZE; +} + +static int ip_vs_genl_dump_services(struct sk_buff *skb, + struct netlink_callback *cb) +{ + int idx = 0, i; + int start = cb->args[0]; + struct ip_vs_service *svc; + + mutex_lock(&__ip_vs_mutex); + for (i = 0; i < IP_VS_SVC_TAB_SIZE; i++) { + list_for_each_entry(svc, &ip_vs_svc_table[i], s_list) { + if (++idx <= start) + continue; + if (ip_vs_genl_dump_service(skb, svc, cb) < 0) { + idx--; + goto nla_put_failure; + } + } + } + + for (i = 0; i < IP_VS_SVC_TAB_SIZE; i++) { + list_for_each_entry(svc, &ip_vs_svc_fwm_table[i], f_list) { + if (++idx <= start) + continue; + if (ip_vs_genl_dump_service(skb, svc, cb) < 0) { + idx--; + goto nla_put_failure; + } + } + } + +nla_put_failure: + mutex_unlock(&__ip_vs_mutex); + cb->args[0] = idx; + + return skb->len; +} + +static int ip_vs_genl_parse_service(struct ip_vs_service_user_kern *usvc, + struct nlattr *nla, int full_entry) +{ + struct nlattr *attrs[IPVS_SVC_ATTR_MAX + 1]; + struct nlattr *nla_af, *nla_port, *nla_fwmark, *nla_protocol, *nla_addr; + + /* Parse mandatory identifying service fields first */ + if (nla == NULL || + nla_parse_nested(attrs, IPVS_SVC_ATTR_MAX, nla, ip_vs_svc_policy)) + return -EINVAL; + + nla_af = attrs[IPVS_SVC_ATTR_AF]; + nla_protocol = attrs[IPVS_SVC_ATTR_PROTOCOL]; + nla_addr = attrs[IPVS_SVC_ATTR_ADDR]; + nla_port = attrs[IPVS_SVC_ATTR_PORT]; + nla_fwmark = attrs[IPVS_SVC_ATTR_FWMARK]; + + if (!(nla_af && (nla_fwmark || (nla_port && nla_protocol && nla_addr)))) + return -EINVAL; + + usvc->af = nla_get_u16(nla_af); +#ifdef CONFIG_IP_VS_IPV6 + if (usvc->af != AF_INET && usvc->af != AF_INET6) +#else + if (usvc->af != AF_INET) +#endif + return -EAFNOSUPPORT; + + if (nla_fwmark) { + usvc->protocol = IPPROTO_TCP; + usvc->fwmark = nla_get_u32(nla_fwmark); + } else { + usvc->protocol = nla_get_u16(nla_protocol); + nla_memcpy(&usvc->addr, nla_addr, sizeof(usvc->addr)); + usvc->port = nla_get_u16(nla_port); + usvc->fwmark = 0; + } + + /* If a full entry was requested, check for the additional fields */ + if (full_entry) { + struct nlattr *nla_sched, *nla_flags, *nla_timeout, + *nla_netmask; + struct ip_vs_flags flags; + struct ip_vs_service *svc; + + nla_sched = attrs[IPVS_SVC_ATTR_SCHED_NAME]; + nla_flags = attrs[IPVS_SVC_ATTR_FLAGS]; + nla_timeout = attrs[IPVS_SVC_ATTR_TIMEOUT]; + nla_netmask = attrs[IPVS_SVC_ATTR_NETMASK]; + + if (!(nla_sched && nla_flags && nla_timeout && nla_netmask)) + return -EINVAL; + + nla_memcpy(&flags, nla_flags, sizeof(flags)); + + /* prefill flags from service if it already exists */ + if (usvc->fwmark) + svc = __ip_vs_svc_fwm_get(usvc->af, usvc->fwmark); + else + svc = __ip_vs_service_get(usvc->af, usvc->protocol, + &usvc->addr, usvc->port); + if (svc) { + usvc->flags = svc->flags; + ip_vs_service_put(svc); + } else + usvc->flags = 0; + + /* set new flags from userland */ + usvc->flags = (usvc->flags & ~flags.mask) | + (flags.flags & flags.mask); + usvc->sched_name = nla_data(nla_sched); + usvc->timeout = nla_get_u32(nla_timeout); + usvc->netmask = nla_get_u32(nla_netmask); + } + + return 0; +} + +static struct ip_vs_service *ip_vs_genl_find_service(struct nlattr *nla) +{ + struct ip_vs_service_user_kern usvc; + int ret; + + ret = ip_vs_genl_parse_service(&usvc, nla, 0); + if (ret) + return ERR_PTR(ret); + + if (usvc.fwmark) + return __ip_vs_svc_fwm_get(usvc.af, usvc.fwmark); + else + return __ip_vs_service_get(usvc.af, usvc.protocol, + &usvc.addr, usvc.port); +} + +static int ip_vs_genl_fill_dest(struct sk_buff *skb, struct ip_vs_dest *dest) +{ + struct nlattr *nl_dest; + + nl_dest = nla_nest_start(skb, IPVS_CMD_ATTR_DEST); + if (!nl_dest) + return -EMSGSIZE; + + NLA_PUT(skb, IPVS_DEST_ATTR_ADDR, sizeof(dest->addr), &dest->addr); + NLA_PUT_U16(skb, IPVS_DEST_ATTR_PORT, dest->port); + + NLA_PUT_U32(skb, IPVS_DEST_ATTR_FWD_METHOD, + atomic_read(&dest->conn_flags) & IP_VS_CONN_F_FWD_MASK); + NLA_PUT_U32(skb, IPVS_DEST_ATTR_WEIGHT, atomic_read(&dest->weight)); + NLA_PUT_U32(skb, IPVS_DEST_ATTR_U_THRESH, dest->u_threshold); + NLA_PUT_U32(skb, IPVS_DEST_ATTR_L_THRESH, dest->l_threshold); + NLA_PUT_U32(skb, IPVS_DEST_ATTR_ACTIVE_CONNS, + atomic_read(&dest->activeconns)); + NLA_PUT_U32(skb, IPVS_DEST_ATTR_INACT_CONNS, + atomic_read(&dest->inactconns)); + NLA_PUT_U32(skb, IPVS_DEST_ATTR_PERSIST_CONNS, + atomic_read(&dest->persistconns)); + + if (ip_vs_genl_fill_stats(skb, IPVS_DEST_ATTR_STATS, &dest->stats)) + goto nla_put_failure; + + nla_nest_end(skb, nl_dest); + + return 0; + +nla_put_failure: + nla_nest_cancel(skb, nl_dest); + return -EMSGSIZE; +} + +static int ip_vs_genl_dump_dest(struct sk_buff *skb, struct ip_vs_dest *dest, + struct netlink_callback *cb) +{ + void *hdr; + + hdr = genlmsg_put(skb, NETLINK_CB(cb->skb).pid, cb->nlh->nlmsg_seq, + &ip_vs_genl_family, NLM_F_MULTI, + IPVS_CMD_NEW_DEST); + if (!hdr) + return -EMSGSIZE; + + if (ip_vs_genl_fill_dest(skb, dest) < 0) + goto nla_put_failure; + + return genlmsg_end(skb, hdr); + +nla_put_failure: + genlmsg_cancel(skb, hdr); + return -EMSGSIZE; +} + +static int ip_vs_genl_dump_dests(struct sk_buff *skb, + struct netlink_callback *cb) +{ + int idx = 0; + int start = cb->args[0]; + struct ip_vs_service *svc; + struct ip_vs_dest *dest; + struct nlattr *attrs[IPVS_CMD_ATTR_MAX + 1]; + + mutex_lock(&__ip_vs_mutex); + + /* Try to find the service for which to dump destinations */ + if (nlmsg_parse(cb->nlh, GENL_HDRLEN, attrs, + IPVS_CMD_ATTR_MAX, ip_vs_cmd_policy)) + goto out_err; + + svc = ip_vs_genl_find_service(attrs[IPVS_CMD_ATTR_SERVICE]); + if (IS_ERR(svc) || svc == NULL) + goto out_err; + + /* Dump the destinations */ + list_for_each_entry(dest, &svc->destinations, n_list) { + if (++idx <= start) + continue; + if (ip_vs_genl_dump_dest(skb, dest, cb) < 0) { + idx--; + goto nla_put_failure; + } + } + +nla_put_failure: + cb->args[0] = idx; + ip_vs_service_put(svc); + +out_err: + mutex_unlock(&__ip_vs_mutex); + + return skb->len; +} + +static int ip_vs_genl_parse_dest(struct ip_vs_dest_user_kern *udest, + struct nlattr *nla, int full_entry) +{ + struct nlattr *attrs[IPVS_DEST_ATTR_MAX + 1]; + struct nlattr *nla_addr, *nla_port; + + /* Parse mandatory identifying destination fields first */ + if (nla == NULL || + nla_parse_nested(attrs, IPVS_DEST_ATTR_MAX, nla, ip_vs_dest_policy)) + return -EINVAL; + + nla_addr = attrs[IPVS_DEST_ATTR_ADDR]; + nla_port = attrs[IPVS_DEST_ATTR_PORT]; + + if (!(nla_addr && nla_port)) + return -EINVAL; + + nla_memcpy(&udest->addr, nla_addr, sizeof(udest->addr)); + udest->port = nla_get_u16(nla_port); + + /* If a full entry was requested, check for the additional fields */ + if (full_entry) { + struct nlattr *nla_fwd, *nla_weight, *nla_u_thresh, + *nla_l_thresh; + + nla_fwd = attrs[IPVS_DEST_ATTR_FWD_METHOD]; + nla_weight = attrs[IPVS_DEST_ATTR_WEIGHT]; + nla_u_thresh = attrs[IPVS_DEST_ATTR_U_THRESH]; + nla_l_thresh = attrs[IPVS_DEST_ATTR_L_THRESH]; + + if (!(nla_fwd && nla_weight && nla_u_thresh && nla_l_thresh)) + return -EINVAL; + + udest->conn_flags = nla_get_u32(nla_fwd) + & IP_VS_CONN_F_FWD_MASK; + udest->weight = nla_get_u32(nla_weight); + udest->u_threshold = nla_get_u32(nla_u_thresh); + udest->l_threshold = nla_get_u32(nla_l_thresh); + } + + return 0; +} + +static int ip_vs_genl_fill_daemon(struct sk_buff *skb, __be32 state, + const char *mcast_ifn, __be32 syncid) +{ + struct nlattr *nl_daemon; + + nl_daemon = nla_nest_start(skb, IPVS_CMD_ATTR_DAEMON); + if (!nl_daemon) + return -EMSGSIZE; + + NLA_PUT_U32(skb, IPVS_DAEMON_ATTR_STATE, state); + NLA_PUT_STRING(skb, IPVS_DAEMON_ATTR_MCAST_IFN, mcast_ifn); + NLA_PUT_U32(skb, IPVS_DAEMON_ATTR_SYNC_ID, syncid); + + nla_nest_end(skb, nl_daemon); + + return 0; + +nla_put_failure: + nla_nest_cancel(skb, nl_daemon); + return -EMSGSIZE; +} + +static int ip_vs_genl_dump_daemon(struct sk_buff *skb, __be32 state, + const char *mcast_ifn, __be32 syncid, + struct netlink_callback *cb) +{ + void *hdr; + hdr = genlmsg_put(skb, NETLINK_CB(cb->skb).pid, cb->nlh->nlmsg_seq, + &ip_vs_genl_family, NLM_F_MULTI, + IPVS_CMD_NEW_DAEMON); + if (!hdr) + return -EMSGSIZE; + + if (ip_vs_genl_fill_daemon(skb, state, mcast_ifn, syncid)) + goto nla_put_failure; + + return genlmsg_end(skb, hdr); + +nla_put_failure: + genlmsg_cancel(skb, hdr); + return -EMSGSIZE; +} + +static int ip_vs_genl_dump_daemons(struct sk_buff *skb, + struct netlink_callback *cb) +{ + mutex_lock(&__ip_vs_mutex); + if ((ip_vs_sync_state & IP_VS_STATE_MASTER) && !cb->args[0]) { + if (ip_vs_genl_dump_daemon(skb, IP_VS_STATE_MASTER, + ip_vs_master_mcast_ifn, + ip_vs_master_syncid, cb) < 0) + goto nla_put_failure; + + cb->args[0] = 1; + } + + if ((ip_vs_sync_state & IP_VS_STATE_BACKUP) && !cb->args[1]) { + if (ip_vs_genl_dump_daemon(skb, IP_VS_STATE_BACKUP, + ip_vs_backup_mcast_ifn, + ip_vs_backup_syncid, cb) < 0) + goto nla_put_failure; + + cb->args[1] = 1; + } + +nla_put_failure: + mutex_unlock(&__ip_vs_mutex); + + return skb->len; +} + +static int ip_vs_genl_new_daemon(struct nlattr **attrs) +{ + if (!(attrs[IPVS_DAEMON_ATTR_STATE] && + attrs[IPVS_DAEMON_ATTR_MCAST_IFN] && + attrs[IPVS_DAEMON_ATTR_SYNC_ID])) + return -EINVAL; + + return start_sync_thread(nla_get_u32(attrs[IPVS_DAEMON_ATTR_STATE]), + nla_data(attrs[IPVS_DAEMON_ATTR_MCAST_IFN]), + nla_get_u32(attrs[IPVS_DAEMON_ATTR_SYNC_ID])); +} + +static int ip_vs_genl_del_daemon(struct nlattr **attrs) +{ + if (!attrs[IPVS_DAEMON_ATTR_STATE]) + return -EINVAL; + + return stop_sync_thread(nla_get_u32(attrs[IPVS_DAEMON_ATTR_STATE])); +} + +static int ip_vs_genl_set_config(struct nlattr **attrs) +{ + struct ip_vs_timeout_user t; + + __ip_vs_get_timeouts(&t); + + if (attrs[IPVS_CMD_ATTR_TIMEOUT_TCP]) + t.tcp_timeout = nla_get_u32(attrs[IPVS_CMD_ATTR_TIMEOUT_TCP]); + + if (attrs[IPVS_CMD_ATTR_TIMEOUT_TCP_FIN]) + t.tcp_fin_timeout = + nla_get_u32(attrs[IPVS_CMD_ATTR_TIMEOUT_TCP_FIN]); + + if (attrs[IPVS_CMD_ATTR_TIMEOUT_UDP]) + t.udp_timeout = nla_get_u32(attrs[IPVS_CMD_ATTR_TIMEOUT_UDP]); + + return ip_vs_set_timeout(&t); +} + +static int ip_vs_genl_set_cmd(struct sk_buff *skb, struct genl_info *info) +{ + struct ip_vs_service *svc = NULL; + struct ip_vs_service_user_kern usvc; + struct ip_vs_dest_user_kern udest; + int ret = 0, cmd; + int need_full_svc = 0, need_full_dest = 0; + + cmd = info->genlhdr->cmd; + + mutex_lock(&__ip_vs_mutex); + + if (cmd == IPVS_CMD_FLUSH) { + ret = ip_vs_flush(); + goto out; + } else if (cmd == IPVS_CMD_SET_CONFIG) { + ret = ip_vs_genl_set_config(info->attrs); + goto out; + } else if (cmd == IPVS_CMD_NEW_DAEMON || + cmd == IPVS_CMD_DEL_DAEMON) { + + struct nlattr *daemon_attrs[IPVS_DAEMON_ATTR_MAX + 1]; + + if (!info->attrs[IPVS_CMD_ATTR_DAEMON] || + nla_parse_nested(daemon_attrs, IPVS_DAEMON_ATTR_MAX, + info->attrs[IPVS_CMD_ATTR_DAEMON], + ip_vs_daemon_policy)) { + ret = -EINVAL; + goto out; + } + + if (cmd == IPVS_CMD_NEW_DAEMON) + ret = ip_vs_genl_new_daemon(daemon_attrs); + else + ret = ip_vs_genl_del_daemon(daemon_attrs); + goto out; + } else if (cmd == IPVS_CMD_ZERO && + !info->attrs[IPVS_CMD_ATTR_SERVICE]) { + ret = ip_vs_zero_all(); + goto out; + } + + /* All following commands require a service argument, so check if we + * received a valid one. We need a full service specification when + * adding / editing a service. Only identifying members otherwise. */ + if (cmd == IPVS_CMD_NEW_SERVICE || cmd == IPVS_CMD_SET_SERVICE) + need_full_svc = 1; + + ret = ip_vs_genl_parse_service(&usvc, + info->attrs[IPVS_CMD_ATTR_SERVICE], + need_full_svc); + if (ret) + goto out; + + /* Lookup the exact service by <protocol, addr, port> or fwmark */ + if (usvc.fwmark == 0) + svc = __ip_vs_service_get(usvc.af, usvc.protocol, + &usvc.addr, usvc.port); + else + svc = __ip_vs_svc_fwm_get(usvc.af, usvc.fwmark); + + /* Unless we're adding a new service, the service must already exist */ + if ((cmd != IPVS_CMD_NEW_SERVICE) && (svc == NULL)) { + ret = -ESRCH; + goto out; + } + + /* Destination commands require a valid destination argument. For + * adding / editing a destination, we need a full destination + * specification. */ + if (cmd == IPVS_CMD_NEW_DEST || cmd == IPVS_CMD_SET_DEST || + cmd == IPVS_CMD_DEL_DEST) { + if (cmd != IPVS_CMD_DEL_DEST) + need_full_dest = 1; + + ret = ip_vs_genl_parse_dest(&udest, + info->attrs[IPVS_CMD_ATTR_DEST], + need_full_dest); + if (ret) + goto out; + } + + switch (cmd) { + case IPVS_CMD_NEW_SERVICE: + if (svc == NULL) + ret = ip_vs_add_service(&usvc, &svc); + else + ret = -EEXIST; + break; + case IPVS_CMD_SET_SERVICE: + ret = ip_vs_edit_service(svc, &usvc); + break; + case IPVS_CMD_DEL_SERVICE: + ret = ip_vs_del_service(svc); + break; + case IPVS_CMD_NEW_DEST: + ret = ip_vs_add_dest(svc, &udest); + break; + case IPVS_CMD_SET_DEST: + ret = ip_vs_edit_dest(svc, &udest); + break; + case IPVS_CMD_DEL_DEST: + ret = ip_vs_del_dest(svc, &udest); + break; + case IPVS_CMD_ZERO: + ret = ip_vs_zero_service(svc); + break; + default: + ret = -EINVAL; + } + +out: + if (svc) + ip_vs_service_put(svc); + mutex_unlock(&__ip_vs_mutex); + + return ret; +} + +static int ip_vs_genl_get_cmd(struct sk_buff *skb, struct genl_info *info) +{ + struct sk_buff *msg; + void *reply; + int ret, cmd, reply_cmd; + + cmd = info->genlhdr->cmd; + + if (cmd == IPVS_CMD_GET_SERVICE) + reply_cmd = IPVS_CMD_NEW_SERVICE; + else if (cmd == IPVS_CMD_GET_INFO) + reply_cmd = IPVS_CMD_SET_INFO; + else if (cmd == IPVS_CMD_GET_CONFIG) + reply_cmd = IPVS_CMD_SET_CONFIG; + else { + IP_VS_ERR("unknown Generic Netlink command\n"); + return -EINVAL; + } + + msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL); + if (!msg) + return -ENOMEM; + + mutex_lock(&__ip_vs_mutex); + + reply = genlmsg_put_reply(msg, info, &ip_vs_genl_family, 0, reply_cmd); + if (reply == NULL) + goto nla_put_failure; + + switch (cmd) { + case IPVS_CMD_GET_SERVICE: + { + struct ip_vs_service *svc; + + svc = ip_vs_genl_find_service(info->attrs[IPVS_CMD_ATTR_SERVICE]); + if (IS_ERR(svc)) { + ret = PTR_ERR(svc); + goto out_err; + } else if (svc) { + ret = ip_vs_genl_fill_service(msg, svc); + ip_vs_service_put(svc); + if (ret) + goto nla_put_failure; + } else { + ret = -ESRCH; + goto out_err; + } + + break; + } + + case IPVS_CMD_GET_CONFIG: + { + struct ip_vs_timeout_user t; + + __ip_vs_get_timeouts(&t); +#ifdef CONFIG_IP_VS_PROTO_TCP + NLA_PUT_U32(msg, IPVS_CMD_ATTR_TIMEOUT_TCP, t.tcp_timeout); + NLA_PUT_U32(msg, IPVS_CMD_ATTR_TIMEOUT_TCP_FIN, + t.tcp_fin_timeout); +#endif +#ifdef CONFIG_IP_VS_PROTO_UDP + NLA_PUT_U32(msg, IPVS_CMD_ATTR_TIMEOUT_UDP, t.udp_timeout); +#endif + + break; + } + + case IPVS_CMD_GET_INFO: + NLA_PUT_U32(msg, IPVS_INFO_ATTR_VERSION, IP_VS_VERSION_CODE); + NLA_PUT_U32(msg, IPVS_INFO_ATTR_CONN_TAB_SIZE, + IP_VS_CONN_TAB_SIZE); + break; + } + + genlmsg_end(msg, reply); + ret = genlmsg_unicast(msg, info->snd_pid); + goto out; + +nla_put_failure: + IP_VS_ERR("not enough space in Netlink message\n"); + ret = -EMSGSIZE; + +out_err: + nlmsg_free(msg); +out: + mutex_unlock(&__ip_vs_mutex); + + return ret; +} + + +static struct genl_ops ip_vs_genl_ops[] __read_mostly = { + { + .cmd = IPVS_CMD_NEW_SERVICE, + .flags = GENL_ADMIN_PERM, + .policy = ip_vs_cmd_policy, + .doit = ip_vs_genl_set_cmd, + }, + { + .cmd = IPVS_CMD_SET_SERVICE, + .flags = GENL_ADMIN_PERM, + .policy = ip_vs_cmd_policy, + .doit = ip_vs_genl_set_cmd, + }, + { + .cmd = IPVS_CMD_DEL_SERVICE, + .flags = GENL_ADMIN_PERM, + .policy = ip_vs_cmd_policy, + .doit = ip_vs_genl_set_cmd, + }, + { + .cmd = IPVS_CMD_GET_SERVICE, + .flags = GENL_ADMIN_PERM, + .doit = ip_vs_genl_get_cmd, + .dumpit = ip_vs_genl_dump_services, + .policy = ip_vs_cmd_policy, + }, + { + .cmd = IPVS_CMD_NEW_DEST, + .flags = GENL_ADMIN_PERM, + .policy = ip_vs_cmd_policy, + .doit = ip_vs_genl_set_cmd, + }, + { + .cmd = IPVS_CMD_SET_DEST, + .flags = GENL_ADMIN_PERM, + .policy = ip_vs_cmd_policy, + .doit = ip_vs_genl_set_cmd, + }, + { + .cmd = IPVS_CMD_DEL_DEST, + .flags = GENL_ADMIN_PERM, + .policy = ip_vs_cmd_policy, + .doit = ip_vs_genl_set_cmd, + }, + { + .cmd = IPVS_CMD_GET_DEST, + .flags = GENL_ADMIN_PERM, + .policy = ip_vs_cmd_policy, + .dumpit = ip_vs_genl_dump_dests, + }, + { + .cmd = IPVS_CMD_NEW_DAEMON, + .flags = GENL_ADMIN_PERM, + .policy = ip_vs_cmd_policy, + .doit = ip_vs_genl_set_cmd, + }, + { + .cmd = IPVS_CMD_DEL_DAEMON, + .flags = GENL_ADMIN_PERM, + .policy = ip_vs_cmd_policy, + .doit = ip_vs_genl_set_cmd, + }, + { + .cmd = IPVS_CMD_GET_DAEMON, + .flags = GENL_ADMIN_PERM, + .dumpit = ip_vs_genl_dump_daemons, + }, + { + .cmd = IPVS_CMD_SET_CONFIG, + .flags = GENL_ADMIN_PERM, + .policy = ip_vs_cmd_policy, + .doit = ip_vs_genl_set_cmd, + }, + { + .cmd = IPVS_CMD_GET_CONFIG, + .flags = GENL_ADMIN_PERM, + .doit = ip_vs_genl_get_cmd, + }, + { + .cmd = IPVS_CMD_GET_INFO, + .flags = GENL_ADMIN_PERM, + .doit = ip_vs_genl_get_cmd, + }, + { + .cmd = IPVS_CMD_ZERO, + .flags = GENL_ADMIN_PERM, + .policy = ip_vs_cmd_policy, + .doit = ip_vs_genl_set_cmd, + }, + { + .cmd = IPVS_CMD_FLUSH, + .flags = GENL_ADMIN_PERM, + .doit = ip_vs_genl_set_cmd, + }, +}; + +static int __init ip_vs_genl_register(void) +{ + int ret, i; -int ip_vs_control_init(void) + ret = genl_register_family(&ip_vs_genl_family); + if (ret) + return ret; + + for (i = 0; i < ARRAY_SIZE(ip_vs_genl_ops); i++) { + ret = genl_register_ops(&ip_vs_genl_family, &ip_vs_genl_ops[i]); + if (ret) + goto err_out; + } + return 0; + +err_out: + genl_unregister_family(&ip_vs_genl_family); + return ret; +} + +static void ip_vs_genl_unregister(void) +{ + genl_unregister_family(&ip_vs_genl_family); +} + +/* End of Generic Netlink interface definitions */ + + +int __init ip_vs_control_init(void) { int ret; int idx; @@ -2321,6 +3396,13 @@ int ip_vs_control_init(void) return ret; } + ret = ip_vs_genl_register(); + if (ret) { + IP_VS_ERR("cannot register Generic Netlink interface.\n"); + nf_unregister_sockopt(&ip_vs_sockopts); + return ret; + } + proc_net_fops_create(&init_net, "ip_vs", 0, &ip_vs_info_fops); proc_net_fops_create(&init_net, "ip_vs_stats",0, &ip_vs_stats_fops); @@ -2335,8 +3417,6 @@ int ip_vs_control_init(void) INIT_LIST_HEAD(&ip_vs_rtable[idx]); } - memset(&ip_vs_stats, 0, sizeof(ip_vs_stats)); - spin_lock_init(&ip_vs_stats.lock); ip_vs_new_estimator(&ip_vs_stats); /* Hook the defense timer */ @@ -2357,6 +3437,7 @@ void ip_vs_control_cleanup(void) unregister_sysctl_table(sysctl_header); proc_net_remove(&init_net, "ip_vs_stats"); proc_net_remove(&init_net, "ip_vs"); + ip_vs_genl_unregister(); nf_unregister_sockopt(&ip_vs_sockopts); LeaveFunction(2); } diff --git a/net/ipv4/ipvs/ip_vs_dh.c b/net/netfilter/ipvs/ip_vs_dh.c index dcf5d46aaa5e..a16943fd72f1 100644 --- a/net/ipv4/ipvs/ip_vs_dh.c +++ b/net/netfilter/ipvs/ip_vs_dh.c @@ -1,8 +1,6 @@ /* * IPVS: Destination Hashing scheduling module * - * Version: $Id: ip_vs_dh.c,v 1.5 2002/09/15 08:14:08 wensong Exp $ - * * Authors: Wensong Zhang <wensong@gnuchina.org> * * Inspired by the consistent hashing scheduler patch from @@ -220,7 +218,7 @@ ip_vs_dh_schedule(struct ip_vs_service *svc, const struct sk_buff *skb) IP_VS_DBG(6, "DH: destination IP address %u.%u.%u.%u " "--> server %u.%u.%u.%u:%d\n", NIPQUAD(iph->daddr), - NIPQUAD(dest->addr), + NIPQUAD(dest->addr.ip), ntohs(dest->port)); return dest; @@ -235,6 +233,10 @@ static struct ip_vs_scheduler ip_vs_dh_scheduler = .name = "dh", .refcnt = ATOMIC_INIT(0), .module = THIS_MODULE, + .n_list = LIST_HEAD_INIT(ip_vs_dh_scheduler.n_list), +#ifdef CONFIG_IP_VS_IPV6 + .supports_ipv6 = 0, +#endif .init_service = ip_vs_dh_init_svc, .done_service = ip_vs_dh_done_svc, .update_service = ip_vs_dh_update_svc, @@ -244,7 +246,6 @@ static struct ip_vs_scheduler ip_vs_dh_scheduler = static int __init ip_vs_dh_init(void) { - INIT_LIST_HEAD(&ip_vs_dh_scheduler.n_list); return register_ip_vs_scheduler(&ip_vs_dh_scheduler); } diff --git a/net/ipv4/ipvs/ip_vs_est.c b/net/netfilter/ipvs/ip_vs_est.c index dfa0d713c801..2eb2860dabb5 100644 --- a/net/ipv4/ipvs/ip_vs_est.c +++ b/net/netfilter/ipvs/ip_vs_est.c @@ -1,8 +1,6 @@ /* * ip_vs_est.c: simple rate estimator for IPVS * - * Version: $Id: ip_vs_est.c,v 1.4 2002/11/30 01:50:35 wensong Exp $ - * * Authors: Wensong Zhang <wensong@linuxvirtualserver.org> * * This program is free software; you can redistribute it and/or @@ -19,6 +17,7 @@ #include <linux/types.h> #include <linux/interrupt.h> #include <linux/sysctl.h> +#include <linux/list.h> #include <net/ip_vs.h> @@ -46,28 +45,11 @@ */ -struct ip_vs_estimator -{ - struct ip_vs_estimator *next; - struct ip_vs_stats *stats; - - u32 last_conns; - u32 last_inpkts; - u32 last_outpkts; - u64 last_inbytes; - u64 last_outbytes; - - u32 cps; - u32 inpps; - u32 outpps; - u32 inbps; - u32 outbps; -}; - +static void estimation_timer(unsigned long arg); -static struct ip_vs_estimator *est_list = NULL; -static DEFINE_RWLOCK(est_lock); -static struct timer_list est_timer; +static LIST_HEAD(est_list); +static DEFINE_SPINLOCK(est_lock); +static DEFINE_TIMER(est_timer, estimation_timer, 0, 0); static void estimation_timer(unsigned long arg) { @@ -78,125 +60,107 @@ static void estimation_timer(unsigned long arg) u64 n_inbytes, n_outbytes; u32 rate; - read_lock(&est_lock); - for (e = est_list; e; e = e->next) { - s = e->stats; + spin_lock(&est_lock); + list_for_each_entry(e, &est_list, list) { + s = container_of(e, struct ip_vs_stats, est); spin_lock(&s->lock); - n_conns = s->conns; - n_inpkts = s->inpkts; - n_outpkts = s->outpkts; - n_inbytes = s->inbytes; - n_outbytes = s->outbytes; + n_conns = s->ustats.conns; + n_inpkts = s->ustats.inpkts; + n_outpkts = s->ustats.outpkts; + n_inbytes = s->ustats.inbytes; + n_outbytes = s->ustats.outbytes; /* scaled by 2^10, but divided 2 seconds */ rate = (n_conns - e->last_conns)<<9; e->last_conns = n_conns; e->cps += ((long)rate - (long)e->cps)>>2; - s->cps = (e->cps+0x1FF)>>10; + s->ustats.cps = (e->cps+0x1FF)>>10; rate = (n_inpkts - e->last_inpkts)<<9; e->last_inpkts = n_inpkts; e->inpps += ((long)rate - (long)e->inpps)>>2; - s->inpps = (e->inpps+0x1FF)>>10; + s->ustats.inpps = (e->inpps+0x1FF)>>10; rate = (n_outpkts - e->last_outpkts)<<9; e->last_outpkts = n_outpkts; e->outpps += ((long)rate - (long)e->outpps)>>2; - s->outpps = (e->outpps+0x1FF)>>10; + s->ustats.outpps = (e->outpps+0x1FF)>>10; rate = (n_inbytes - e->last_inbytes)<<4; e->last_inbytes = n_inbytes; e->inbps += ((long)rate - (long)e->inbps)>>2; - s->inbps = (e->inbps+0xF)>>5; + s->ustats.inbps = (e->inbps+0xF)>>5; rate = (n_outbytes - e->last_outbytes)<<4; e->last_outbytes = n_outbytes; e->outbps += ((long)rate - (long)e->outbps)>>2; - s->outbps = (e->outbps+0xF)>>5; + s->ustats.outbps = (e->outbps+0xF)>>5; spin_unlock(&s->lock); } - read_unlock(&est_lock); + spin_unlock(&est_lock); mod_timer(&est_timer, jiffies + 2*HZ); } -int ip_vs_new_estimator(struct ip_vs_stats *stats) +void ip_vs_new_estimator(struct ip_vs_stats *stats) { - struct ip_vs_estimator *est; + struct ip_vs_estimator *est = &stats->est; - est = kzalloc(sizeof(*est), GFP_KERNEL); - if (est == NULL) - return -ENOMEM; + INIT_LIST_HEAD(&est->list); - est->stats = stats; - est->last_conns = stats->conns; - est->cps = stats->cps<<10; + est->last_conns = stats->ustats.conns; + est->cps = stats->ustats.cps<<10; - est->last_inpkts = stats->inpkts; - est->inpps = stats->inpps<<10; + est->last_inpkts = stats->ustats.inpkts; + est->inpps = stats->ustats.inpps<<10; - est->last_outpkts = stats->outpkts; - est->outpps = stats->outpps<<10; + est->last_outpkts = stats->ustats.outpkts; + est->outpps = stats->ustats.outpps<<10; - est->last_inbytes = stats->inbytes; - est->inbps = stats->inbps<<5; + est->last_inbytes = stats->ustats.inbytes; + est->inbps = stats->ustats.inbps<<5; - est->last_outbytes = stats->outbytes; - est->outbps = stats->outbps<<5; + est->last_outbytes = stats->ustats.outbytes; + est->outbps = stats->ustats.outbps<<5; - write_lock_bh(&est_lock); - est->next = est_list; - if (est->next == NULL) { - setup_timer(&est_timer, estimation_timer, 0); - est_timer.expires = jiffies + 2*HZ; - add_timer(&est_timer); - } - est_list = est; - write_unlock_bh(&est_lock); - return 0; + spin_lock_bh(&est_lock); + list_add(&est->list, &est_list); + spin_unlock_bh(&est_lock); } void ip_vs_kill_estimator(struct ip_vs_stats *stats) { - struct ip_vs_estimator *est, **pest; - int killed = 0; - - write_lock_bh(&est_lock); - pest = &est_list; - while ((est=*pest) != NULL) { - if (est->stats != stats) { - pest = &est->next; - continue; - } - *pest = est->next; - kfree(est); - killed++; - } - if (killed && est_list == NULL) - del_timer_sync(&est_timer); - write_unlock_bh(&est_lock); + struct ip_vs_estimator *est = &stats->est; + + spin_lock_bh(&est_lock); + list_del(&est->list); + spin_unlock_bh(&est_lock); } void ip_vs_zero_estimator(struct ip_vs_stats *stats) { - struct ip_vs_estimator *e; + struct ip_vs_estimator *est = &stats->est; + + /* set counters zero, caller must hold the stats->lock lock */ + est->last_inbytes = 0; + est->last_outbytes = 0; + est->last_conns = 0; + est->last_inpkts = 0; + est->last_outpkts = 0; + est->cps = 0; + est->inpps = 0; + est->outpps = 0; + est->inbps = 0; + est->outbps = 0; +} - write_lock_bh(&est_lock); - for (e = est_list; e; e = e->next) { - if (e->stats != stats) - continue; - - /* set counters zero */ - e->last_conns = 0; - e->last_inpkts = 0; - e->last_outpkts = 0; - e->last_inbytes = 0; - e->last_outbytes = 0; - e->cps = 0; - e->inpps = 0; - e->outpps = 0; - e->inbps = 0; - e->outbps = 0; - } - write_unlock_bh(&est_lock); +int __init ip_vs_estimator_init(void) +{ + mod_timer(&est_timer, jiffies + 2 * HZ); + return 0; +} + +void ip_vs_estimator_cleanup(void) +{ + del_timer_sync(&est_timer); } diff --git a/net/ipv4/ipvs/ip_vs_ftp.c b/net/netfilter/ipvs/ip_vs_ftp.c index 59aa166b7678..2e7dbd8b73a4 100644 --- a/net/ipv4/ipvs/ip_vs_ftp.c +++ b/net/netfilter/ipvs/ip_vs_ftp.c @@ -1,8 +1,6 @@ /* * ip_vs_ftp.c: IPVS ftp application module * - * Version: $Id: ip_vs_ftp.c,v 1.13 2002/09/15 08:14:08 wensong Exp $ - * * Authors: Wensong Zhang <wensong@linuxvirtualserver.org> * * Changes: @@ -142,13 +140,21 @@ static int ip_vs_ftp_out(struct ip_vs_app *app, struct ip_vs_conn *cp, struct tcphdr *th; char *data, *data_limit; char *start, *end; - __be32 from; + union nf_inet_addr from; __be16 port; struct ip_vs_conn *n_cp; char buf[24]; /* xxx.xxx.xxx.xxx,ppp,ppp\000 */ unsigned buf_len; int ret; +#ifdef CONFIG_IP_VS_IPV6 + /* This application helper doesn't work with IPv6 yet, + * so turn this into a no-op for IPv6 packets + */ + if (cp->af == AF_INET6) + return 1; +#endif + *diff = 0; /* Only useful for established sessions */ @@ -168,24 +174,25 @@ static int ip_vs_ftp_out(struct ip_vs_app *app, struct ip_vs_conn *cp, if (ip_vs_ftp_get_addrport(data, data_limit, SERVER_STRING, sizeof(SERVER_STRING)-1, ')', - &from, &port, + &from.ip, &port, &start, &end) != 1) return 1; IP_VS_DBG(7, "PASV response (%u.%u.%u.%u:%d) -> " "%u.%u.%u.%u:%d detected\n", - NIPQUAD(from), ntohs(port), NIPQUAD(cp->caddr), 0); + NIPQUAD(from.ip), ntohs(port), + NIPQUAD(cp->caddr.ip), 0); /* * Now update or create an connection entry for it */ - n_cp = ip_vs_conn_out_get(iph->protocol, from, port, - cp->caddr, 0); + n_cp = ip_vs_conn_out_get(AF_INET, iph->protocol, &from, port, + &cp->caddr, 0); if (!n_cp) { - n_cp = ip_vs_conn_new(IPPROTO_TCP, - cp->caddr, 0, - cp->vaddr, port, - from, port, + n_cp = ip_vs_conn_new(AF_INET, IPPROTO_TCP, + &cp->caddr, 0, + &cp->vaddr, port, + &from, port, IP_VS_CONN_F_NO_CPORT, cp->dest); if (!n_cp) @@ -198,9 +205,9 @@ static int ip_vs_ftp_out(struct ip_vs_app *app, struct ip_vs_conn *cp, /* * Replace the old passive address with the new one */ - from = n_cp->vaddr; + from.ip = n_cp->vaddr.ip; port = n_cp->vport; - sprintf(buf,"%d,%d,%d,%d,%d,%d", NIPQUAD(from), + sprintf(buf, "%d,%d,%d,%d,%d,%d", NIPQUAD(from.ip), (ntohs(port)>>8)&255, ntohs(port)&255); buf_len = strlen(buf); @@ -245,10 +252,18 @@ static int ip_vs_ftp_in(struct ip_vs_app *app, struct ip_vs_conn *cp, struct tcphdr *th; char *data, *data_start, *data_limit; char *start, *end; - __be32 to; + union nf_inet_addr to; __be16 port; struct ip_vs_conn *n_cp; +#ifdef CONFIG_IP_VS_IPV6 + /* This application helper doesn't work with IPv6 yet, + * so turn this into a no-op for IPv6 packets + */ + if (cp->af == AF_INET6) + return 1; +#endif + /* no diff required for incoming packets */ *diff = 0; @@ -293,12 +308,12 @@ static int ip_vs_ftp_in(struct ip_vs_app *app, struct ip_vs_conn *cp, */ if (ip_vs_ftp_get_addrport(data_start, data_limit, CLIENT_STRING, sizeof(CLIENT_STRING)-1, - '\r', &to, &port, + '\r', &to.ip, &port, &start, &end) != 1) return 1; IP_VS_DBG(7, "PORT %u.%u.%u.%u:%d detected\n", - NIPQUAD(to), ntohs(port)); + NIPQUAD(to.ip), ntohs(port)); /* Passive mode off */ cp->app_data = NULL; @@ -308,16 +323,16 @@ static int ip_vs_ftp_in(struct ip_vs_app *app, struct ip_vs_conn *cp, */ IP_VS_DBG(7, "protocol %s %u.%u.%u.%u:%d %u.%u.%u.%u:%d\n", ip_vs_proto_name(iph->protocol), - NIPQUAD(to), ntohs(port), NIPQUAD(cp->vaddr), 0); + NIPQUAD(to.ip), ntohs(port), NIPQUAD(cp->vaddr.ip), 0); - n_cp = ip_vs_conn_in_get(iph->protocol, - to, port, - cp->vaddr, htons(ntohs(cp->vport)-1)); + n_cp = ip_vs_conn_in_get(AF_INET, iph->protocol, + &to, port, + &cp->vaddr, htons(ntohs(cp->vport)-1)); if (!n_cp) { - n_cp = ip_vs_conn_new(IPPROTO_TCP, - to, port, - cp->vaddr, htons(ntohs(cp->vport)-1), - cp->daddr, htons(ntohs(cp->dport)-1), + n_cp = ip_vs_conn_new(AF_INET, IPPROTO_TCP, + &to, port, + &cp->vaddr, htons(ntohs(cp->vport)-1), + &cp->daddr, htons(ntohs(cp->dport)-1), 0, cp->dest); if (!n_cp) diff --git a/net/ipv4/ipvs/ip_vs_lblc.c b/net/netfilter/ipvs/ip_vs_lblc.c index 3888642706ad..6ecef3518cac 100644 --- a/net/ipv4/ipvs/ip_vs_lblc.c +++ b/net/netfilter/ipvs/ip_vs_lblc.c @@ -1,8 +1,6 @@ /* * IPVS: Locality-Based Least-Connection scheduling module * - * Version: $Id: ip_vs_lblc.c,v 1.10 2002/09/15 08:14:08 wensong Exp $ - * * Authors: Wensong Zhang <wensong@gnuchina.org> * * This program is free software; you can redistribute it and/or @@ -98,7 +96,6 @@ struct ip_vs_lblc_entry { * IPVS lblc hash table */ struct ip_vs_lblc_table { - rwlock_t lock; /* lock for this table */ struct list_head bucket[IP_VS_LBLC_TAB_SIZE]; /* hash bucket */ atomic_t entries; /* number of entries */ int max_size; /* maximum size of entries */ @@ -125,31 +122,6 @@ static ctl_table vs_vars_table[] = { static struct ctl_table_header * sysctl_header; -/* - * new/free a ip_vs_lblc_entry, which is a mapping of a destionation - * IP address to a server. - */ -static inline struct ip_vs_lblc_entry * -ip_vs_lblc_new(__be32 daddr, struct ip_vs_dest *dest) -{ - struct ip_vs_lblc_entry *en; - - en = kmalloc(sizeof(struct ip_vs_lblc_entry), GFP_ATOMIC); - if (en == NULL) { - IP_VS_ERR("ip_vs_lblc_new(): no memory\n"); - return NULL; - } - - INIT_LIST_HEAD(&en->list); - en->addr = daddr; - - atomic_inc(&dest->refcnt); - en->dest = dest; - - return en; -} - - static inline void ip_vs_lblc_free(struct ip_vs_lblc_entry *en) { list_del(&en->list); @@ -175,55 +147,66 @@ static inline unsigned ip_vs_lblc_hashkey(__be32 addr) * Hash an entry in the ip_vs_lblc_table. * returns bool success. */ -static int +static void ip_vs_lblc_hash(struct ip_vs_lblc_table *tbl, struct ip_vs_lblc_entry *en) { - unsigned hash; - - if (!list_empty(&en->list)) { - IP_VS_ERR("ip_vs_lblc_hash(): request for already hashed, " - "called from %p\n", __builtin_return_address(0)); - return 0; - } + unsigned hash = ip_vs_lblc_hashkey(en->addr); - /* - * Hash by destination IP address - */ - hash = ip_vs_lblc_hashkey(en->addr); - - write_lock(&tbl->lock); list_add(&en->list, &tbl->bucket[hash]); atomic_inc(&tbl->entries); - write_unlock(&tbl->lock); - - return 1; } /* - * Get ip_vs_lblc_entry associated with supplied parameters. + * Get ip_vs_lblc_entry associated with supplied parameters. Called under read + * lock */ static inline struct ip_vs_lblc_entry * ip_vs_lblc_get(struct ip_vs_lblc_table *tbl, __be32 addr) { - unsigned hash; + unsigned hash = ip_vs_lblc_hashkey(addr); struct ip_vs_lblc_entry *en; - hash = ip_vs_lblc_hashkey(addr); + list_for_each_entry(en, &tbl->bucket[hash], list) + if (en->addr == addr) + return en; - read_lock(&tbl->lock); + return NULL; +} - list_for_each_entry(en, &tbl->bucket[hash], list) { - if (en->addr == addr) { - /* HIT */ - read_unlock(&tbl->lock); - return en; + +/* + * Create or update an ip_vs_lblc_entry, which is a mapping of a destination IP + * address to a server. Called under write lock. + */ +static inline struct ip_vs_lblc_entry * +ip_vs_lblc_new(struct ip_vs_lblc_table *tbl, __be32 daddr, + struct ip_vs_dest *dest) +{ + struct ip_vs_lblc_entry *en; + + en = ip_vs_lblc_get(tbl, daddr); + if (!en) { + en = kmalloc(sizeof(*en), GFP_ATOMIC); + if (!en) { + IP_VS_ERR("ip_vs_lblc_new(): no memory\n"); + return NULL; } - } - read_unlock(&tbl->lock); + en->addr = daddr; + en->lastuse = jiffies; - return NULL; + atomic_inc(&dest->refcnt); + en->dest = dest; + + ip_vs_lblc_hash(tbl, en); + } else if (en->dest != dest) { + atomic_dec(&en->dest->refcnt); + atomic_inc(&dest->refcnt); + en->dest = dest; + } + + return en; } @@ -232,30 +215,29 @@ ip_vs_lblc_get(struct ip_vs_lblc_table *tbl, __be32 addr) */ static void ip_vs_lblc_flush(struct ip_vs_lblc_table *tbl) { - int i; struct ip_vs_lblc_entry *en, *nxt; + int i; for (i=0; i<IP_VS_LBLC_TAB_SIZE; i++) { - write_lock(&tbl->lock); list_for_each_entry_safe(en, nxt, &tbl->bucket[i], list) { ip_vs_lblc_free(en); atomic_dec(&tbl->entries); } - write_unlock(&tbl->lock); } } -static inline void ip_vs_lblc_full_check(struct ip_vs_lblc_table *tbl) +static inline void ip_vs_lblc_full_check(struct ip_vs_service *svc) { + struct ip_vs_lblc_table *tbl = svc->sched_data; + struct ip_vs_lblc_entry *en, *nxt; unsigned long now = jiffies; int i, j; - struct ip_vs_lblc_entry *en, *nxt; for (i=0, j=tbl->rover; i<IP_VS_LBLC_TAB_SIZE; i++) { j = (j + 1) & IP_VS_LBLC_TAB_MASK; - write_lock(&tbl->lock); + write_lock(&svc->sched_lock); list_for_each_entry_safe(en, nxt, &tbl->bucket[j], list) { if (time_before(now, en->lastuse + sysctl_ip_vs_lblc_expiration)) @@ -264,7 +246,7 @@ static inline void ip_vs_lblc_full_check(struct ip_vs_lblc_table *tbl) ip_vs_lblc_free(en); atomic_dec(&tbl->entries); } - write_unlock(&tbl->lock); + write_unlock(&svc->sched_lock); } tbl->rover = j; } @@ -283,17 +265,16 @@ static inline void ip_vs_lblc_full_check(struct ip_vs_lblc_table *tbl) */ static void ip_vs_lblc_check_expire(unsigned long data) { - struct ip_vs_lblc_table *tbl; + struct ip_vs_service *svc = (struct ip_vs_service *) data; + struct ip_vs_lblc_table *tbl = svc->sched_data; unsigned long now = jiffies; int goal; int i, j; struct ip_vs_lblc_entry *en, *nxt; - tbl = (struct ip_vs_lblc_table *)data; - if ((tbl->counter % COUNT_FOR_FULL_EXPIRATION) == 0) { /* do full expiration check */ - ip_vs_lblc_full_check(tbl); + ip_vs_lblc_full_check(svc); tbl->counter = 1; goto out; } @@ -310,7 +291,7 @@ static void ip_vs_lblc_check_expire(unsigned long data) for (i=0, j=tbl->rover; i<IP_VS_LBLC_TAB_SIZE; i++) { j = (j + 1) & IP_VS_LBLC_TAB_MASK; - write_lock(&tbl->lock); + write_lock(&svc->sched_lock); list_for_each_entry_safe(en, nxt, &tbl->bucket[j], list) { if (time_before(now, en->lastuse + ENTRY_TIMEOUT)) continue; @@ -319,7 +300,7 @@ static void ip_vs_lblc_check_expire(unsigned long data) atomic_dec(&tbl->entries); goal--; } - write_unlock(&tbl->lock); + write_unlock(&svc->sched_lock); if (goal <= 0) break; } @@ -338,15 +319,14 @@ static int ip_vs_lblc_init_svc(struct ip_vs_service *svc) /* * Allocate the ip_vs_lblc_table for this service */ - tbl = kmalloc(sizeof(struct ip_vs_lblc_table), GFP_ATOMIC); + tbl = kmalloc(sizeof(*tbl), GFP_ATOMIC); if (tbl == NULL) { IP_VS_ERR("ip_vs_lblc_init_svc(): no memory\n"); return -ENOMEM; } svc->sched_data = tbl; IP_VS_DBG(6, "LBLC hash table (memory=%Zdbytes) allocated for " - "current service\n", - sizeof(struct ip_vs_lblc_table)); + "current service\n", sizeof(*tbl)); /* * Initialize the hash buckets @@ -354,7 +334,6 @@ static int ip_vs_lblc_init_svc(struct ip_vs_service *svc) for (i=0; i<IP_VS_LBLC_TAB_SIZE; i++) { INIT_LIST_HEAD(&tbl->bucket[i]); } - rwlock_init(&tbl->lock); tbl->max_size = IP_VS_LBLC_TAB_SIZE*16; tbl->rover = 0; tbl->counter = 1; @@ -363,9 +342,8 @@ static int ip_vs_lblc_init_svc(struct ip_vs_service *svc) * Hook periodic timer for garbage collection */ setup_timer(&tbl->periodic_timer, ip_vs_lblc_check_expire, - (unsigned long)tbl); - tbl->periodic_timer.expires = jiffies+CHECK_EXPIRE_INTERVAL; - add_timer(&tbl->periodic_timer); + (unsigned long)svc); + mod_timer(&tbl->periodic_timer, jiffies + CHECK_EXPIRE_INTERVAL); return 0; } @@ -382,22 +360,16 @@ static int ip_vs_lblc_done_svc(struct ip_vs_service *svc) ip_vs_lblc_flush(tbl); /* release the table itself */ - kfree(svc->sched_data); + kfree(tbl); IP_VS_DBG(6, "LBLC hash table (memory=%Zdbytes) released\n", - sizeof(struct ip_vs_lblc_table)); + sizeof(*tbl)); return 0; } -static int ip_vs_lblc_update_svc(struct ip_vs_service *svc) -{ - return 0; -} - - static inline struct ip_vs_dest * -__ip_vs_wlc_schedule(struct ip_vs_service *svc, struct iphdr *iph) +__ip_vs_lblc_schedule(struct ip_vs_service *svc, struct iphdr *iph) { struct ip_vs_dest *dest, *least; int loh, doh; @@ -450,7 +422,7 @@ __ip_vs_wlc_schedule(struct ip_vs_service *svc, struct iphdr *iph) IP_VS_DBG(6, "LBLC: server %d.%d.%d.%d:%d " "activeconns %d refcnt %d weight %d overhead %d\n", - NIPQUAD(least->addr), ntohs(least->port), + NIPQUAD(least->addr.ip), ntohs(least->port), atomic_read(&least->activeconns), atomic_read(&least->refcnt), atomic_read(&least->weight), loh); @@ -486,47 +458,55 @@ is_overloaded(struct ip_vs_dest *dest, struct ip_vs_service *svc) static struct ip_vs_dest * ip_vs_lblc_schedule(struct ip_vs_service *svc, const struct sk_buff *skb) { - struct ip_vs_dest *dest; - struct ip_vs_lblc_table *tbl; - struct ip_vs_lblc_entry *en; + struct ip_vs_lblc_table *tbl = svc->sched_data; struct iphdr *iph = ip_hdr(skb); + struct ip_vs_dest *dest = NULL; + struct ip_vs_lblc_entry *en; IP_VS_DBG(6, "ip_vs_lblc_schedule(): Scheduling...\n"); - tbl = (struct ip_vs_lblc_table *)svc->sched_data; + /* First look in our cache */ + read_lock(&svc->sched_lock); en = ip_vs_lblc_get(tbl, iph->daddr); - if (en == NULL) { - dest = __ip_vs_wlc_schedule(svc, iph); - if (dest == NULL) { - IP_VS_DBG(1, "no destination available\n"); - return NULL; - } - en = ip_vs_lblc_new(iph->daddr, dest); - if (en == NULL) { - return NULL; - } - ip_vs_lblc_hash(tbl, en); - } else { - dest = en->dest; - if (!(dest->flags & IP_VS_DEST_F_AVAILABLE) - || atomic_read(&dest->weight) <= 0 - || is_overloaded(dest, svc)) { - dest = __ip_vs_wlc_schedule(svc, iph); - if (dest == NULL) { - IP_VS_DBG(1, "no destination available\n"); - return NULL; - } - atomic_dec(&en->dest->refcnt); - atomic_inc(&dest->refcnt); - en->dest = dest; - } + if (en) { + /* We only hold a read lock, but this is atomic */ + en->lastuse = jiffies; + + /* + * If the destination is not available, i.e. it's in the trash, + * we must ignore it, as it may be removed from under our feet, + * if someone drops our reference count. Our caller only makes + * sure that destinations, that are not in the trash, are not + * moved to the trash, while we are scheduling. But anyone can + * free up entries from the trash at any time. + */ + + if (en->dest->flags & IP_VS_DEST_F_AVAILABLE) + dest = en->dest; } - en->lastuse = jiffies; + read_unlock(&svc->sched_lock); + /* If the destination has a weight and is not overloaded, use it */ + if (dest && atomic_read(&dest->weight) > 0 && !is_overloaded(dest, svc)) + goto out; + + /* No cache entry or it is invalid, time to schedule */ + dest = __ip_vs_lblc_schedule(svc, iph); + if (!dest) { + IP_VS_DBG(1, "no destination available\n"); + return NULL; + } + + /* If we fail to create a cache entry, we'll just use the valid dest */ + write_lock(&svc->sched_lock); + ip_vs_lblc_new(tbl, iph->daddr, dest); + write_unlock(&svc->sched_lock); + +out: IP_VS_DBG(6, "LBLC: destination IP address %u.%u.%u.%u " "--> server %u.%u.%u.%u:%d\n", - NIPQUAD(en->addr), - NIPQUAD(dest->addr), + NIPQUAD(iph->daddr), + NIPQUAD(dest->addr.ip), ntohs(dest->port)); return dest; @@ -541,9 +521,12 @@ static struct ip_vs_scheduler ip_vs_lblc_scheduler = .name = "lblc", .refcnt = ATOMIC_INIT(0), .module = THIS_MODULE, + .n_list = LIST_HEAD_INIT(ip_vs_lblc_scheduler.n_list), +#ifdef CONFIG_IP_VS_IPV6 + .supports_ipv6 = 0, +#endif .init_service = ip_vs_lblc_init_svc, .done_service = ip_vs_lblc_done_svc, - .update_service = ip_vs_lblc_update_svc, .schedule = ip_vs_lblc_schedule, }; @@ -552,7 +535,6 @@ static int __init ip_vs_lblc_init(void) { int ret; - INIT_LIST_HEAD(&ip_vs_lblc_scheduler.n_list); sysctl_header = register_sysctl_paths(net_vs_ctl_path, vs_vars_table); ret = register_ip_vs_scheduler(&ip_vs_lblc_scheduler); if (ret) diff --git a/net/ipv4/ipvs/ip_vs_lblcr.c b/net/netfilter/ipvs/ip_vs_lblcr.c index daa260eb21cf..1f75ea83bcf8 100644 --- a/net/ipv4/ipvs/ip_vs_lblcr.c +++ b/net/netfilter/ipvs/ip_vs_lblcr.c @@ -1,8 +1,6 @@ /* * IPVS: Locality-Based Least-Connection with Replication scheduler * - * Version: $Id: ip_vs_lblcr.c,v 1.11 2002/09/15 08:14:08 wensong Exp $ - * * Authors: Wensong Zhang <wensong@gnuchina.org> * * This program is free software; you can redistribute it and/or @@ -108,7 +106,7 @@ ip_vs_dest_set_insert(struct ip_vs_dest_set *set, struct ip_vs_dest *dest) return NULL; } - e = kmalloc(sizeof(struct ip_vs_dest_list), GFP_ATOMIC); + e = kmalloc(sizeof(*e), GFP_ATOMIC); if (e == NULL) { IP_VS_ERR("ip_vs_dest_set_insert(): no memory\n"); return NULL; @@ -118,11 +116,9 @@ ip_vs_dest_set_insert(struct ip_vs_dest_set *set, struct ip_vs_dest *dest) e->dest = dest; /* link it to the list */ - write_lock(&set->lock); e->next = set->list; set->list = e; atomic_inc(&set->size); - write_unlock(&set->lock); set->lastmod = jiffies; return e; @@ -133,7 +129,6 @@ ip_vs_dest_set_erase(struct ip_vs_dest_set *set, struct ip_vs_dest *dest) { struct ip_vs_dest_list *e, **ep; - write_lock(&set->lock); for (ep=&set->list, e=*ep; e!=NULL; e=*ep) { if (e->dest == dest) { /* HIT */ @@ -146,7 +141,6 @@ ip_vs_dest_set_erase(struct ip_vs_dest_set *set, struct ip_vs_dest *dest) } ep = &e->next; } - write_unlock(&set->lock); } static void ip_vs_dest_set_eraseall(struct ip_vs_dest_set *set) @@ -176,7 +170,6 @@ static inline struct ip_vs_dest *ip_vs_dest_set_min(struct ip_vs_dest_set *set) if (set == NULL) return NULL; - read_lock(&set->lock); /* select the first destination server, whose weight > 0 */ for (e=set->list; e!=NULL; e=e->next) { least = e->dest; @@ -190,7 +183,6 @@ static inline struct ip_vs_dest *ip_vs_dest_set_min(struct ip_vs_dest_set *set) goto nextstage; } } - read_unlock(&set->lock); return NULL; /* find the destination with the weighted least load */ @@ -209,11 +201,10 @@ static inline struct ip_vs_dest *ip_vs_dest_set_min(struct ip_vs_dest_set *set) loh = doh; } } - read_unlock(&set->lock); IP_VS_DBG(6, "ip_vs_dest_set_min: server %d.%d.%d.%d:%d " "activeconns %d refcnt %d weight %d overhead %d\n", - NIPQUAD(least->addr), ntohs(least->port), + NIPQUAD(least->addr.ip), ntohs(least->port), atomic_read(&least->activeconns), atomic_read(&least->refcnt), atomic_read(&least->weight), loh); @@ -231,7 +222,6 @@ static inline struct ip_vs_dest *ip_vs_dest_set_max(struct ip_vs_dest_set *set) if (set == NULL) return NULL; - read_lock(&set->lock); /* select the first destination server, whose weight > 0 */ for (e=set->list; e!=NULL; e=e->next) { most = e->dest; @@ -241,7 +231,6 @@ static inline struct ip_vs_dest *ip_vs_dest_set_max(struct ip_vs_dest_set *set) goto nextstage; } } - read_unlock(&set->lock); return NULL; /* find the destination with the weighted most load */ @@ -258,11 +247,10 @@ static inline struct ip_vs_dest *ip_vs_dest_set_max(struct ip_vs_dest_set *set) moh = doh; } } - read_unlock(&set->lock); IP_VS_DBG(6, "ip_vs_dest_set_max: server %d.%d.%d.%d:%d " "activeconns %d refcnt %d weight %d overhead %d\n", - NIPQUAD(most->addr), ntohs(most->port), + NIPQUAD(most->addr.ip), ntohs(most->port), atomic_read(&most->activeconns), atomic_read(&most->refcnt), atomic_read(&most->weight), moh); @@ -286,7 +274,6 @@ struct ip_vs_lblcr_entry { * IPVS lblcr hash table */ struct ip_vs_lblcr_table { - rwlock_t lock; /* lock for this table */ struct list_head bucket[IP_VS_LBLCR_TAB_SIZE]; /* hash bucket */ atomic_t entries; /* number of entries */ int max_size; /* maximum size of entries */ @@ -313,32 +300,6 @@ static ctl_table vs_vars_table[] = { static struct ctl_table_header * sysctl_header; -/* - * new/free a ip_vs_lblcr_entry, which is a mapping of a destination - * IP address to a server. - */ -static inline struct ip_vs_lblcr_entry *ip_vs_lblcr_new(__be32 daddr) -{ - struct ip_vs_lblcr_entry *en; - - en = kmalloc(sizeof(struct ip_vs_lblcr_entry), GFP_ATOMIC); - if (en == NULL) { - IP_VS_ERR("ip_vs_lblcr_new(): no memory\n"); - return NULL; - } - - INIT_LIST_HEAD(&en->list); - en->addr = daddr; - - /* initilize its dest set */ - atomic_set(&(en->set.size), 0); - en->set.list = NULL; - rwlock_init(&en->set.lock); - - return en; -} - - static inline void ip_vs_lblcr_free(struct ip_vs_lblcr_entry *en) { list_del(&en->list); @@ -360,55 +321,68 @@ static inline unsigned ip_vs_lblcr_hashkey(__be32 addr) * Hash an entry in the ip_vs_lblcr_table. * returns bool success. */ -static int +static void ip_vs_lblcr_hash(struct ip_vs_lblcr_table *tbl, struct ip_vs_lblcr_entry *en) { - unsigned hash; - - if (!list_empty(&en->list)) { - IP_VS_ERR("ip_vs_lblcr_hash(): request for already hashed, " - "called from %p\n", __builtin_return_address(0)); - return 0; - } + unsigned hash = ip_vs_lblcr_hashkey(en->addr); - /* - * Hash by destination IP address - */ - hash = ip_vs_lblcr_hashkey(en->addr); - - write_lock(&tbl->lock); list_add(&en->list, &tbl->bucket[hash]); atomic_inc(&tbl->entries); - write_unlock(&tbl->lock); - - return 1; } /* - * Get ip_vs_lblcr_entry associated with supplied parameters. + * Get ip_vs_lblcr_entry associated with supplied parameters. Called under + * read lock. */ static inline struct ip_vs_lblcr_entry * ip_vs_lblcr_get(struct ip_vs_lblcr_table *tbl, __be32 addr) { - unsigned hash; + unsigned hash = ip_vs_lblcr_hashkey(addr); struct ip_vs_lblcr_entry *en; - hash = ip_vs_lblcr_hashkey(addr); + list_for_each_entry(en, &tbl->bucket[hash], list) + if (en->addr == addr) + return en; - read_lock(&tbl->lock); + return NULL; +} - list_for_each_entry(en, &tbl->bucket[hash], list) { - if (en->addr == addr) { - /* HIT */ - read_unlock(&tbl->lock); - return en; + +/* + * Create or update an ip_vs_lblcr_entry, which is a mapping of a destination + * IP address to a server. Called under write lock. + */ +static inline struct ip_vs_lblcr_entry * +ip_vs_lblcr_new(struct ip_vs_lblcr_table *tbl, __be32 daddr, + struct ip_vs_dest *dest) +{ + struct ip_vs_lblcr_entry *en; + + en = ip_vs_lblcr_get(tbl, daddr); + if (!en) { + en = kmalloc(sizeof(*en), GFP_ATOMIC); + if (!en) { + IP_VS_ERR("ip_vs_lblcr_new(): no memory\n"); + return NULL; } + + en->addr = daddr; + en->lastuse = jiffies; + + /* initilize its dest set */ + atomic_set(&(en->set.size), 0); + en->set.list = NULL; + rwlock_init(&en->set.lock); + + ip_vs_lblcr_hash(tbl, en); } - read_unlock(&tbl->lock); + write_lock(&en->set.lock); + ip_vs_dest_set_insert(&en->set, dest); + write_unlock(&en->set.lock); - return NULL; + return en; } @@ -420,19 +394,18 @@ static void ip_vs_lblcr_flush(struct ip_vs_lblcr_table *tbl) int i; struct ip_vs_lblcr_entry *en, *nxt; + /* No locking required, only called during cleanup. */ for (i=0; i<IP_VS_LBLCR_TAB_SIZE; i++) { - write_lock(&tbl->lock); list_for_each_entry_safe(en, nxt, &tbl->bucket[i], list) { ip_vs_lblcr_free(en); - atomic_dec(&tbl->entries); } - write_unlock(&tbl->lock); } } -static inline void ip_vs_lblcr_full_check(struct ip_vs_lblcr_table *tbl) +static inline void ip_vs_lblcr_full_check(struct ip_vs_service *svc) { + struct ip_vs_lblcr_table *tbl = svc->sched_data; unsigned long now = jiffies; int i, j; struct ip_vs_lblcr_entry *en, *nxt; @@ -440,7 +413,7 @@ static inline void ip_vs_lblcr_full_check(struct ip_vs_lblcr_table *tbl) for (i=0, j=tbl->rover; i<IP_VS_LBLCR_TAB_SIZE; i++) { j = (j + 1) & IP_VS_LBLCR_TAB_MASK; - write_lock(&tbl->lock); + write_lock(&svc->sched_lock); list_for_each_entry_safe(en, nxt, &tbl->bucket[j], list) { if (time_after(en->lastuse+sysctl_ip_vs_lblcr_expiration, now)) @@ -449,7 +422,7 @@ static inline void ip_vs_lblcr_full_check(struct ip_vs_lblcr_table *tbl) ip_vs_lblcr_free(en); atomic_dec(&tbl->entries); } - write_unlock(&tbl->lock); + write_unlock(&svc->sched_lock); } tbl->rover = j; } @@ -468,17 +441,16 @@ static inline void ip_vs_lblcr_full_check(struct ip_vs_lblcr_table *tbl) */ static void ip_vs_lblcr_check_expire(unsigned long data) { - struct ip_vs_lblcr_table *tbl; + struct ip_vs_service *svc = (struct ip_vs_service *) data; + struct ip_vs_lblcr_table *tbl = svc->sched_data; unsigned long now = jiffies; int goal; int i, j; struct ip_vs_lblcr_entry *en, *nxt; - tbl = (struct ip_vs_lblcr_table *)data; - if ((tbl->counter % COUNT_FOR_FULL_EXPIRATION) == 0) { /* do full expiration check */ - ip_vs_lblcr_full_check(tbl); + ip_vs_lblcr_full_check(svc); tbl->counter = 1; goto out; } @@ -495,7 +467,7 @@ static void ip_vs_lblcr_check_expire(unsigned long data) for (i=0, j=tbl->rover; i<IP_VS_LBLCR_TAB_SIZE; i++) { j = (j + 1) & IP_VS_LBLCR_TAB_MASK; - write_lock(&tbl->lock); + write_lock(&svc->sched_lock); list_for_each_entry_safe(en, nxt, &tbl->bucket[j], list) { if (time_before(now, en->lastuse+ENTRY_TIMEOUT)) continue; @@ -504,7 +476,7 @@ static void ip_vs_lblcr_check_expire(unsigned long data) atomic_dec(&tbl->entries); goal--; } - write_unlock(&tbl->lock); + write_unlock(&svc->sched_lock); if (goal <= 0) break; } @@ -522,15 +494,14 @@ static int ip_vs_lblcr_init_svc(struct ip_vs_service *svc) /* * Allocate the ip_vs_lblcr_table for this service */ - tbl = kmalloc(sizeof(struct ip_vs_lblcr_table), GFP_ATOMIC); + tbl = kmalloc(sizeof(*tbl), GFP_ATOMIC); if (tbl == NULL) { IP_VS_ERR("ip_vs_lblcr_init_svc(): no memory\n"); return -ENOMEM; } svc->sched_data = tbl; IP_VS_DBG(6, "LBLCR hash table (memory=%Zdbytes) allocated for " - "current service\n", - sizeof(struct ip_vs_lblcr_table)); + "current service\n", sizeof(*tbl)); /* * Initialize the hash buckets @@ -538,7 +509,6 @@ static int ip_vs_lblcr_init_svc(struct ip_vs_service *svc) for (i=0; i<IP_VS_LBLCR_TAB_SIZE; i++) { INIT_LIST_HEAD(&tbl->bucket[i]); } - rwlock_init(&tbl->lock); tbl->max_size = IP_VS_LBLCR_TAB_SIZE*16; tbl->rover = 0; tbl->counter = 1; @@ -547,9 +517,8 @@ static int ip_vs_lblcr_init_svc(struct ip_vs_service *svc) * Hook periodic timer for garbage collection */ setup_timer(&tbl->periodic_timer, ip_vs_lblcr_check_expire, - (unsigned long)tbl); - tbl->periodic_timer.expires = jiffies+CHECK_EXPIRE_INTERVAL; - add_timer(&tbl->periodic_timer); + (unsigned long)svc); + mod_timer(&tbl->periodic_timer, jiffies + CHECK_EXPIRE_INTERVAL); return 0; } @@ -566,22 +535,16 @@ static int ip_vs_lblcr_done_svc(struct ip_vs_service *svc) ip_vs_lblcr_flush(tbl); /* release the table itself */ - kfree(svc->sched_data); + kfree(tbl); IP_VS_DBG(6, "LBLCR hash table (memory=%Zdbytes) released\n", - sizeof(struct ip_vs_lblcr_table)); - - return 0; -} - + sizeof(*tbl)); -static int ip_vs_lblcr_update_svc(struct ip_vs_service *svc) -{ return 0; } static inline struct ip_vs_dest * -__ip_vs_wlc_schedule(struct ip_vs_service *svc, struct iphdr *iph) +__ip_vs_lblcr_schedule(struct ip_vs_service *svc, struct iphdr *iph) { struct ip_vs_dest *dest, *least; int loh, doh; @@ -635,7 +598,7 @@ __ip_vs_wlc_schedule(struct ip_vs_service *svc, struct iphdr *iph) IP_VS_DBG(6, "LBLCR: server %d.%d.%d.%d:%d " "activeconns %d refcnt %d weight %d overhead %d\n", - NIPQUAD(least->addr), ntohs(least->port), + NIPQUAD(least->addr.ip), ntohs(least->port), atomic_read(&least->activeconns), atomic_read(&least->refcnt), atomic_read(&least->weight), loh); @@ -671,51 +634,79 @@ is_overloaded(struct ip_vs_dest *dest, struct ip_vs_service *svc) static struct ip_vs_dest * ip_vs_lblcr_schedule(struct ip_vs_service *svc, const struct sk_buff *skb) { - struct ip_vs_dest *dest; - struct ip_vs_lblcr_table *tbl; - struct ip_vs_lblcr_entry *en; + struct ip_vs_lblcr_table *tbl = svc->sched_data; struct iphdr *iph = ip_hdr(skb); + struct ip_vs_dest *dest = NULL; + struct ip_vs_lblcr_entry *en; IP_VS_DBG(6, "ip_vs_lblcr_schedule(): Scheduling...\n"); - tbl = (struct ip_vs_lblcr_table *)svc->sched_data; + /* First look in our cache */ + read_lock(&svc->sched_lock); en = ip_vs_lblcr_get(tbl, iph->daddr); - if (en == NULL) { - dest = __ip_vs_wlc_schedule(svc, iph); - if (dest == NULL) { - IP_VS_DBG(1, "no destination available\n"); - return NULL; - } - en = ip_vs_lblcr_new(iph->daddr); - if (en == NULL) { - return NULL; - } - ip_vs_dest_set_insert(&en->set, dest); - ip_vs_lblcr_hash(tbl, en); - } else { + if (en) { + /* We only hold a read lock, but this is atomic */ + en->lastuse = jiffies; + + /* Get the least loaded destination */ + read_lock(&en->set.lock); dest = ip_vs_dest_set_min(&en->set); - if (!dest || is_overloaded(dest, svc)) { - dest = __ip_vs_wlc_schedule(svc, iph); - if (dest == NULL) { - IP_VS_DBG(1, "no destination available\n"); - return NULL; - } - ip_vs_dest_set_insert(&en->set, dest); - } + read_unlock(&en->set.lock); + + /* More than one destination + enough time passed by, cleanup */ if (atomic_read(&en->set.size) > 1 && - jiffies-en->set.lastmod > sysctl_ip_vs_lblcr_expiration) { + time_after(jiffies, en->set.lastmod + + sysctl_ip_vs_lblcr_expiration)) { struct ip_vs_dest *m; + + write_lock(&en->set.lock); m = ip_vs_dest_set_max(&en->set); if (m) ip_vs_dest_set_erase(&en->set, m); + write_unlock(&en->set.lock); } + + /* If the destination is not overloaded, use it */ + if (dest && !is_overloaded(dest, svc)) { + read_unlock(&svc->sched_lock); + goto out; + } + + /* The cache entry is invalid, time to schedule */ + dest = __ip_vs_lblcr_schedule(svc, iph); + if (!dest) { + IP_VS_DBG(1, "no destination available\n"); + read_unlock(&svc->sched_lock); + return NULL; + } + + /* Update our cache entry */ + write_lock(&en->set.lock); + ip_vs_dest_set_insert(&en->set, dest); + write_unlock(&en->set.lock); + } + read_unlock(&svc->sched_lock); + + if (dest) + goto out; + + /* No cache entry, time to schedule */ + dest = __ip_vs_lblcr_schedule(svc, iph); + if (!dest) { + IP_VS_DBG(1, "no destination available\n"); + return NULL; } - en->lastuse = jiffies; + /* If we fail to create a cache entry, we'll just use the valid dest */ + write_lock(&svc->sched_lock); + ip_vs_lblcr_new(tbl, iph->daddr, dest); + write_unlock(&svc->sched_lock); + +out: IP_VS_DBG(6, "LBLCR: destination IP address %u.%u.%u.%u " "--> server %u.%u.%u.%u:%d\n", - NIPQUAD(en->addr), - NIPQUAD(dest->addr), + NIPQUAD(iph->daddr), + NIPQUAD(dest->addr.ip), ntohs(dest->port)); return dest; @@ -730,9 +721,12 @@ static struct ip_vs_scheduler ip_vs_lblcr_scheduler = .name = "lblcr", .refcnt = ATOMIC_INIT(0), .module = THIS_MODULE, + .n_list = LIST_HEAD_INIT(ip_vs_lblcr_scheduler.n_list), +#ifdef CONFIG_IP_VS_IPV6 + .supports_ipv6 = 0, +#endif .init_service = ip_vs_lblcr_init_svc, .done_service = ip_vs_lblcr_done_svc, - .update_service = ip_vs_lblcr_update_svc, .schedule = ip_vs_lblcr_schedule, }; @@ -741,7 +735,6 @@ static int __init ip_vs_lblcr_init(void) { int ret; - INIT_LIST_HEAD(&ip_vs_lblcr_scheduler.n_list); sysctl_header = register_sysctl_paths(net_vs_ctl_path, vs_vars_table); ret = register_ip_vs_scheduler(&ip_vs_lblcr_scheduler); if (ret) diff --git a/net/ipv4/ipvs/ip_vs_lc.c b/net/netfilter/ipvs/ip_vs_lc.c index d88fef90a641..b69f808ac461 100644 --- a/net/ipv4/ipvs/ip_vs_lc.c +++ b/net/netfilter/ipvs/ip_vs_lc.c @@ -1,8 +1,6 @@ /* * IPVS: Least-Connection Scheduling module * - * Version: $Id: ip_vs_lc.c,v 1.10 2003/04/18 09:03:16 wensong Exp $ - * * Authors: Wensong Zhang <wensong@linuxvirtualserver.org> * * This program is free software; you can redistribute it and/or @@ -22,24 +20,6 @@ #include <net/ip_vs.h> -static int ip_vs_lc_init_svc(struct ip_vs_service *svc) -{ - return 0; -} - - -static int ip_vs_lc_done_svc(struct ip_vs_service *svc) -{ - return 0; -} - - -static int ip_vs_lc_update_svc(struct ip_vs_service *svc) -{ - return 0; -} - - static inline unsigned int ip_vs_lc_dest_overhead(struct ip_vs_dest *dest) { @@ -87,10 +67,10 @@ ip_vs_lc_schedule(struct ip_vs_service *svc, const struct sk_buff *skb) } if (least) - IP_VS_DBG(6, "LC: server %u.%u.%u.%u:%u activeconns %d inactconns %d\n", - NIPQUAD(least->addr), ntohs(least->port), - atomic_read(&least->activeconns), - atomic_read(&least->inactconns)); + IP_VS_DBG_BUF(6, "LC: server %s:%u activeconns %d inactconns %d\n", + IP_VS_DBG_ADDR(svc->af, &least->addr), ntohs(least->port), + atomic_read(&least->activeconns), + atomic_read(&least->inactconns)); return least; } @@ -100,16 +80,16 @@ static struct ip_vs_scheduler ip_vs_lc_scheduler = { .name = "lc", .refcnt = ATOMIC_INIT(0), .module = THIS_MODULE, - .init_service = ip_vs_lc_init_svc, - .done_service = ip_vs_lc_done_svc, - .update_service = ip_vs_lc_update_svc, + .n_list = LIST_HEAD_INIT(ip_vs_lc_scheduler.n_list), +#ifdef CONFIG_IP_VS_IPV6 + .supports_ipv6 = 1, +#endif .schedule = ip_vs_lc_schedule, }; static int __init ip_vs_lc_init(void) { - INIT_LIST_HEAD(&ip_vs_lc_scheduler.n_list); return register_ip_vs_scheduler(&ip_vs_lc_scheduler) ; } diff --git a/net/ipv4/ipvs/ip_vs_nq.c b/net/netfilter/ipvs/ip_vs_nq.c index bc2a9e5f2a7b..9a2d8033f08f 100644 --- a/net/ipv4/ipvs/ip_vs_nq.c +++ b/net/netfilter/ipvs/ip_vs_nq.c @@ -1,8 +1,6 @@ /* * IPVS: Never Queue scheduling module * - * Version: $Id: ip_vs_nq.c,v 1.2 2003/06/08 09:31:19 wensong Exp $ - * * Authors: Wensong Zhang <wensong@linuxvirtualserver.org> * * This program is free software; you can redistribute it and/or @@ -39,27 +37,6 @@ #include <net/ip_vs.h> -static int -ip_vs_nq_init_svc(struct ip_vs_service *svc) -{ - return 0; -} - - -static int -ip_vs_nq_done_svc(struct ip_vs_service *svc) -{ - return 0; -} - - -static int -ip_vs_nq_update_svc(struct ip_vs_service *svc) -{ - return 0; -} - - static inline unsigned int ip_vs_nq_dest_overhead(struct ip_vs_dest *dest) { @@ -122,12 +99,12 @@ ip_vs_nq_schedule(struct ip_vs_service *svc, const struct sk_buff *skb) return NULL; out: - IP_VS_DBG(6, "NQ: server %u.%u.%u.%u:%u " - "activeconns %d refcnt %d weight %d overhead %d\n", - NIPQUAD(least->addr), ntohs(least->port), - atomic_read(&least->activeconns), - atomic_read(&least->refcnt), - atomic_read(&least->weight), loh); + IP_VS_DBG_BUF(6, "NQ: server %s:%u " + "activeconns %d refcnt %d weight %d overhead %d\n", + IP_VS_DBG_ADDR(svc->af, &least->addr), ntohs(least->port), + atomic_read(&least->activeconns), + atomic_read(&least->refcnt), + atomic_read(&least->weight), loh); return least; } @@ -138,16 +115,16 @@ static struct ip_vs_scheduler ip_vs_nq_scheduler = .name = "nq", .refcnt = ATOMIC_INIT(0), .module = THIS_MODULE, - .init_service = ip_vs_nq_init_svc, - .done_service = ip_vs_nq_done_svc, - .update_service = ip_vs_nq_update_svc, + .n_list = LIST_HEAD_INIT(ip_vs_nq_scheduler.n_list), +#ifdef CONFIG_IP_VS_IPV6 + .supports_ipv6 = 1, +#endif .schedule = ip_vs_nq_schedule, }; static int __init ip_vs_nq_init(void) { - INIT_LIST_HEAD(&ip_vs_nq_scheduler.n_list); return register_ip_vs_scheduler(&ip_vs_nq_scheduler); } diff --git a/net/ipv4/ipvs/ip_vs_proto.c b/net/netfilter/ipvs/ip_vs_proto.c index 4b1c16cbb16b..0791f9e08feb 100644 --- a/net/ipv4/ipvs/ip_vs_proto.c +++ b/net/netfilter/ipvs/ip_vs_proto.c @@ -1,8 +1,6 @@ /* * ip_vs_proto.c: transport protocol load balancing support for IPVS * - * Version: $Id: ip_vs_proto.c,v 1.2 2003/04/18 09:03:16 wensong Exp $ - * * Authors: Wensong Zhang <wensong@linuxvirtualserver.org> * Julian Anastasov <ja@ssi.bg> * @@ -45,7 +43,7 @@ static struct ip_vs_protocol *ip_vs_proto_table[IP_VS_PROTO_TAB_SIZE]; /* * register an ipvs protocol */ -static int __used register_ip_vs_protocol(struct ip_vs_protocol *pp) +static int __used __init register_ip_vs_protocol(struct ip_vs_protocol *pp) { unsigned hash = IP_VS_PROTO_HASH(pp->protocol); @@ -153,11 +151,11 @@ const char * ip_vs_state_name(__u16 proto, int state) } -void -ip_vs_tcpudp_debug_packet(struct ip_vs_protocol *pp, - const struct sk_buff *skb, - int offset, - const char *msg) +static void +ip_vs_tcpudp_debug_packet_v4(struct ip_vs_protocol *pp, + const struct sk_buff *skb, + int offset, + const char *msg) { char buf[128]; struct iphdr _iph, *ih; @@ -191,8 +189,63 @@ ip_vs_tcpudp_debug_packet(struct ip_vs_protocol *pp, printk(KERN_DEBUG "IPVS: %s: %s\n", msg, buf); } +#ifdef CONFIG_IP_VS_IPV6 +static void +ip_vs_tcpudp_debug_packet_v6(struct ip_vs_protocol *pp, + const struct sk_buff *skb, + int offset, + const char *msg) +{ + char buf[192]; + struct ipv6hdr _iph, *ih; + + ih = skb_header_pointer(skb, offset, sizeof(_iph), &_iph); + if (ih == NULL) + sprintf(buf, "%s TRUNCATED", pp->name); + else if (ih->nexthdr == IPPROTO_FRAGMENT) + sprintf(buf, "%s " NIP6_FMT "->" NIP6_FMT " frag", + pp->name, NIP6(ih->saddr), + NIP6(ih->daddr)); + else { + __be16 _ports[2], *pptr; + + pptr = skb_header_pointer(skb, offset + sizeof(struct ipv6hdr), + sizeof(_ports), _ports); + if (pptr == NULL) + sprintf(buf, "%s TRUNCATED " NIP6_FMT "->" NIP6_FMT, + pp->name, + NIP6(ih->saddr), + NIP6(ih->daddr)); + else + sprintf(buf, "%s " NIP6_FMT ":%u->" NIP6_FMT ":%u", + pp->name, + NIP6(ih->saddr), + ntohs(pptr[0]), + NIP6(ih->daddr), + ntohs(pptr[1])); + } + + printk(KERN_DEBUG "IPVS: %s: %s\n", msg, buf); +} +#endif + + +void +ip_vs_tcpudp_debug_packet(struct ip_vs_protocol *pp, + const struct sk_buff *skb, + int offset, + const char *msg) +{ +#ifdef CONFIG_IP_VS_IPV6 + if (skb->protocol == htons(ETH_P_IPV6)) + ip_vs_tcpudp_debug_packet_v6(pp, skb, offset, msg); + else +#endif + ip_vs_tcpudp_debug_packet_v4(pp, skb, offset, msg); +} + -int ip_vs_protocol_init(void) +int __init ip_vs_protocol_init(void) { char protocols[64]; #define REGISTER_PROTOCOL(p) \ diff --git a/net/netfilter/ipvs/ip_vs_proto_ah_esp.c b/net/netfilter/ipvs/ip_vs_proto_ah_esp.c new file mode 100644 index 000000000000..80ab0c8e5b4a --- /dev/null +++ b/net/netfilter/ipvs/ip_vs_proto_ah_esp.c @@ -0,0 +1,235 @@ +/* + * ip_vs_proto_ah_esp.c: AH/ESP IPSec load balancing support for IPVS + * + * Authors: Julian Anastasov <ja@ssi.bg>, February 2002 + * Wensong Zhang <wensong@linuxvirtualserver.org> + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * version 2 as published by the Free Software Foundation; + * + */ + +#include <linux/in.h> +#include <linux/ip.h> +#include <linux/module.h> +#include <linux/kernel.h> +#include <linux/netfilter.h> +#include <linux/netfilter_ipv4.h> + +#include <net/ip_vs.h> + + +/* TODO: + +struct isakmp_hdr { + __u8 icookie[8]; + __u8 rcookie[8]; + __u8 np; + __u8 version; + __u8 xchgtype; + __u8 flags; + __u32 msgid; + __u32 length; +}; + +*/ + +#define PORT_ISAKMP 500 + + +static struct ip_vs_conn * +ah_esp_conn_in_get(int af, const struct sk_buff *skb, struct ip_vs_protocol *pp, + const struct ip_vs_iphdr *iph, unsigned int proto_off, + int inverse) +{ + struct ip_vs_conn *cp; + + if (likely(!inverse)) { + cp = ip_vs_conn_in_get(af, IPPROTO_UDP, + &iph->saddr, + htons(PORT_ISAKMP), + &iph->daddr, + htons(PORT_ISAKMP)); + } else { + cp = ip_vs_conn_in_get(af, IPPROTO_UDP, + &iph->daddr, + htons(PORT_ISAKMP), + &iph->saddr, + htons(PORT_ISAKMP)); + } + + if (!cp) { + /* + * We are not sure if the packet is from our + * service, so our conn_schedule hook should return NF_ACCEPT + */ + IP_VS_DBG_BUF(12, "Unknown ISAKMP entry for outin packet " + "%s%s %s->%s\n", + inverse ? "ICMP+" : "", + pp->name, + IP_VS_DBG_ADDR(af, &iph->saddr), + IP_VS_DBG_ADDR(af, &iph->daddr)); + } + + return cp; +} + + +static struct ip_vs_conn * +ah_esp_conn_out_get(int af, const struct sk_buff *skb, + struct ip_vs_protocol *pp, + const struct ip_vs_iphdr *iph, + unsigned int proto_off, + int inverse) +{ + struct ip_vs_conn *cp; + + if (likely(!inverse)) { + cp = ip_vs_conn_out_get(af, IPPROTO_UDP, + &iph->saddr, + htons(PORT_ISAKMP), + &iph->daddr, + htons(PORT_ISAKMP)); + } else { + cp = ip_vs_conn_out_get(af, IPPROTO_UDP, + &iph->daddr, + htons(PORT_ISAKMP), + &iph->saddr, + htons(PORT_ISAKMP)); + } + + if (!cp) { + IP_VS_DBG_BUF(12, "Unknown ISAKMP entry for inout packet " + "%s%s %s->%s\n", + inverse ? "ICMP+" : "", + pp->name, + IP_VS_DBG_ADDR(af, &iph->saddr), + IP_VS_DBG_ADDR(af, &iph->daddr)); + } + + return cp; +} + + +static int +ah_esp_conn_schedule(int af, struct sk_buff *skb, struct ip_vs_protocol *pp, + int *verdict, struct ip_vs_conn **cpp) +{ + /* + * AH/ESP is only related traffic. Pass the packet to IP stack. + */ + *verdict = NF_ACCEPT; + return 0; +} + + +static void +ah_esp_debug_packet_v4(struct ip_vs_protocol *pp, const struct sk_buff *skb, + int offset, const char *msg) +{ + char buf[256]; + struct iphdr _iph, *ih; + + ih = skb_header_pointer(skb, offset, sizeof(_iph), &_iph); + if (ih == NULL) + sprintf(buf, "%s TRUNCATED", pp->name); + else + sprintf(buf, "%s %u.%u.%u.%u->%u.%u.%u.%u", + pp->name, NIPQUAD(ih->saddr), + NIPQUAD(ih->daddr)); + + printk(KERN_DEBUG "IPVS: %s: %s\n", msg, buf); +} + +#ifdef CONFIG_IP_VS_IPV6 +static void +ah_esp_debug_packet_v6(struct ip_vs_protocol *pp, const struct sk_buff *skb, + int offset, const char *msg) +{ + char buf[256]; + struct ipv6hdr _iph, *ih; + + ih = skb_header_pointer(skb, offset, sizeof(_iph), &_iph); + if (ih == NULL) + sprintf(buf, "%s TRUNCATED", pp->name); + else + sprintf(buf, "%s " NIP6_FMT "->" NIP6_FMT, + pp->name, NIP6(ih->saddr), + NIP6(ih->daddr)); + + printk(KERN_DEBUG "IPVS: %s: %s\n", msg, buf); +} +#endif + +static void +ah_esp_debug_packet(struct ip_vs_protocol *pp, const struct sk_buff *skb, + int offset, const char *msg) +{ +#ifdef CONFIG_IP_VS_IPV6 + if (skb->protocol == htons(ETH_P_IPV6)) + ah_esp_debug_packet_v6(pp, skb, offset, msg); + else +#endif + ah_esp_debug_packet_v4(pp, skb, offset, msg); +} + + +static void ah_esp_init(struct ip_vs_protocol *pp) +{ + /* nothing to do now */ +} + + +static void ah_esp_exit(struct ip_vs_protocol *pp) +{ + /* nothing to do now */ +} + + +#ifdef CONFIG_IP_VS_PROTO_AH +struct ip_vs_protocol ip_vs_protocol_ah = { + .name = "AH", + .protocol = IPPROTO_AH, + .num_states = 1, + .dont_defrag = 1, + .init = ah_esp_init, + .exit = ah_esp_exit, + .conn_schedule = ah_esp_conn_schedule, + .conn_in_get = ah_esp_conn_in_get, + .conn_out_get = ah_esp_conn_out_get, + .snat_handler = NULL, + .dnat_handler = NULL, + .csum_check = NULL, + .state_transition = NULL, + .register_app = NULL, + .unregister_app = NULL, + .app_conn_bind = NULL, + .debug_packet = ah_esp_debug_packet, + .timeout_change = NULL, /* ISAKMP */ + .set_state_timeout = NULL, +}; +#endif + +#ifdef CONFIG_IP_VS_PROTO_ESP +struct ip_vs_protocol ip_vs_protocol_esp = { + .name = "ESP", + .protocol = IPPROTO_ESP, + .num_states = 1, + .dont_defrag = 1, + .init = ah_esp_init, + .exit = ah_esp_exit, + .conn_schedule = ah_esp_conn_schedule, + .conn_in_get = ah_esp_conn_in_get, + .conn_out_get = ah_esp_conn_out_get, + .snat_handler = NULL, + .dnat_handler = NULL, + .csum_check = NULL, + .state_transition = NULL, + .register_app = NULL, + .unregister_app = NULL, + .app_conn_bind = NULL, + .debug_packet = ah_esp_debug_packet, + .timeout_change = NULL, /* ISAKMP */ +}; +#endif diff --git a/net/ipv4/ipvs/ip_vs_proto_tcp.c b/net/netfilter/ipvs/ip_vs_proto_tcp.c index b83dc14b0a4d..dd4566ea2bff 100644 --- a/net/ipv4/ipvs/ip_vs_proto_tcp.c +++ b/net/netfilter/ipvs/ip_vs_proto_tcp.c @@ -1,8 +1,6 @@ /* * ip_vs_proto_tcp.c: TCP load balancing support for IPVS * - * Version: $Id: ip_vs_proto_tcp.c,v 1.3 2002/11/30 01:50:35 wensong Exp $ - * * Authors: Wensong Zhang <wensong@linuxvirtualserver.org> * Julian Anastasov <ja@ssi.bg> * @@ -20,6 +18,7 @@ #include <linux/tcp.h> /* for tcphdr */ #include <net/ip.h> #include <net/tcp.h> /* for csum_tcpudp_magic */ +#include <net/ip6_checksum.h> #include <linux/netfilter.h> #include <linux/netfilter_ipv4.h> @@ -27,8 +26,9 @@ static struct ip_vs_conn * -tcp_conn_in_get(const struct sk_buff *skb, struct ip_vs_protocol *pp, - const struct iphdr *iph, unsigned int proto_off, int inverse) +tcp_conn_in_get(int af, const struct sk_buff *skb, struct ip_vs_protocol *pp, + const struct ip_vs_iphdr *iph, unsigned int proto_off, + int inverse) { __be16 _ports[2], *pptr; @@ -37,19 +37,20 @@ tcp_conn_in_get(const struct sk_buff *skb, struct ip_vs_protocol *pp, return NULL; if (likely(!inverse)) { - return ip_vs_conn_in_get(iph->protocol, - iph->saddr, pptr[0], - iph->daddr, pptr[1]); + return ip_vs_conn_in_get(af, iph->protocol, + &iph->saddr, pptr[0], + &iph->daddr, pptr[1]); } else { - return ip_vs_conn_in_get(iph->protocol, - iph->daddr, pptr[1], - iph->saddr, pptr[0]); + return ip_vs_conn_in_get(af, iph->protocol, + &iph->daddr, pptr[1], + &iph->saddr, pptr[0]); } } static struct ip_vs_conn * -tcp_conn_out_get(const struct sk_buff *skb, struct ip_vs_protocol *pp, - const struct iphdr *iph, unsigned int proto_off, int inverse) +tcp_conn_out_get(int af, const struct sk_buff *skb, struct ip_vs_protocol *pp, + const struct ip_vs_iphdr *iph, unsigned int proto_off, + int inverse) { __be16 _ports[2], *pptr; @@ -58,34 +59,36 @@ tcp_conn_out_get(const struct sk_buff *skb, struct ip_vs_protocol *pp, return NULL; if (likely(!inverse)) { - return ip_vs_conn_out_get(iph->protocol, - iph->saddr, pptr[0], - iph->daddr, pptr[1]); + return ip_vs_conn_out_get(af, iph->protocol, + &iph->saddr, pptr[0], + &iph->daddr, pptr[1]); } else { - return ip_vs_conn_out_get(iph->protocol, - iph->daddr, pptr[1], - iph->saddr, pptr[0]); + return ip_vs_conn_out_get(af, iph->protocol, + &iph->daddr, pptr[1], + &iph->saddr, pptr[0]); } } static int -tcp_conn_schedule(struct sk_buff *skb, - struct ip_vs_protocol *pp, +tcp_conn_schedule(int af, struct sk_buff *skb, struct ip_vs_protocol *pp, int *verdict, struct ip_vs_conn **cpp) { struct ip_vs_service *svc; struct tcphdr _tcph, *th; + struct ip_vs_iphdr iph; + + ip_vs_fill_iphdr(af, skb_network_header(skb), &iph); - th = skb_header_pointer(skb, ip_hdrlen(skb), sizeof(_tcph), &_tcph); + th = skb_header_pointer(skb, iph.len, sizeof(_tcph), &_tcph); if (th == NULL) { *verdict = NF_DROP; return 0; } if (th->syn && - (svc = ip_vs_service_get(skb->mark, ip_hdr(skb)->protocol, - ip_hdr(skb)->daddr, th->dest))) { + (svc = ip_vs_service_get(af, skb->mark, iph.protocol, &iph.daddr, + th->dest))) { if (ip_vs_todrop()) { /* * It seems that we are very loaded. @@ -112,22 +115,62 @@ tcp_conn_schedule(struct sk_buff *skb, static inline void -tcp_fast_csum_update(struct tcphdr *tcph, __be32 oldip, __be32 newip, +tcp_fast_csum_update(int af, struct tcphdr *tcph, + const union nf_inet_addr *oldip, + const union nf_inet_addr *newip, __be16 oldport, __be16 newport) { +#ifdef CONFIG_IP_VS_IPV6 + if (af == AF_INET6) + tcph->check = + csum_fold(ip_vs_check_diff16(oldip->ip6, newip->ip6, + ip_vs_check_diff2(oldport, newport, + ~csum_unfold(tcph->check)))); + else +#endif tcph->check = - csum_fold(ip_vs_check_diff4(oldip, newip, + csum_fold(ip_vs_check_diff4(oldip->ip, newip->ip, ip_vs_check_diff2(oldport, newport, ~csum_unfold(tcph->check)))); } +static inline void +tcp_partial_csum_update(int af, struct tcphdr *tcph, + const union nf_inet_addr *oldip, + const union nf_inet_addr *newip, + __be16 oldlen, __be16 newlen) +{ +#ifdef CONFIG_IP_VS_IPV6 + if (af == AF_INET6) + tcph->check = + csum_fold(ip_vs_check_diff16(oldip->ip6, newip->ip6, + ip_vs_check_diff2(oldlen, newlen, + ~csum_unfold(tcph->check)))); + else +#endif + tcph->check = + csum_fold(ip_vs_check_diff4(oldip->ip, newip->ip, + ip_vs_check_diff2(oldlen, newlen, + ~csum_unfold(tcph->check)))); +} + + static int tcp_snat_handler(struct sk_buff *skb, struct ip_vs_protocol *pp, struct ip_vs_conn *cp) { struct tcphdr *tcph; - const unsigned int tcphoff = ip_hdrlen(skb); + unsigned int tcphoff; + int oldlen; + +#ifdef CONFIG_IP_VS_IPV6 + if (cp->af == AF_INET6) + tcphoff = sizeof(struct ipv6hdr); + else +#endif + tcphoff = ip_hdrlen(skb); + oldlen = skb->len - tcphoff; /* csum_check requires unshared skb */ if (!skb_make_writable(skb, tcphoff+sizeof(*tcph))) @@ -135,7 +178,7 @@ tcp_snat_handler(struct sk_buff *skb, if (unlikely(cp->app != NULL)) { /* Some checks before mangling */ - if (pp->csum_check && !pp->csum_check(skb, pp)) + if (pp->csum_check && !pp->csum_check(cp->af, skb, pp)) return 0; /* Call application helper if needed */ @@ -143,13 +186,17 @@ tcp_snat_handler(struct sk_buff *skb, return 0; } - tcph = (void *)ip_hdr(skb) + tcphoff; + tcph = (void *)skb_network_header(skb) + tcphoff; tcph->source = cp->vport; /* Adjust TCP checksums */ - if (!cp->app) { + if (skb->ip_summed == CHECKSUM_PARTIAL) { + tcp_partial_csum_update(cp->af, tcph, &cp->daddr, &cp->vaddr, + htonl(oldlen), + htonl(skb->len - tcphoff)); + } else if (!cp->app) { /* Only port and addr are changed, do fast csum update */ - tcp_fast_csum_update(tcph, cp->daddr, cp->vaddr, + tcp_fast_csum_update(cp->af, tcph, &cp->daddr, &cp->vaddr, cp->dport, cp->vport); if (skb->ip_summed == CHECKSUM_COMPLETE) skb->ip_summed = CHECKSUM_NONE; @@ -157,9 +204,20 @@ tcp_snat_handler(struct sk_buff *skb, /* full checksum calculation */ tcph->check = 0; skb->csum = skb_checksum(skb, tcphoff, skb->len - tcphoff, 0); - tcph->check = csum_tcpudp_magic(cp->vaddr, cp->caddr, - skb->len - tcphoff, - cp->protocol, skb->csum); +#ifdef CONFIG_IP_VS_IPV6 + if (cp->af == AF_INET6) + tcph->check = csum_ipv6_magic(&cp->vaddr.in6, + &cp->caddr.in6, + skb->len - tcphoff, + cp->protocol, skb->csum); + else +#endif + tcph->check = csum_tcpudp_magic(cp->vaddr.ip, + cp->caddr.ip, + skb->len - tcphoff, + cp->protocol, + skb->csum); + IP_VS_DBG(11, "O-pkt: %s O-csum=%d (+%zd)\n", pp->name, tcph->check, (char*)&(tcph->check) - (char*)tcph); @@ -173,7 +231,16 @@ tcp_dnat_handler(struct sk_buff *skb, struct ip_vs_protocol *pp, struct ip_vs_conn *cp) { struct tcphdr *tcph; - const unsigned int tcphoff = ip_hdrlen(skb); + unsigned int tcphoff; + int oldlen; + +#ifdef CONFIG_IP_VS_IPV6 + if (cp->af == AF_INET6) + tcphoff = sizeof(struct ipv6hdr); + else +#endif + tcphoff = ip_hdrlen(skb); + oldlen = skb->len - tcphoff; /* csum_check requires unshared skb */ if (!skb_make_writable(skb, tcphoff+sizeof(*tcph))) @@ -181,7 +248,7 @@ tcp_dnat_handler(struct sk_buff *skb, if (unlikely(cp->app != NULL)) { /* Some checks before mangling */ - if (pp->csum_check && !pp->csum_check(skb, pp)) + if (pp->csum_check && !pp->csum_check(cp->af, skb, pp)) return 0; /* @@ -192,15 +259,19 @@ tcp_dnat_handler(struct sk_buff *skb, return 0; } - tcph = (void *)ip_hdr(skb) + tcphoff; + tcph = (void *)skb_network_header(skb) + tcphoff; tcph->dest = cp->dport; /* * Adjust TCP checksums */ - if (!cp->app) { + if (skb->ip_summed == CHECKSUM_PARTIAL) { + tcp_partial_csum_update(cp->af, tcph, &cp->daddr, &cp->vaddr, + htonl(oldlen), + htonl(skb->len - tcphoff)); + } else if (!cp->app) { /* Only port and addr are changed, do fast csum update */ - tcp_fast_csum_update(tcph, cp->vaddr, cp->daddr, + tcp_fast_csum_update(cp->af, tcph, &cp->vaddr, &cp->daddr, cp->vport, cp->dport); if (skb->ip_summed == CHECKSUM_COMPLETE) skb->ip_summed = CHECKSUM_NONE; @@ -208,9 +279,19 @@ tcp_dnat_handler(struct sk_buff *skb, /* full checksum calculation */ tcph->check = 0; skb->csum = skb_checksum(skb, tcphoff, skb->len - tcphoff, 0); - tcph->check = csum_tcpudp_magic(cp->caddr, cp->daddr, - skb->len - tcphoff, - cp->protocol, skb->csum); +#ifdef CONFIG_IP_VS_IPV6 + if (cp->af == AF_INET6) + tcph->check = csum_ipv6_magic(&cp->caddr.in6, + &cp->daddr.in6, + skb->len - tcphoff, + cp->protocol, skb->csum); + else +#endif + tcph->check = csum_tcpudp_magic(cp->caddr.ip, + cp->daddr.ip, + skb->len - tcphoff, + cp->protocol, + skb->csum); skb->ip_summed = CHECKSUM_UNNECESSARY; } return 1; @@ -218,21 +299,43 @@ tcp_dnat_handler(struct sk_buff *skb, static int -tcp_csum_check(struct sk_buff *skb, struct ip_vs_protocol *pp) +tcp_csum_check(int af, struct sk_buff *skb, struct ip_vs_protocol *pp) { - const unsigned int tcphoff = ip_hdrlen(skb); + unsigned int tcphoff; + +#ifdef CONFIG_IP_VS_IPV6 + if (af == AF_INET6) + tcphoff = sizeof(struct ipv6hdr); + else +#endif + tcphoff = ip_hdrlen(skb); switch (skb->ip_summed) { case CHECKSUM_NONE: skb->csum = skb_checksum(skb, tcphoff, skb->len - tcphoff, 0); case CHECKSUM_COMPLETE: - if (csum_tcpudp_magic(ip_hdr(skb)->saddr, ip_hdr(skb)->daddr, - skb->len - tcphoff, - ip_hdr(skb)->protocol, skb->csum)) { - IP_VS_DBG_RL_PKT(0, pp, skb, 0, - "Failed checksum for"); - return 0; - } +#ifdef CONFIG_IP_VS_IPV6 + if (af == AF_INET6) { + if (csum_ipv6_magic(&ipv6_hdr(skb)->saddr, + &ipv6_hdr(skb)->daddr, + skb->len - tcphoff, + ipv6_hdr(skb)->nexthdr, + skb->csum)) { + IP_VS_DBG_RL_PKT(0, pp, skb, 0, + "Failed checksum for"); + return 0; + } + } else +#endif + if (csum_tcpudp_magic(ip_hdr(skb)->saddr, + ip_hdr(skb)->daddr, + skb->len - tcphoff, + ip_hdr(skb)->protocol, + skb->csum)) { + IP_VS_DBG_RL_PKT(0, pp, skb, 0, + "Failed checksum for"); + return 0; + } break; default: /* No need to checksum. */ @@ -421,19 +524,23 @@ set_tcp_state(struct ip_vs_protocol *pp, struct ip_vs_conn *cp, if (new_state != cp->state) { struct ip_vs_dest *dest = cp->dest; - IP_VS_DBG(8, "%s %s [%c%c%c%c] %u.%u.%u.%u:%d->" - "%u.%u.%u.%u:%d state: %s->%s conn->refcnt:%d\n", - pp->name, - (state_off==TCP_DIR_OUTPUT)?"output ":"input ", - th->syn? 'S' : '.', - th->fin? 'F' : '.', - th->ack? 'A' : '.', - th->rst? 'R' : '.', - NIPQUAD(cp->daddr), ntohs(cp->dport), - NIPQUAD(cp->caddr), ntohs(cp->cport), - tcp_state_name(cp->state), - tcp_state_name(new_state), - atomic_read(&cp->refcnt)); + IP_VS_DBG_BUF(8, "%s %s [%c%c%c%c] %s:%d->" + "%s:%d state: %s->%s conn->refcnt:%d\n", + pp->name, + ((state_off == TCP_DIR_OUTPUT) ? + "output " : "input "), + th->syn ? 'S' : '.', + th->fin ? 'F' : '.', + th->ack ? 'A' : '.', + th->rst ? 'R' : '.', + IP_VS_DBG_ADDR(cp->af, &cp->daddr), + ntohs(cp->dport), + IP_VS_DBG_ADDR(cp->af, &cp->caddr), + ntohs(cp->cport), + tcp_state_name(cp->state), + tcp_state_name(new_state), + atomic_read(&cp->refcnt)); + if (dest) { if (!(cp->flags & IP_VS_CONN_F_INACTIVE) && (new_state != IP_VS_TCP_S_ESTABLISHED)) { @@ -463,7 +570,13 @@ tcp_state_transition(struct ip_vs_conn *cp, int direction, { struct tcphdr _tcph, *th; - th = skb_header_pointer(skb, ip_hdrlen(skb), sizeof(_tcph), &_tcph); +#ifdef CONFIG_IP_VS_IPV6 + int ihl = cp->af == AF_INET ? ip_hdrlen(skb) : sizeof(struct ipv6hdr); +#else + int ihl = ip_hdrlen(skb); +#endif + + th = skb_header_pointer(skb, ihl, sizeof(_tcph), &_tcph); if (th == NULL) return 0; @@ -548,12 +661,15 @@ tcp_app_conn_bind(struct ip_vs_conn *cp) break; spin_unlock(&tcp_app_lock); - IP_VS_DBG(9, "%s: Binding conn %u.%u.%u.%u:%u->" - "%u.%u.%u.%u:%u to app %s on port %u\n", - __func__, - NIPQUAD(cp->caddr), ntohs(cp->cport), - NIPQUAD(cp->vaddr), ntohs(cp->vport), - inc->name, ntohs(inc->port)); + IP_VS_DBG_BUF(9, "%s: Binding conn %s:%u->" + "%s:%u to app %s on port %u\n", + __func__, + IP_VS_DBG_ADDR(cp->af, &cp->caddr), + ntohs(cp->cport), + IP_VS_DBG_ADDR(cp->af, &cp->vaddr), + ntohs(cp->vport), + inc->name, ntohs(inc->port)); + cp->app = inc; if (inc->init_conn) result = inc->init_conn(inc, cp); diff --git a/net/ipv4/ipvs/ip_vs_proto_udp.c b/net/netfilter/ipvs/ip_vs_proto_udp.c index 75771cb3cd6f..6eb6039d6343 100644 --- a/net/ipv4/ipvs/ip_vs_proto_udp.c +++ b/net/netfilter/ipvs/ip_vs_proto_udp.c @@ -1,8 +1,6 @@ /* * ip_vs_proto_udp.c: UDP load balancing support for IPVS * - * Version: $Id: ip_vs_proto_udp.c,v 1.3 2002/11/30 01:50:35 wensong Exp $ - * * Authors: Wensong Zhang <wensong@linuxvirtualserver.org> * Julian Anastasov <ja@ssi.bg> * @@ -24,10 +22,12 @@ #include <net/ip_vs.h> #include <net/ip.h> +#include <net/ip6_checksum.h> static struct ip_vs_conn * -udp_conn_in_get(const struct sk_buff *skb, struct ip_vs_protocol *pp, - const struct iphdr *iph, unsigned int proto_off, int inverse) +udp_conn_in_get(int af, const struct sk_buff *skb, struct ip_vs_protocol *pp, + const struct ip_vs_iphdr *iph, unsigned int proto_off, + int inverse) { struct ip_vs_conn *cp; __be16 _ports[2], *pptr; @@ -37,13 +37,13 @@ udp_conn_in_get(const struct sk_buff *skb, struct ip_vs_protocol *pp, return NULL; if (likely(!inverse)) { - cp = ip_vs_conn_in_get(iph->protocol, - iph->saddr, pptr[0], - iph->daddr, pptr[1]); + cp = ip_vs_conn_in_get(af, iph->protocol, + &iph->saddr, pptr[0], + &iph->daddr, pptr[1]); } else { - cp = ip_vs_conn_in_get(iph->protocol, - iph->daddr, pptr[1], - iph->saddr, pptr[0]); + cp = ip_vs_conn_in_get(af, iph->protocol, + &iph->daddr, pptr[1], + &iph->saddr, pptr[0]); } return cp; @@ -51,25 +51,25 @@ udp_conn_in_get(const struct sk_buff *skb, struct ip_vs_protocol *pp, static struct ip_vs_conn * -udp_conn_out_get(const struct sk_buff *skb, struct ip_vs_protocol *pp, - const struct iphdr *iph, unsigned int proto_off, int inverse) +udp_conn_out_get(int af, const struct sk_buff *skb, struct ip_vs_protocol *pp, + const struct ip_vs_iphdr *iph, unsigned int proto_off, + int inverse) { struct ip_vs_conn *cp; __be16 _ports[2], *pptr; - pptr = skb_header_pointer(skb, ip_hdrlen(skb), - sizeof(_ports), _ports); + pptr = skb_header_pointer(skb, proto_off, sizeof(_ports), _ports); if (pptr == NULL) return NULL; if (likely(!inverse)) { - cp = ip_vs_conn_out_get(iph->protocol, - iph->saddr, pptr[0], - iph->daddr, pptr[1]); + cp = ip_vs_conn_out_get(af, iph->protocol, + &iph->saddr, pptr[0], + &iph->daddr, pptr[1]); } else { - cp = ip_vs_conn_out_get(iph->protocol, - iph->daddr, pptr[1], - iph->saddr, pptr[0]); + cp = ip_vs_conn_out_get(af, iph->protocol, + &iph->daddr, pptr[1], + &iph->saddr, pptr[0]); } return cp; @@ -77,21 +77,24 @@ udp_conn_out_get(const struct sk_buff *skb, struct ip_vs_protocol *pp, static int -udp_conn_schedule(struct sk_buff *skb, struct ip_vs_protocol *pp, +udp_conn_schedule(int af, struct sk_buff *skb, struct ip_vs_protocol *pp, int *verdict, struct ip_vs_conn **cpp) { struct ip_vs_service *svc; struct udphdr _udph, *uh; + struct ip_vs_iphdr iph; - uh = skb_header_pointer(skb, ip_hdrlen(skb), - sizeof(_udph), &_udph); + ip_vs_fill_iphdr(af, skb_network_header(skb), &iph); + + uh = skb_header_pointer(skb, iph.len, sizeof(_udph), &_udph); if (uh == NULL) { *verdict = NF_DROP; return 0; } - if ((svc = ip_vs_service_get(skb->mark, ip_hdr(skb)->protocol, - ip_hdr(skb)->daddr, uh->dest))) { + svc = ip_vs_service_get(af, skb->mark, iph.protocol, + &iph.daddr, uh->dest); + if (svc) { if (ip_vs_todrop()) { /* * It seems that we are very loaded. @@ -118,23 +121,63 @@ udp_conn_schedule(struct sk_buff *skb, struct ip_vs_protocol *pp, static inline void -udp_fast_csum_update(struct udphdr *uhdr, __be32 oldip, __be32 newip, +udp_fast_csum_update(int af, struct udphdr *uhdr, + const union nf_inet_addr *oldip, + const union nf_inet_addr *newip, __be16 oldport, __be16 newport) { - uhdr->check = - csum_fold(ip_vs_check_diff4(oldip, newip, - ip_vs_check_diff2(oldport, newport, - ~csum_unfold(uhdr->check)))); +#ifdef CONFIG_IP_VS_IPV6 + if (af == AF_INET6) + uhdr->check = + csum_fold(ip_vs_check_diff16(oldip->ip6, newip->ip6, + ip_vs_check_diff2(oldport, newport, + ~csum_unfold(uhdr->check)))); + else +#endif + uhdr->check = + csum_fold(ip_vs_check_diff4(oldip->ip, newip->ip, + ip_vs_check_diff2(oldport, newport, + ~csum_unfold(uhdr->check)))); if (!uhdr->check) uhdr->check = CSUM_MANGLED_0; } +static inline void +udp_partial_csum_update(int af, struct udphdr *uhdr, + const union nf_inet_addr *oldip, + const union nf_inet_addr *newip, + __be16 oldlen, __be16 newlen) +{ +#ifdef CONFIG_IP_VS_IPV6 + if (af == AF_INET6) + uhdr->check = + csum_fold(ip_vs_check_diff16(oldip->ip6, newip->ip6, + ip_vs_check_diff2(oldlen, newlen, + ~csum_unfold(uhdr->check)))); + else +#endif + uhdr->check = + csum_fold(ip_vs_check_diff4(oldip->ip, newip->ip, + ip_vs_check_diff2(oldlen, newlen, + ~csum_unfold(uhdr->check)))); +} + + static int udp_snat_handler(struct sk_buff *skb, struct ip_vs_protocol *pp, struct ip_vs_conn *cp) { struct udphdr *udph; - const unsigned int udphoff = ip_hdrlen(skb); + unsigned int udphoff; + int oldlen; + +#ifdef CONFIG_IP_VS_IPV6 + if (cp->af == AF_INET6) + udphoff = sizeof(struct ipv6hdr); + else +#endif + udphoff = ip_hdrlen(skb); + oldlen = skb->len - udphoff; /* csum_check requires unshared skb */ if (!skb_make_writable(skb, udphoff+sizeof(*udph))) @@ -142,7 +185,7 @@ udp_snat_handler(struct sk_buff *skb, if (unlikely(cp->app != NULL)) { /* Some checks before mangling */ - if (pp->csum_check && !pp->csum_check(skb, pp)) + if (pp->csum_check && !pp->csum_check(cp->af, skb, pp)) return 0; /* @@ -152,15 +195,19 @@ udp_snat_handler(struct sk_buff *skb, return 0; } - udph = (void *)ip_hdr(skb) + udphoff; + udph = (void *)skb_network_header(skb) + udphoff; udph->source = cp->vport; /* * Adjust UDP checksums */ - if (!cp->app && (udph->check != 0)) { + if (skb->ip_summed == CHECKSUM_PARTIAL) { + udp_partial_csum_update(cp->af, udph, &cp->daddr, &cp->vaddr, + htonl(oldlen), + htonl(skb->len - udphoff)); + } else if (!cp->app && (udph->check != 0)) { /* Only port and addr are changed, do fast csum update */ - udp_fast_csum_update(udph, cp->daddr, cp->vaddr, + udp_fast_csum_update(cp->af, udph, &cp->daddr, &cp->vaddr, cp->dport, cp->vport); if (skb->ip_summed == CHECKSUM_COMPLETE) skb->ip_summed = CHECKSUM_NONE; @@ -168,9 +215,19 @@ udp_snat_handler(struct sk_buff *skb, /* full checksum calculation */ udph->check = 0; skb->csum = skb_checksum(skb, udphoff, skb->len - udphoff, 0); - udph->check = csum_tcpudp_magic(cp->vaddr, cp->caddr, - skb->len - udphoff, - cp->protocol, skb->csum); +#ifdef CONFIG_IP_VS_IPV6 + if (cp->af == AF_INET6) + udph->check = csum_ipv6_magic(&cp->vaddr.in6, + &cp->caddr.in6, + skb->len - udphoff, + cp->protocol, skb->csum); + else +#endif + udph->check = csum_tcpudp_magic(cp->vaddr.ip, + cp->caddr.ip, + skb->len - udphoff, + cp->protocol, + skb->csum); if (udph->check == 0) udph->check = CSUM_MANGLED_0; IP_VS_DBG(11, "O-pkt: %s O-csum=%d (+%zd)\n", @@ -186,7 +243,16 @@ udp_dnat_handler(struct sk_buff *skb, struct ip_vs_protocol *pp, struct ip_vs_conn *cp) { struct udphdr *udph; - unsigned int udphoff = ip_hdrlen(skb); + unsigned int udphoff; + int oldlen; + +#ifdef CONFIG_IP_VS_IPV6 + if (cp->af == AF_INET6) + udphoff = sizeof(struct ipv6hdr); + else +#endif + udphoff = ip_hdrlen(skb); + oldlen = skb->len - udphoff; /* csum_check requires unshared skb */ if (!skb_make_writable(skb, udphoff+sizeof(*udph))) @@ -194,7 +260,7 @@ udp_dnat_handler(struct sk_buff *skb, if (unlikely(cp->app != NULL)) { /* Some checks before mangling */ - if (pp->csum_check && !pp->csum_check(skb, pp)) + if (pp->csum_check && !pp->csum_check(cp->af, skb, pp)) return 0; /* @@ -205,15 +271,19 @@ udp_dnat_handler(struct sk_buff *skb, return 0; } - udph = (void *)ip_hdr(skb) + udphoff; + udph = (void *)skb_network_header(skb) + udphoff; udph->dest = cp->dport; /* * Adjust UDP checksums */ - if (!cp->app && (udph->check != 0)) { + if (skb->ip_summed == CHECKSUM_PARTIAL) { + udp_partial_csum_update(cp->af, udph, &cp->daddr, &cp->vaddr, + htonl(oldlen), + htonl(skb->len - udphoff)); + } else if (!cp->app && (udph->check != 0)) { /* Only port and addr are changed, do fast csum update */ - udp_fast_csum_update(udph, cp->vaddr, cp->daddr, + udp_fast_csum_update(cp->af, udph, &cp->vaddr, &cp->daddr, cp->vport, cp->dport); if (skb->ip_summed == CHECKSUM_COMPLETE) skb->ip_summed = CHECKSUM_NONE; @@ -221,9 +291,19 @@ udp_dnat_handler(struct sk_buff *skb, /* full checksum calculation */ udph->check = 0; skb->csum = skb_checksum(skb, udphoff, skb->len - udphoff, 0); - udph->check = csum_tcpudp_magic(cp->caddr, cp->daddr, - skb->len - udphoff, - cp->protocol, skb->csum); +#ifdef CONFIG_IP_VS_IPV6 + if (cp->af == AF_INET6) + udph->check = csum_ipv6_magic(&cp->caddr.in6, + &cp->daddr.in6, + skb->len - udphoff, + cp->protocol, skb->csum); + else +#endif + udph->check = csum_tcpudp_magic(cp->caddr.ip, + cp->daddr.ip, + skb->len - udphoff, + cp->protocol, + skb->csum); if (udph->check == 0) udph->check = CSUM_MANGLED_0; skb->ip_summed = CHECKSUM_UNNECESSARY; @@ -233,10 +313,17 @@ udp_dnat_handler(struct sk_buff *skb, static int -udp_csum_check(struct sk_buff *skb, struct ip_vs_protocol *pp) +udp_csum_check(int af, struct sk_buff *skb, struct ip_vs_protocol *pp) { struct udphdr _udph, *uh; - const unsigned int udphoff = ip_hdrlen(skb); + unsigned int udphoff; + +#ifdef CONFIG_IP_VS_IPV6 + if (af == AF_INET6) + udphoff = sizeof(struct ipv6hdr); + else +#endif + udphoff = ip_hdrlen(skb); uh = skb_header_pointer(skb, udphoff, sizeof(_udph), &_udph); if (uh == NULL) @@ -248,15 +335,28 @@ udp_csum_check(struct sk_buff *skb, struct ip_vs_protocol *pp) skb->csum = skb_checksum(skb, udphoff, skb->len - udphoff, 0); case CHECKSUM_COMPLETE: - if (csum_tcpudp_magic(ip_hdr(skb)->saddr, - ip_hdr(skb)->daddr, - skb->len - udphoff, - ip_hdr(skb)->protocol, - skb->csum)) { - IP_VS_DBG_RL_PKT(0, pp, skb, 0, - "Failed checksum for"); - return 0; - } +#ifdef CONFIG_IP_VS_IPV6 + if (af == AF_INET6) { + if (csum_ipv6_magic(&ipv6_hdr(skb)->saddr, + &ipv6_hdr(skb)->daddr, + skb->len - udphoff, + ipv6_hdr(skb)->nexthdr, + skb->csum)) { + IP_VS_DBG_RL_PKT(0, pp, skb, 0, + "Failed checksum for"); + return 0; + } + } else +#endif + if (csum_tcpudp_magic(ip_hdr(skb)->saddr, + ip_hdr(skb)->daddr, + skb->len - udphoff, + ip_hdr(skb)->protocol, + skb->csum)) { + IP_VS_DBG_RL_PKT(0, pp, skb, 0, + "Failed checksum for"); + return 0; + } break; default: /* No need to checksum. */ @@ -342,12 +442,15 @@ static int udp_app_conn_bind(struct ip_vs_conn *cp) break; spin_unlock(&udp_app_lock); - IP_VS_DBG(9, "%s: Binding conn %u.%u.%u.%u:%u->" - "%u.%u.%u.%u:%u to app %s on port %u\n", - __func__, - NIPQUAD(cp->caddr), ntohs(cp->cport), - NIPQUAD(cp->vaddr), ntohs(cp->vport), - inc->name, ntohs(inc->port)); + IP_VS_DBG_BUF(9, "%s: Binding conn %s:%u->" + "%s:%u to app %s on port %u\n", + __func__, + IP_VS_DBG_ADDR(cp->af, &cp->caddr), + ntohs(cp->cport), + IP_VS_DBG_ADDR(cp->af, &cp->vaddr), + ntohs(cp->vport), + inc->name, ntohs(inc->port)); + cp->app = inc; if (inc->init_conn) result = inc->init_conn(inc, cp); diff --git a/net/ipv4/ipvs/ip_vs_rr.c b/net/netfilter/ipvs/ip_vs_rr.c index 433f8a947924..a22195f68ac4 100644 --- a/net/ipv4/ipvs/ip_vs_rr.c +++ b/net/netfilter/ipvs/ip_vs_rr.c @@ -1,8 +1,6 @@ /* * IPVS: Round-Robin Scheduling module * - * Version: $Id: ip_vs_rr.c,v 1.9 2002/09/15 08:14:08 wensong Exp $ - * * Authors: Wensong Zhang <wensong@linuxvirtualserver.org> * Peter Kese <peter.kese@ijs.si> * @@ -34,12 +32,6 @@ static int ip_vs_rr_init_svc(struct ip_vs_service *svc) } -static int ip_vs_rr_done_svc(struct ip_vs_service *svc) -{ - return 0; -} - - static int ip_vs_rr_update_svc(struct ip_vs_service *svc) { svc->sched_data = &svc->destinations; @@ -82,11 +74,11 @@ ip_vs_rr_schedule(struct ip_vs_service *svc, const struct sk_buff *skb) out: svc->sched_data = q; write_unlock(&svc->sched_lock); - IP_VS_DBG(6, "RR: server %u.%u.%u.%u:%u " - "activeconns %d refcnt %d weight %d\n", - NIPQUAD(dest->addr), ntohs(dest->port), - atomic_read(&dest->activeconns), - atomic_read(&dest->refcnt), atomic_read(&dest->weight)); + IP_VS_DBG_BUF(6, "RR: server %s:%u " + "activeconns %d refcnt %d weight %d\n", + IP_VS_DBG_ADDR(svc->af, &dest->addr), ntohs(dest->port), + atomic_read(&dest->activeconns), + atomic_read(&dest->refcnt), atomic_read(&dest->weight)); return dest; } @@ -96,15 +88,17 @@ static struct ip_vs_scheduler ip_vs_rr_scheduler = { .name = "rr", /* name */ .refcnt = ATOMIC_INIT(0), .module = THIS_MODULE, + .n_list = LIST_HEAD_INIT(ip_vs_rr_scheduler.n_list), +#ifdef CONFIG_IP_VS_IPV6 + .supports_ipv6 = 1, +#endif .init_service = ip_vs_rr_init_svc, - .done_service = ip_vs_rr_done_svc, .update_service = ip_vs_rr_update_svc, .schedule = ip_vs_rr_schedule, }; static int __init ip_vs_rr_init(void) { - INIT_LIST_HEAD(&ip_vs_rr_scheduler.n_list); return register_ip_vs_scheduler(&ip_vs_rr_scheduler); } diff --git a/net/ipv4/ipvs/ip_vs_sched.c b/net/netfilter/ipvs/ip_vs_sched.c index 121a32b1b756..a46ad9e35016 100644 --- a/net/ipv4/ipvs/ip_vs_sched.c +++ b/net/netfilter/ipvs/ip_vs_sched.c @@ -5,8 +5,6 @@ * high-performance and highly available server based on a * cluster of servers. * - * Version: $Id: ip_vs_sched.c,v 1.13 2003/05/10 03:05:23 wensong Exp $ - * * Authors: Wensong Zhang <wensong@linuxvirtualserver.org> * Peter Kese <peter.kese@ijs.si> * @@ -186,7 +184,7 @@ int register_ip_vs_scheduler(struct ip_vs_scheduler *scheduler) write_lock_bh(&__ip_vs_sched_lock); - if (scheduler->n_list.next != &scheduler->n_list) { + if (!list_empty(&scheduler->n_list)) { write_unlock_bh(&__ip_vs_sched_lock); ip_vs_use_count_dec(); IP_VS_ERR("register_ip_vs_scheduler(): [%s] scheduler " @@ -231,7 +229,7 @@ int unregister_ip_vs_scheduler(struct ip_vs_scheduler *scheduler) } write_lock_bh(&__ip_vs_sched_lock); - if (scheduler->n_list.next == &scheduler->n_list) { + if (list_empty(&scheduler->n_list)) { write_unlock_bh(&__ip_vs_sched_lock); IP_VS_ERR("unregister_ip_vs_scheduler(): [%s] scheduler " "is not in the list. failed\n", scheduler->name); diff --git a/net/ipv4/ipvs/ip_vs_sed.c b/net/netfilter/ipvs/ip_vs_sed.c index dd7c128f9db3..7d2f22f04b83 100644 --- a/net/ipv4/ipvs/ip_vs_sed.c +++ b/net/netfilter/ipvs/ip_vs_sed.c @@ -1,8 +1,6 @@ /* * IPVS: Shortest Expected Delay scheduling module * - * Version: $Id: ip_vs_sed.c,v 1.1 2003/05/10 03:06:08 wensong Exp $ - * * Authors: Wensong Zhang <wensong@linuxvirtualserver.org> * * This program is free software; you can redistribute it and/or @@ -43,27 +41,6 @@ #include <net/ip_vs.h> -static int -ip_vs_sed_init_svc(struct ip_vs_service *svc) -{ - return 0; -} - - -static int -ip_vs_sed_done_svc(struct ip_vs_service *svc) -{ - return 0; -} - - -static int -ip_vs_sed_update_svc(struct ip_vs_service *svc) -{ - return 0; -} - - static inline unsigned int ip_vs_sed_dest_overhead(struct ip_vs_dest *dest) { @@ -124,12 +101,12 @@ ip_vs_sed_schedule(struct ip_vs_service *svc, const struct sk_buff *skb) } } - IP_VS_DBG(6, "SED: server %u.%u.%u.%u:%u " - "activeconns %d refcnt %d weight %d overhead %d\n", - NIPQUAD(least->addr), ntohs(least->port), - atomic_read(&least->activeconns), - atomic_read(&least->refcnt), - atomic_read(&least->weight), loh); + IP_VS_DBG_BUF(6, "SED: server %s:%u " + "activeconns %d refcnt %d weight %d overhead %d\n", + IP_VS_DBG_ADDR(svc->af, &least->addr), ntohs(least->port), + atomic_read(&least->activeconns), + atomic_read(&least->refcnt), + atomic_read(&least->weight), loh); return least; } @@ -140,16 +117,16 @@ static struct ip_vs_scheduler ip_vs_sed_scheduler = .name = "sed", .refcnt = ATOMIC_INIT(0), .module = THIS_MODULE, - .init_service = ip_vs_sed_init_svc, - .done_service = ip_vs_sed_done_svc, - .update_service = ip_vs_sed_update_svc, + .n_list = LIST_HEAD_INIT(ip_vs_sed_scheduler.n_list), +#ifdef CONFIG_IP_VS_IPV6 + .supports_ipv6 = 1, +#endif .schedule = ip_vs_sed_schedule, }; static int __init ip_vs_sed_init(void) { - INIT_LIST_HEAD(&ip_vs_sed_scheduler.n_list); return register_ip_vs_scheduler(&ip_vs_sed_scheduler); } diff --git a/net/ipv4/ipvs/ip_vs_sh.c b/net/netfilter/ipvs/ip_vs_sh.c index 1b25b00ef1e1..1d96de27fefd 100644 --- a/net/ipv4/ipvs/ip_vs_sh.c +++ b/net/netfilter/ipvs/ip_vs_sh.c @@ -1,8 +1,6 @@ /* * IPVS: Source Hashing scheduling module * - * Version: $Id: ip_vs_sh.c,v 1.5 2002/09/15 08:14:08 wensong Exp $ - * * Authors: Wensong Zhang <wensong@gnuchina.org> * * This program is free software; you can redistribute it and/or @@ -217,7 +215,7 @@ ip_vs_sh_schedule(struct ip_vs_service *svc, const struct sk_buff *skb) IP_VS_DBG(6, "SH: source IP address %u.%u.%u.%u " "--> server %u.%u.%u.%u:%d\n", NIPQUAD(iph->saddr), - NIPQUAD(dest->addr), + NIPQUAD(dest->addr.ip), ntohs(dest->port)); return dest; @@ -232,6 +230,10 @@ static struct ip_vs_scheduler ip_vs_sh_scheduler = .name = "sh", .refcnt = ATOMIC_INIT(0), .module = THIS_MODULE, + .n_list = LIST_HEAD_INIT(ip_vs_sh_scheduler.n_list), +#ifdef CONFIG_IP_VS_IPV6 + .supports_ipv6 = 0, +#endif .init_service = ip_vs_sh_init_svc, .done_service = ip_vs_sh_done_svc, .update_service = ip_vs_sh_update_svc, @@ -241,7 +243,6 @@ static struct ip_vs_scheduler ip_vs_sh_scheduler = static int __init ip_vs_sh_init(void) { - INIT_LIST_HEAD(&ip_vs_sh_scheduler.n_list); return register_ip_vs_scheduler(&ip_vs_sh_scheduler); } diff --git a/net/ipv4/ipvs/ip_vs_sync.c b/net/netfilter/ipvs/ip_vs_sync.c index eff54efe0351..de5e7e118eed 100644 --- a/net/ipv4/ipvs/ip_vs_sync.c +++ b/net/netfilter/ipvs/ip_vs_sync.c @@ -5,8 +5,6 @@ * high-performance and highly available server based on a * cluster of servers. * - * Version: $Id: ip_vs_sync.c,v 1.13 2003/06/08 09:31:19 wensong Exp $ - * * Authors: Wensong Zhang <wensong@linuxvirtualserver.org> * * ip_vs_sync: sync connection info from master load balancer to backups @@ -29,10 +27,13 @@ #include <linux/in.h> #include <linux/igmp.h> /* for ip_mc_join_group */ #include <linux/udp.h> +#include <linux/err.h> +#include <linux/kthread.h> +#include <linux/wait.h> +#include <linux/kernel.h> #include <net/ip.h> #include <net/sock.h> -#include <asm/uaccess.h> /* for get_fs and set_fs */ #include <net/ip_vs.h> @@ -68,8 +69,8 @@ struct ip_vs_sync_conn_options { }; struct ip_vs_sync_thread_data { - struct completion *startup; - int state; + struct socket *sock; + char *buf; }; #define SIMPLE_CONN_SIZE (sizeof(struct ip_vs_sync_conn)) @@ -99,6 +100,7 @@ struct ip_vs_sync_thread_data { */ #define SYNC_MESG_HEADER_LEN 4 +#define MAX_CONNS_PER_SYNCBUFF 255 /* nr_conns in ip_vs_sync_mesg is 8 bit */ struct ip_vs_sync_mesg { __u8 nr_conns; @@ -140,18 +142,19 @@ volatile int ip_vs_backup_syncid = 0; char ip_vs_master_mcast_ifn[IP_VS_IFNAME_MAXLEN]; char ip_vs_backup_mcast_ifn[IP_VS_IFNAME_MAXLEN]; +/* sync daemon tasks */ +static struct task_struct *sync_master_thread; +static struct task_struct *sync_backup_thread; + /* multicast addr */ -static struct sockaddr_in mcast_addr; +static struct sockaddr_in mcast_addr = { + .sin_family = AF_INET, + .sin_port = __constant_htons(IP_VS_SYNC_PORT), + .sin_addr.s_addr = __constant_htonl(IP_VS_SYNC_GROUP), +}; -static inline void sb_queue_tail(struct ip_vs_sync_buff *sb) -{ - spin_lock(&ip_vs_sync_lock); - list_add_tail(&sb->list, &ip_vs_sync_queue); - spin_unlock(&ip_vs_sync_lock); -} - -static inline struct ip_vs_sync_buff * sb_dequeue(void) +static inline struct ip_vs_sync_buff *sb_dequeue(void) { struct ip_vs_sync_buff *sb; @@ -195,6 +198,16 @@ static inline void ip_vs_sync_buff_release(struct ip_vs_sync_buff *sb) kfree(sb); } +static inline void sb_queue_tail(struct ip_vs_sync_buff *sb) +{ + spin_lock(&ip_vs_sync_lock); + if (ip_vs_sync_state & IP_VS_STATE_MASTER) + list_add_tail(&sb->list, &ip_vs_sync_queue); + else + ip_vs_sync_buff_release(sb); + spin_unlock(&ip_vs_sync_lock); +} + /* * Get the current sync buffer if it has been created for more * than the specified time or the specified time is zero. @@ -245,9 +258,9 @@ void ip_vs_sync_conn(struct ip_vs_conn *cp) s->cport = cp->cport; s->vport = cp->vport; s->dport = cp->dport; - s->caddr = cp->caddr; - s->vaddr = cp->vaddr; - s->daddr = cp->daddr; + s->caddr = cp->caddr.ip; + s->vaddr = cp->vaddr.ip; + s->daddr = cp->daddr.ip; s->flags = htons(cp->flags & ~IP_VS_CONN_F_HASHED); s->state = htons(cp->state); if (cp->flags & IP_VS_CONN_F_SEQ_MASK) { @@ -355,21 +368,28 @@ static void ip_vs_process_message(const char *buffer, const size_t buflen) } if (!(flags & IP_VS_CONN_F_TEMPLATE)) - cp = ip_vs_conn_in_get(s->protocol, - s->caddr, s->cport, - s->vaddr, s->vport); + cp = ip_vs_conn_in_get(AF_INET, s->protocol, + (union nf_inet_addr *)&s->caddr, + s->cport, + (union nf_inet_addr *)&s->vaddr, + s->vport); else - cp = ip_vs_ct_in_get(s->protocol, - s->caddr, s->cport, - s->vaddr, s->vport); + cp = ip_vs_ct_in_get(AF_INET, s->protocol, + (union nf_inet_addr *)&s->caddr, + s->cport, + (union nf_inet_addr *)&s->vaddr, + s->vport); if (!cp) { /* * Find the appropriate destination for the connection. * If it is not found the connection will remain unbound * but still handled. */ - dest = ip_vs_find_dest(s->daddr, s->dport, - s->vaddr, s->vport, + dest = ip_vs_find_dest(AF_INET, + (union nf_inet_addr *)&s->daddr, + s->dport, + (union nf_inet_addr *)&s->vaddr, + s->vport, s->protocol); /* Set the approprite ativity flag */ if (s->protocol == IPPROTO_TCP) { @@ -378,10 +398,13 @@ static void ip_vs_process_message(const char *buffer, const size_t buflen) else flags &= ~IP_VS_CONN_F_INACTIVE; } - cp = ip_vs_conn_new(s->protocol, - s->caddr, s->cport, - s->vaddr, s->vport, - s->daddr, s->dport, + cp = ip_vs_conn_new(AF_INET, s->protocol, + (union nf_inet_addr *)&s->caddr, + s->cport, + (union nf_inet_addr *)&s->vaddr, + s->vport, + (union nf_inet_addr *)&s->daddr, + s->dport, flags, dest); if (dest) atomic_dec(&dest->refcnt); @@ -495,8 +518,8 @@ static int set_sync_mesg_maxlen(int sync_state) num = (dev->mtu - sizeof(struct iphdr) - sizeof(struct udphdr) - SYNC_MESG_HEADER_LEN - 20) / SIMPLE_CONN_SIZE; - sync_send_mesg_maxlen = - SYNC_MESG_HEADER_LEN + SIMPLE_CONN_SIZE * num; + sync_send_mesg_maxlen = SYNC_MESG_HEADER_LEN + + SIMPLE_CONN_SIZE * min(num, MAX_CONNS_PER_SYNCBUFF); IP_VS_DBG(7, "setting the maximum length of sync sending " "message %d.\n", sync_send_mesg_maxlen); } else if (sync_state == IP_VS_STATE_BACKUP) { @@ -574,14 +597,17 @@ static int bind_mcastif_addr(struct socket *sock, char *ifname) static struct socket * make_send_sock(void) { struct socket *sock; + int result; /* First create a socket */ - if (sock_create_kern(PF_INET, SOCK_DGRAM, IPPROTO_UDP, &sock) < 0) { + result = sock_create_kern(PF_INET, SOCK_DGRAM, IPPROTO_UDP, &sock); + if (result < 0) { IP_VS_ERR("Error during creation of socket; terminating\n"); - return NULL; + return ERR_PTR(result); } - if (set_mcast_if(sock->sk, ip_vs_master_mcast_ifn) < 0) { + result = set_mcast_if(sock->sk, ip_vs_master_mcast_ifn); + if (result < 0) { IP_VS_ERR("Error setting outbound mcast interface\n"); goto error; } @@ -589,14 +615,15 @@ static struct socket * make_send_sock(void) set_mcast_loop(sock->sk, 0); set_mcast_ttl(sock->sk, 1); - if (bind_mcastif_addr(sock, ip_vs_master_mcast_ifn) < 0) { + result = bind_mcastif_addr(sock, ip_vs_master_mcast_ifn); + if (result < 0) { IP_VS_ERR("Error binding address of the mcast interface\n"); goto error; } - if (sock->ops->connect(sock, - (struct sockaddr*)&mcast_addr, - sizeof(struct sockaddr), 0) < 0) { + result = sock->ops->connect(sock, (struct sockaddr *) &mcast_addr, + sizeof(struct sockaddr), 0); + if (result < 0) { IP_VS_ERR("Error connecting to the multicast addr\n"); goto error; } @@ -605,7 +632,7 @@ static struct socket * make_send_sock(void) error: sock_release(sock); - return NULL; + return ERR_PTR(result); } @@ -615,27 +642,30 @@ static struct socket * make_send_sock(void) static struct socket * make_receive_sock(void) { struct socket *sock; + int result; /* First create a socket */ - if (sock_create_kern(PF_INET, SOCK_DGRAM, IPPROTO_UDP, &sock) < 0) { + result = sock_create_kern(PF_INET, SOCK_DGRAM, IPPROTO_UDP, &sock); + if (result < 0) { IP_VS_ERR("Error during creation of socket; terminating\n"); - return NULL; + return ERR_PTR(result); } /* it is equivalent to the REUSEADDR option in user-space */ sock->sk->sk_reuse = 1; - if (sock->ops->bind(sock, - (struct sockaddr*)&mcast_addr, - sizeof(struct sockaddr)) < 0) { + result = sock->ops->bind(sock, (struct sockaddr *) &mcast_addr, + sizeof(struct sockaddr)); + if (result < 0) { IP_VS_ERR("Error binding to the multicast addr\n"); goto error; } /* join the multicast group */ - if (join_mcast_group(sock->sk, - (struct in_addr*)&mcast_addr.sin_addr, - ip_vs_backup_mcast_ifn) < 0) { + result = join_mcast_group(sock->sk, + (struct in_addr *) &mcast_addr.sin_addr, + ip_vs_backup_mcast_ifn); + if (result < 0) { IP_VS_ERR("Error joining to the multicast group\n"); goto error; } @@ -644,7 +674,7 @@ static struct socket * make_receive_sock(void) error: sock_release(sock); - return NULL; + return ERR_PTR(result); } @@ -702,44 +732,29 @@ ip_vs_receive(struct socket *sock, char *buffer, const size_t buflen) } -static DECLARE_WAIT_QUEUE_HEAD(sync_wait); -static pid_t sync_master_pid = 0; -static pid_t sync_backup_pid = 0; - -static DECLARE_WAIT_QUEUE_HEAD(stop_sync_wait); -static int stop_master_sync = 0; -static int stop_backup_sync = 0; - -static void sync_master_loop(void) +static int sync_thread_master(void *data) { - struct socket *sock; + struct ip_vs_sync_thread_data *tinfo = data; struct ip_vs_sync_buff *sb; - /* create the sending multicast socket */ - sock = make_send_sock(); - if (!sock) - return; - IP_VS_INFO("sync thread started: state = MASTER, mcast_ifn = %s, " "syncid = %d\n", ip_vs_master_mcast_ifn, ip_vs_master_syncid); - for (;;) { - while ((sb=sb_dequeue())) { - ip_vs_send_sync_msg(sock, sb->mesg); + while (!kthread_should_stop()) { + while ((sb = sb_dequeue())) { + ip_vs_send_sync_msg(tinfo->sock, sb->mesg); ip_vs_sync_buff_release(sb); } /* check if entries stay in curr_sb for 2 seconds */ - if ((sb = get_curr_sync_buff(2*HZ))) { - ip_vs_send_sync_msg(sock, sb->mesg); + sb = get_curr_sync_buff(2 * HZ); + if (sb) { + ip_vs_send_sync_msg(tinfo->sock, sb->mesg); ip_vs_sync_buff_release(sb); } - if (stop_master_sync) - break; - - msleep_interruptible(1000); + schedule_timeout_interruptible(HZ); } /* clean up the sync_buff queue */ @@ -753,267 +768,175 @@ static void sync_master_loop(void) } /* release the sending multicast socket */ - sock_release(sock); + sock_release(tinfo->sock); + kfree(tinfo); + + return 0; } -static void sync_backup_loop(void) +static int sync_thread_backup(void *data) { - struct socket *sock; - char *buf; + struct ip_vs_sync_thread_data *tinfo = data; int len; - if (!(buf = kmalloc(sync_recv_mesg_maxlen, GFP_ATOMIC))) { - IP_VS_ERR("sync_backup_loop: kmalloc error\n"); - return; - } - - /* create the receiving multicast socket */ - sock = make_receive_sock(); - if (!sock) - goto out; - IP_VS_INFO("sync thread started: state = BACKUP, mcast_ifn = %s, " "syncid = %d\n", ip_vs_backup_mcast_ifn, ip_vs_backup_syncid); - for (;;) { - /* do you have data now? */ - while (!skb_queue_empty(&(sock->sk->sk_receive_queue))) { - if ((len = - ip_vs_receive(sock, buf, - sync_recv_mesg_maxlen)) <= 0) { + while (!kthread_should_stop()) { + wait_event_interruptible(*tinfo->sock->sk->sk_sleep, + !skb_queue_empty(&tinfo->sock->sk->sk_receive_queue) + || kthread_should_stop()); + + /* do we have data now? */ + while (!skb_queue_empty(&(tinfo->sock->sk->sk_receive_queue))) { + len = ip_vs_receive(tinfo->sock, tinfo->buf, + sync_recv_mesg_maxlen); + if (len <= 0) { IP_VS_ERR("receiving message error\n"); break; } - /* disable bottom half, because it accessed the data + + /* disable bottom half, because it accesses the data shared by softirq while getting/creating conns */ local_bh_disable(); - ip_vs_process_message(buf, len); + ip_vs_process_message(tinfo->buf, len); local_bh_enable(); } - - if (stop_backup_sync) - break; - - msleep_interruptible(1000); } /* release the sending multicast socket */ - sock_release(sock); + sock_release(tinfo->sock); + kfree(tinfo->buf); + kfree(tinfo); - out: - kfree(buf); + return 0; } -static void set_sync_pid(int sync_state, pid_t sync_pid) -{ - if (sync_state == IP_VS_STATE_MASTER) - sync_master_pid = sync_pid; - else if (sync_state == IP_VS_STATE_BACKUP) - sync_backup_pid = sync_pid; -} - -static void set_stop_sync(int sync_state, int set) +int start_sync_thread(int state, char *mcast_ifn, __u8 syncid) { - if (sync_state == IP_VS_STATE_MASTER) - stop_master_sync = set; - else if (sync_state == IP_VS_STATE_BACKUP) - stop_backup_sync = set; - else { - stop_master_sync = set; - stop_backup_sync = set; - } -} + struct ip_vs_sync_thread_data *tinfo; + struct task_struct **realtask, *task; + struct socket *sock; + char *name, *buf = NULL; + int (*threadfn)(void *data); + int result = -ENOMEM; -static int sync_thread(void *startup) -{ - DECLARE_WAITQUEUE(wait, current); - mm_segment_t oldmm; - int state; - const char *name; - struct ip_vs_sync_thread_data *tinfo = startup; + IP_VS_DBG(7, "%s: pid %d\n", __func__, task_pid_nr(current)); + IP_VS_DBG(7, "Each ip_vs_sync_conn entry needs %Zd bytes\n", + sizeof(struct ip_vs_sync_conn)); - /* increase the module use count */ - ip_vs_use_count_inc(); + if (state == IP_VS_STATE_MASTER) { + if (sync_master_thread) + return -EEXIST; - if (ip_vs_sync_state & IP_VS_STATE_MASTER && !sync_master_pid) { - state = IP_VS_STATE_MASTER; + strlcpy(ip_vs_master_mcast_ifn, mcast_ifn, + sizeof(ip_vs_master_mcast_ifn)); + ip_vs_master_syncid = syncid; + realtask = &sync_master_thread; name = "ipvs_syncmaster"; - } else if (ip_vs_sync_state & IP_VS_STATE_BACKUP && !sync_backup_pid) { - state = IP_VS_STATE_BACKUP; + threadfn = sync_thread_master; + sock = make_send_sock(); + } else if (state == IP_VS_STATE_BACKUP) { + if (sync_backup_thread) + return -EEXIST; + + strlcpy(ip_vs_backup_mcast_ifn, mcast_ifn, + sizeof(ip_vs_backup_mcast_ifn)); + ip_vs_backup_syncid = syncid; + realtask = &sync_backup_thread; name = "ipvs_syncbackup"; + threadfn = sync_thread_backup; + sock = make_receive_sock(); } else { - IP_VS_BUG(); - ip_vs_use_count_dec(); return -EINVAL; } - daemonize(name); - - oldmm = get_fs(); - set_fs(KERNEL_DS); - - /* Block all signals */ - spin_lock_irq(¤t->sighand->siglock); - siginitsetinv(¤t->blocked, 0); - recalc_sigpending(); - spin_unlock_irq(¤t->sighand->siglock); + if (IS_ERR(sock)) { + result = PTR_ERR(sock); + goto out; + } - /* set the maximum length of sync message */ set_sync_mesg_maxlen(state); + if (state == IP_VS_STATE_BACKUP) { + buf = kmalloc(sync_recv_mesg_maxlen, GFP_KERNEL); + if (!buf) + goto outsocket; + } - /* set up multicast address */ - mcast_addr.sin_family = AF_INET; - mcast_addr.sin_port = htons(IP_VS_SYNC_PORT); - mcast_addr.sin_addr.s_addr = htonl(IP_VS_SYNC_GROUP); - - add_wait_queue(&sync_wait, &wait); - - set_sync_pid(state, task_pid_nr(current)); - complete(tinfo->startup); - - /* - * once we call the completion queue above, we should - * null out that reference, since its allocated on the - * stack of the creating kernel thread - */ - tinfo->startup = NULL; - - /* processing master/backup loop here */ - if (state == IP_VS_STATE_MASTER) - sync_master_loop(); - else if (state == IP_VS_STATE_BACKUP) - sync_backup_loop(); - else IP_VS_BUG(); - - remove_wait_queue(&sync_wait, &wait); - - /* thread exits */ - - /* - * If we weren't explicitly stopped, then we - * exited in error, and should undo our state - */ - if ((!stop_master_sync) && (!stop_backup_sync)) - ip_vs_sync_state -= tinfo->state; + tinfo = kmalloc(sizeof(*tinfo), GFP_KERNEL); + if (!tinfo) + goto outbuf; - set_sync_pid(state, 0); - IP_VS_INFO("sync thread stopped!\n"); + tinfo->sock = sock; + tinfo->buf = buf; - set_fs(oldmm); + task = kthread_run(threadfn, tinfo, name); + if (IS_ERR(task)) { + result = PTR_ERR(task); + goto outtinfo; + } - /* decrease the module use count */ - ip_vs_use_count_dec(); + /* mark as active */ + *realtask = task; + ip_vs_sync_state |= state; - set_stop_sync(state, 0); - wake_up(&stop_sync_wait); + /* increase the module use count */ + ip_vs_use_count_inc(); - /* - * we need to free the structure that was allocated - * for us in start_sync_thread - */ - kfree(tinfo); return 0; -} - - -static int fork_sync_thread(void *startup) -{ - pid_t pid; - - /* fork the sync thread here, then the parent process of the - sync thread is the init process after this thread exits. */ - repeat: - if ((pid = kernel_thread(sync_thread, startup, 0)) < 0) { - IP_VS_ERR("could not create sync_thread due to %d... " - "retrying.\n", pid); - msleep_interruptible(1000); - goto repeat; - } - return 0; +outtinfo: + kfree(tinfo); +outbuf: + kfree(buf); +outsocket: + sock_release(sock); +out: + return result; } -int start_sync_thread(int state, char *mcast_ifn, __u8 syncid) +int stop_sync_thread(int state) { - DECLARE_COMPLETION_ONSTACK(startup); - pid_t pid; - struct ip_vs_sync_thread_data *tinfo; - - if ((state == IP_VS_STATE_MASTER && sync_master_pid) || - (state == IP_VS_STATE_BACKUP && sync_backup_pid)) - return -EEXIST; - - /* - * Note that tinfo will be freed in sync_thread on exit - */ - tinfo = kmalloc(sizeof(struct ip_vs_sync_thread_data), GFP_KERNEL); - if (!tinfo) - return -ENOMEM; - IP_VS_DBG(7, "%s: pid %d\n", __func__, task_pid_nr(current)); - IP_VS_DBG(7, "Each ip_vs_sync_conn entry need %Zd bytes\n", - sizeof(struct ip_vs_sync_conn)); - ip_vs_sync_state |= state; if (state == IP_VS_STATE_MASTER) { - strlcpy(ip_vs_master_mcast_ifn, mcast_ifn, - sizeof(ip_vs_master_mcast_ifn)); - ip_vs_master_syncid = syncid; - } else { - strlcpy(ip_vs_backup_mcast_ifn, mcast_ifn, - sizeof(ip_vs_backup_mcast_ifn)); - ip_vs_backup_syncid = syncid; - } - - tinfo->state = state; - tinfo->startup = &startup; - - repeat: - if ((pid = kernel_thread(fork_sync_thread, tinfo, 0)) < 0) { - IP_VS_ERR("could not create fork_sync_thread due to %d... " - "retrying.\n", pid); - msleep_interruptible(1000); - goto repeat; - } - - wait_for_completion(&startup); - - return 0; -} + if (!sync_master_thread) + return -ESRCH; + IP_VS_INFO("stopping master sync thread %d ...\n", + task_pid_nr(sync_master_thread)); -int stop_sync_thread(int state) -{ - DECLARE_WAITQUEUE(wait, current); + /* + * The lock synchronizes with sb_queue_tail(), so that we don't + * add sync buffers to the queue, when we are already in + * progress of stopping the master sync daemon. + */ - if ((state == IP_VS_STATE_MASTER && !sync_master_pid) || - (state == IP_VS_STATE_BACKUP && !sync_backup_pid)) - return -ESRCH; + spin_lock_bh(&ip_vs_sync_lock); + ip_vs_sync_state &= ~IP_VS_STATE_MASTER; + spin_unlock_bh(&ip_vs_sync_lock); + kthread_stop(sync_master_thread); + sync_master_thread = NULL; + } else if (state == IP_VS_STATE_BACKUP) { + if (!sync_backup_thread) + return -ESRCH; + + IP_VS_INFO("stopping backup sync thread %d ...\n", + task_pid_nr(sync_backup_thread)); + + ip_vs_sync_state &= ~IP_VS_STATE_BACKUP; + kthread_stop(sync_backup_thread); + sync_backup_thread = NULL; + } else { + return -EINVAL; + } - IP_VS_DBG(7, "%s: pid %d\n", __func__, task_pid_nr(current)); - IP_VS_INFO("stopping sync thread %d ...\n", - (state == IP_VS_STATE_MASTER) ? - sync_master_pid : sync_backup_pid); - - __set_current_state(TASK_UNINTERRUPTIBLE); - add_wait_queue(&stop_sync_wait, &wait); - set_stop_sync(state, 1); - ip_vs_sync_state -= state; - wake_up(&sync_wait); - schedule(); - __set_current_state(TASK_RUNNING); - remove_wait_queue(&stop_sync_wait, &wait); - - /* Note: no need to reap the sync thread, because its parent - process is the init process */ - - if ((state == IP_VS_STATE_MASTER && stop_master_sync) || - (state == IP_VS_STATE_BACKUP && stop_backup_sync)) - IP_VS_BUG(); + /* decrease the module use count */ + ip_vs_use_count_dec(); return 0; } diff --git a/net/ipv4/ipvs/ip_vs_wlc.c b/net/netfilter/ipvs/ip_vs_wlc.c index 8a9d913261d8..8c596e712599 100644 --- a/net/ipv4/ipvs/ip_vs_wlc.c +++ b/net/netfilter/ipvs/ip_vs_wlc.c @@ -1,8 +1,6 @@ /* * IPVS: Weighted Least-Connection Scheduling module * - * Version: $Id: ip_vs_wlc.c,v 1.13 2003/04/18 09:03:16 wensong Exp $ - * * Authors: Wensong Zhang <wensong@linuxvirtualserver.org> * Peter Kese <peter.kese@ijs.si> * @@ -27,27 +25,6 @@ #include <net/ip_vs.h> -static int -ip_vs_wlc_init_svc(struct ip_vs_service *svc) -{ - return 0; -} - - -static int -ip_vs_wlc_done_svc(struct ip_vs_service *svc) -{ - return 0; -} - - -static int -ip_vs_wlc_update_svc(struct ip_vs_service *svc) -{ - return 0; -} - - static inline unsigned int ip_vs_wlc_dest_overhead(struct ip_vs_dest *dest) { @@ -112,12 +89,12 @@ ip_vs_wlc_schedule(struct ip_vs_service *svc, const struct sk_buff *skb) } } - IP_VS_DBG(6, "WLC: server %u.%u.%u.%u:%u " - "activeconns %d refcnt %d weight %d overhead %d\n", - NIPQUAD(least->addr), ntohs(least->port), - atomic_read(&least->activeconns), - atomic_read(&least->refcnt), - atomic_read(&least->weight), loh); + IP_VS_DBG_BUF(6, "WLC: server %s:%u " + "activeconns %d refcnt %d weight %d overhead %d\n", + IP_VS_DBG_ADDR(svc->af, &least->addr), ntohs(least->port), + atomic_read(&least->activeconns), + atomic_read(&least->refcnt), + atomic_read(&least->weight), loh); return least; } @@ -128,16 +105,16 @@ static struct ip_vs_scheduler ip_vs_wlc_scheduler = .name = "wlc", .refcnt = ATOMIC_INIT(0), .module = THIS_MODULE, - .init_service = ip_vs_wlc_init_svc, - .done_service = ip_vs_wlc_done_svc, - .update_service = ip_vs_wlc_update_svc, + .n_list = LIST_HEAD_INIT(ip_vs_wlc_scheduler.n_list), +#ifdef CONFIG_IP_VS_IPV6 + .supports_ipv6 = 1, +#endif .schedule = ip_vs_wlc_schedule, }; static int __init ip_vs_wlc_init(void) { - INIT_LIST_HEAD(&ip_vs_wlc_scheduler.n_list); return register_ip_vs_scheduler(&ip_vs_wlc_scheduler); } diff --git a/net/ipv4/ipvs/ip_vs_wrr.c b/net/netfilter/ipvs/ip_vs_wrr.c index 85c680add6df..7ea92fed50bf 100644 --- a/net/ipv4/ipvs/ip_vs_wrr.c +++ b/net/netfilter/ipvs/ip_vs_wrr.c @@ -1,8 +1,6 @@ /* * IPVS: Weighted Round-Robin Scheduling module * - * Version: $Id: ip_vs_wrr.c,v 1.12 2002/09/15 08:14:08 wensong Exp $ - * * Authors: Wensong Zhang <wensong@linuxvirtualserver.org> * * This program is free software; you can redistribute it and/or @@ -197,12 +195,12 @@ ip_vs_wrr_schedule(struct ip_vs_service *svc, const struct sk_buff *skb) } } - IP_VS_DBG(6, "WRR: server %u.%u.%u.%u:%u " - "activeconns %d refcnt %d weight %d\n", - NIPQUAD(dest->addr), ntohs(dest->port), - atomic_read(&dest->activeconns), - atomic_read(&dest->refcnt), - atomic_read(&dest->weight)); + IP_VS_DBG_BUF(6, "WRR: server %s:%u " + "activeconns %d refcnt %d weight %d\n", + IP_VS_DBG_ADDR(svc->af, &dest->addr), ntohs(dest->port), + atomic_read(&dest->activeconns), + atomic_read(&dest->refcnt), + atomic_read(&dest->weight)); out: write_unlock(&svc->sched_lock); @@ -214,6 +212,10 @@ static struct ip_vs_scheduler ip_vs_wrr_scheduler = { .name = "wrr", .refcnt = ATOMIC_INIT(0), .module = THIS_MODULE, + .n_list = LIST_HEAD_INIT(ip_vs_wrr_scheduler.n_list), +#ifdef CONFIG_IP_VS_IPV6 + .supports_ipv6 = 1, +#endif .init_service = ip_vs_wrr_init_svc, .done_service = ip_vs_wrr_done_svc, .update_service = ip_vs_wrr_update_svc, @@ -222,7 +224,6 @@ static struct ip_vs_scheduler ip_vs_wrr_scheduler = { static int __init ip_vs_wrr_init(void) { - INIT_LIST_HEAD(&ip_vs_wrr_scheduler.n_list); return register_ip_vs_scheduler(&ip_vs_wrr_scheduler) ; } diff --git a/net/ipv4/ipvs/ip_vs_xmit.c b/net/netfilter/ipvs/ip_vs_xmit.c index f63006caea03..02ddc2b3ce2e 100644 --- a/net/ipv4/ipvs/ip_vs_xmit.c +++ b/net/netfilter/ipvs/ip_vs_xmit.c @@ -1,8 +1,6 @@ /* * ip_vs_xmit.c: various packet transmitters for IPVS * - * Version: $Id: ip_vs_xmit.c,v 1.2 2002/11/30 01:50:35 wensong Exp $ - * * Authors: Wensong Zhang <wensong@linuxvirtualserver.org> * Julian Anastasov <ja@ssi.bg> * @@ -22,6 +20,9 @@ #include <net/udp.h> #include <net/icmp.h> /* for icmp_send */ #include <net/route.h> /* for ip_route_output */ +#include <net/ipv6.h> +#include <net/ip6_route.h> +#include <linux/icmpv6.h> #include <linux/netfilter.h> #include <linux/netfilter_ipv4.h> @@ -49,7 +50,8 @@ __ip_vs_dst_check(struct ip_vs_dest *dest, u32 rtos, u32 cookie) if (!dst) return NULL; - if ((dst->obsolete || rtos != dest->dst_rtos) && + if ((dst->obsolete + || (dest->af == AF_INET && rtos != dest->dst_rtos)) && dst->ops->check(dst, cookie) == NULL) { dest->dst_cache = NULL; dst_release(dst); @@ -73,7 +75,7 @@ __ip_vs_get_out_rt(struct ip_vs_conn *cp, u32 rtos) .oif = 0, .nl_u = { .ip4_u = { - .daddr = dest->addr, + .daddr = dest->addr.ip, .saddr = 0, .tos = rtos, } }, }; @@ -82,12 +84,12 @@ __ip_vs_get_out_rt(struct ip_vs_conn *cp, u32 rtos) spin_unlock(&dest->dst_lock); IP_VS_DBG_RL("ip_route_output error, " "dest: %u.%u.%u.%u\n", - NIPQUAD(dest->addr)); + NIPQUAD(dest->addr.ip)); return NULL; } __ip_vs_dst_set(dest, rtos, dst_clone(&rt->u.dst)); IP_VS_DBG(10, "new dst %u.%u.%u.%u, refcnt=%d, rtos=%X\n", - NIPQUAD(dest->addr), + NIPQUAD(dest->addr.ip), atomic_read(&rt->u.dst.__refcnt), rtos); } spin_unlock(&dest->dst_lock); @@ -96,14 +98,14 @@ __ip_vs_get_out_rt(struct ip_vs_conn *cp, u32 rtos) .oif = 0, .nl_u = { .ip4_u = { - .daddr = cp->daddr, + .daddr = cp->daddr.ip, .saddr = 0, .tos = rtos, } }, }; if (ip_route_output_key(&init_net, &rt, &fl)) { IP_VS_DBG_RL("ip_route_output error, dest: " - "%u.%u.%u.%u\n", NIPQUAD(cp->daddr)); + "%u.%u.%u.%u\n", NIPQUAD(cp->daddr.ip)); return NULL; } } @@ -111,6 +113,70 @@ __ip_vs_get_out_rt(struct ip_vs_conn *cp, u32 rtos) return rt; } +#ifdef CONFIG_IP_VS_IPV6 +static struct rt6_info * +__ip_vs_get_out_rt_v6(struct ip_vs_conn *cp) +{ + struct rt6_info *rt; /* Route to the other host */ + struct ip_vs_dest *dest = cp->dest; + + if (dest) { + spin_lock(&dest->dst_lock); + rt = (struct rt6_info *)__ip_vs_dst_check(dest, 0, 0); + if (!rt) { + struct flowi fl = { + .oif = 0, + .nl_u = { + .ip6_u = { + .daddr = dest->addr.in6, + .saddr = { + .s6_addr32 = + { 0, 0, 0, 0 }, + }, + }, + }, + }; + + rt = (struct rt6_info *)ip6_route_output(&init_net, + NULL, &fl); + if (!rt) { + spin_unlock(&dest->dst_lock); + IP_VS_DBG_RL("ip6_route_output error, " + "dest: " NIP6_FMT "\n", + NIP6(dest->addr.in6)); + return NULL; + } + __ip_vs_dst_set(dest, 0, dst_clone(&rt->u.dst)); + IP_VS_DBG(10, "new dst " NIP6_FMT ", refcnt=%d\n", + NIP6(dest->addr.in6), + atomic_read(&rt->u.dst.__refcnt)); + } + spin_unlock(&dest->dst_lock); + } else { + struct flowi fl = { + .oif = 0, + .nl_u = { + .ip6_u = { + .daddr = cp->daddr.in6, + .saddr = { + .s6_addr32 = { 0, 0, 0, 0 }, + }, + }, + }, + }; + + rt = (struct rt6_info *)ip6_route_output(&init_net, NULL, &fl); + if (!rt) { + IP_VS_DBG_RL("ip6_route_output error, dest: " + NIP6_FMT "\n", NIP6(cp->daddr.in6)); + return NULL; + } + } + + return rt; +} +#endif + /* * Release dest->dst_cache before a dest is removed @@ -125,11 +191,11 @@ ip_vs_dst_reset(struct ip_vs_dest *dest) dst_release(old_dst); } -#define IP_VS_XMIT(skb, rt) \ +#define IP_VS_XMIT(pf, skb, rt) \ do { \ (skb)->ipvs_property = 1; \ skb_forward_csum(skb); \ - NF_HOOK(PF_INET, NF_INET_LOCAL_OUT, (skb), NULL, \ + NF_HOOK(pf, NF_INET_LOCAL_OUT, (skb), NULL, \ (rt)->u.dst.dev, dst_output); \ } while (0) @@ -202,7 +268,7 @@ ip_vs_bypass_xmit(struct sk_buff *skb, struct ip_vs_conn *cp, /* Another hack: avoid icmp_send in ip_fragment */ skb->local_df = 1; - IP_VS_XMIT(skb, rt); + IP_VS_XMIT(PF_INET, skb, rt); LeaveFunction(10); return NF_STOLEN; @@ -215,6 +281,70 @@ ip_vs_bypass_xmit(struct sk_buff *skb, struct ip_vs_conn *cp, return NF_STOLEN; } +#ifdef CONFIG_IP_VS_IPV6 +int +ip_vs_bypass_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp, + struct ip_vs_protocol *pp) +{ + struct rt6_info *rt; /* Route to the other host */ + struct ipv6hdr *iph = ipv6_hdr(skb); + int mtu; + struct flowi fl = { + .oif = 0, + .nl_u = { + .ip6_u = { + .daddr = iph->daddr, + .saddr = { .s6_addr32 = {0, 0, 0, 0} }, } }, + }; + + EnterFunction(10); + + rt = (struct rt6_info *)ip6_route_output(&init_net, NULL, &fl); + if (!rt) { + IP_VS_DBG_RL("ip_vs_bypass_xmit_v6(): ip6_route_output error, " + "dest: " NIP6_FMT "\n", NIP6(iph->daddr)); + goto tx_error_icmp; + } + + /* MTU checking */ + mtu = dst_mtu(&rt->u.dst); + if (skb->len > mtu) { + dst_release(&rt->u.dst); + icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu, skb->dev); + IP_VS_DBG_RL("ip_vs_bypass_xmit_v6(): frag needed\n"); + goto tx_error; + } + + /* + * Call ip_send_check because we are not sure it is called + * after ip_defrag. Is copy-on-write needed? + */ + skb = skb_share_check(skb, GFP_ATOMIC); + if (unlikely(skb == NULL)) { + dst_release(&rt->u.dst); + return NF_STOLEN; + } + + /* drop old route */ + dst_release(skb->dst); + skb->dst = &rt->u.dst; + + /* Another hack: avoid icmp_send in ip_fragment */ + skb->local_df = 1; + + IP_VS_XMIT(PF_INET6, skb, rt); + + LeaveFunction(10); + return NF_STOLEN; + + tx_error_icmp: + dst_link_failure(skb); + tx_error: + kfree_skb(skb); + LeaveFunction(10); + return NF_STOLEN; +} +#endif /* * NAT transmitter (only for outside-to-inside nat forwarding) @@ -266,7 +396,7 @@ ip_vs_nat_xmit(struct sk_buff *skb, struct ip_vs_conn *cp, /* mangle the packet */ if (pp->dnat_handler && !pp->dnat_handler(skb, pp, cp)) goto tx_error; - ip_hdr(skb)->daddr = cp->daddr; + ip_hdr(skb)->daddr = cp->daddr.ip; ip_send_check(ip_hdr(skb)); IP_VS_DBG_PKT(10, pp, skb, 0, "After DNAT"); @@ -278,7 +408,7 @@ ip_vs_nat_xmit(struct sk_buff *skb, struct ip_vs_conn *cp, /* Another hack: avoid icmp_send in ip_fragment */ skb->local_df = 1; - IP_VS_XMIT(skb, rt); + IP_VS_XMIT(PF_INET, skb, rt); LeaveFunction(10); return NF_STOLEN; @@ -294,6 +424,83 @@ ip_vs_nat_xmit(struct sk_buff *skb, struct ip_vs_conn *cp, goto tx_error; } +#ifdef CONFIG_IP_VS_IPV6 +int +ip_vs_nat_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp, + struct ip_vs_protocol *pp) +{ + struct rt6_info *rt; /* Route to the other host */ + int mtu; + + EnterFunction(10); + + /* check if it is a connection of no-client-port */ + if (unlikely(cp->flags & IP_VS_CONN_F_NO_CPORT)) { + __be16 _pt, *p; + p = skb_header_pointer(skb, sizeof(struct ipv6hdr), + sizeof(_pt), &_pt); + if (p == NULL) + goto tx_error; + ip_vs_conn_fill_cport(cp, *p); + IP_VS_DBG(10, "filled cport=%d\n", ntohs(*p)); + } + + rt = __ip_vs_get_out_rt_v6(cp); + if (!rt) + goto tx_error_icmp; + + /* MTU checking */ + mtu = dst_mtu(&rt->u.dst); + if (skb->len > mtu) { + dst_release(&rt->u.dst); + icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu, skb->dev); + IP_VS_DBG_RL_PKT(0, pp, skb, 0, + "ip_vs_nat_xmit_v6(): frag needed for"); + goto tx_error; + } + + /* copy-on-write the packet before mangling it */ + if (!skb_make_writable(skb, sizeof(struct ipv6hdr))) + goto tx_error_put; + + if (skb_cow(skb, rt->u.dst.dev->hard_header_len)) + goto tx_error_put; + + /* drop old route */ + dst_release(skb->dst); + skb->dst = &rt->u.dst; + + /* mangle the packet */ + if (pp->dnat_handler && !pp->dnat_handler(skb, pp, cp)) + goto tx_error; + ipv6_hdr(skb)->daddr = cp->daddr.in6; + + IP_VS_DBG_PKT(10, pp, skb, 0, "After DNAT"); + + /* FIXME: when application helper enlarges the packet and the length + is larger than the MTU of outgoing device, there will be still + MTU problem. */ + + /* Another hack: avoid icmp_send in ip_fragment */ + skb->local_df = 1; + + IP_VS_XMIT(PF_INET6, skb, rt); + + LeaveFunction(10); + return NF_STOLEN; + +tx_error_icmp: + dst_link_failure(skb); +tx_error: + LeaveFunction(10); + kfree_skb(skb); + return NF_STOLEN; +tx_error_put: + dst_release(&rt->u.dst); + goto tx_error; +} +#endif + /* * IP Tunneling transmitter @@ -425,6 +632,112 @@ ip_vs_tunnel_xmit(struct sk_buff *skb, struct ip_vs_conn *cp, return NF_STOLEN; } +#ifdef CONFIG_IP_VS_IPV6 +int +ip_vs_tunnel_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp, + struct ip_vs_protocol *pp) +{ + struct rt6_info *rt; /* Route to the other host */ + struct net_device *tdev; /* Device to other host */ + struct ipv6hdr *old_iph = ipv6_hdr(skb); + sk_buff_data_t old_transport_header = skb->transport_header; + struct ipv6hdr *iph; /* Our new IP header */ + unsigned int max_headroom; /* The extra header space needed */ + int mtu; + + EnterFunction(10); + + if (skb->protocol != htons(ETH_P_IPV6)) { + IP_VS_DBG_RL("ip_vs_tunnel_xmit_v6(): protocol error, " + "ETH_P_IPV6: %d, skb protocol: %d\n", + htons(ETH_P_IPV6), skb->protocol); + goto tx_error; + } + + rt = __ip_vs_get_out_rt_v6(cp); + if (!rt) + goto tx_error_icmp; + + tdev = rt->u.dst.dev; + + mtu = dst_mtu(&rt->u.dst) - sizeof(struct ipv6hdr); + /* TODO IPv6: do we need this check in IPv6? */ + if (mtu < 1280) { + dst_release(&rt->u.dst); + IP_VS_DBG_RL("ip_vs_tunnel_xmit_v6(): mtu less than 1280\n"); + goto tx_error; + } + if (skb->dst) + skb->dst->ops->update_pmtu(skb->dst, mtu); + + if (mtu < ntohs(old_iph->payload_len) + sizeof(struct ipv6hdr)) { + icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu, skb->dev); + dst_release(&rt->u.dst); + IP_VS_DBG_RL("ip_vs_tunnel_xmit_v6(): frag needed\n"); + goto tx_error; + } + + /* + * Okay, now see if we can stuff it in the buffer as-is. + */ + max_headroom = LL_RESERVED_SPACE(tdev) + sizeof(struct ipv6hdr); + + if (skb_headroom(skb) < max_headroom + || skb_cloned(skb) || skb_shared(skb)) { + struct sk_buff *new_skb = + skb_realloc_headroom(skb, max_headroom); + if (!new_skb) { + dst_release(&rt->u.dst); + kfree_skb(skb); + IP_VS_ERR_RL("ip_vs_tunnel_xmit_v6(): no memory\n"); + return NF_STOLEN; + } + kfree_skb(skb); + skb = new_skb; + old_iph = ipv6_hdr(skb); + } + + skb->transport_header = old_transport_header; + + skb_push(skb, sizeof(struct ipv6hdr)); + skb_reset_network_header(skb); + memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt)); + + /* drop old route */ + dst_release(skb->dst); + skb->dst = &rt->u.dst; + + /* + * Push down and install the IPIP header. + */ + iph = ipv6_hdr(skb); + iph->version = 6; + iph->nexthdr = IPPROTO_IPV6; + iph->payload_len = old_iph->payload_len + sizeof(old_iph); + iph->priority = old_iph->priority; + memset(&iph->flow_lbl, 0, sizeof(iph->flow_lbl)); + iph->daddr = rt->rt6i_dst.addr; + iph->saddr = cp->vaddr.in6; /* rt->rt6i_src.addr; */ + iph->hop_limit = old_iph->hop_limit; + + /* Another hack: avoid icmp_send in ip_fragment */ + skb->local_df = 1; + + ip6_local_out(skb); + + LeaveFunction(10); + + return NF_STOLEN; + +tx_error_icmp: + dst_link_failure(skb); +tx_error: + kfree_skb(skb); + LeaveFunction(10); + return NF_STOLEN; +} +#endif + /* * Direct Routing transmitter @@ -469,7 +782,7 @@ ip_vs_dr_xmit(struct sk_buff *skb, struct ip_vs_conn *cp, /* Another hack: avoid icmp_send in ip_fragment */ skb->local_df = 1; - IP_VS_XMIT(skb, rt); + IP_VS_XMIT(PF_INET, skb, rt); LeaveFunction(10); return NF_STOLEN; @@ -482,6 +795,60 @@ ip_vs_dr_xmit(struct sk_buff *skb, struct ip_vs_conn *cp, return NF_STOLEN; } +#ifdef CONFIG_IP_VS_IPV6 +int +ip_vs_dr_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp, + struct ip_vs_protocol *pp) +{ + struct rt6_info *rt; /* Route to the other host */ + int mtu; + + EnterFunction(10); + + rt = __ip_vs_get_out_rt_v6(cp); + if (!rt) + goto tx_error_icmp; + + /* MTU checking */ + mtu = dst_mtu(&rt->u.dst); + if (skb->len > mtu) { + icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu, skb->dev); + dst_release(&rt->u.dst); + IP_VS_DBG_RL("ip_vs_dr_xmit_v6(): frag needed\n"); + goto tx_error; + } + + /* + * Call ip_send_check because we are not sure it is called + * after ip_defrag. Is copy-on-write needed? + */ + skb = skb_share_check(skb, GFP_ATOMIC); + if (unlikely(skb == NULL)) { + dst_release(&rt->u.dst); + return NF_STOLEN; + } + + /* drop old route */ + dst_release(skb->dst); + skb->dst = &rt->u.dst; + + /* Another hack: avoid icmp_send in ip_fragment */ + skb->local_df = 1; + + IP_VS_XMIT(PF_INET6, skb, rt); + + LeaveFunction(10); + return NF_STOLEN; + +tx_error_icmp: + dst_link_failure(skb); +tx_error: + kfree_skb(skb); + LeaveFunction(10); + return NF_STOLEN; +} +#endif + /* * ICMP packet transmitter @@ -542,7 +909,7 @@ ip_vs_icmp_xmit(struct sk_buff *skb, struct ip_vs_conn *cp, /* Another hack: avoid icmp_send in ip_fragment */ skb->local_df = 1; - IP_VS_XMIT(skb, rt); + IP_VS_XMIT(PF_INET, skb, rt); rc = NF_STOLEN; goto out; @@ -559,3 +926,79 @@ ip_vs_icmp_xmit(struct sk_buff *skb, struct ip_vs_conn *cp, ip_rt_put(rt); goto tx_error; } + +#ifdef CONFIG_IP_VS_IPV6 +int +ip_vs_icmp_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp, + struct ip_vs_protocol *pp, int offset) +{ + struct rt6_info *rt; /* Route to the other host */ + int mtu; + int rc; + + EnterFunction(10); + + /* The ICMP packet for VS/TUN, VS/DR and LOCALNODE will be + forwarded directly here, because there is no need to + translate address/port back */ + if (IP_VS_FWD_METHOD(cp) != IP_VS_CONN_F_MASQ) { + if (cp->packet_xmit) + rc = cp->packet_xmit(skb, cp, pp); + else + rc = NF_ACCEPT; + /* do not touch skb anymore */ + atomic_inc(&cp->in_pkts); + goto out; + } + + /* + * mangle and send the packet here (only for VS/NAT) + */ + + rt = __ip_vs_get_out_rt_v6(cp); + if (!rt) + goto tx_error_icmp; + + /* MTU checking */ + mtu = dst_mtu(&rt->u.dst); + if (skb->len > mtu) { + dst_release(&rt->u.dst); + icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu, skb->dev); + IP_VS_DBG_RL("ip_vs_in_icmp(): frag needed\n"); + goto tx_error; + } + + /* copy-on-write the packet before mangling it */ + if (!skb_make_writable(skb, offset)) + goto tx_error_put; + + if (skb_cow(skb, rt->u.dst.dev->hard_header_len)) + goto tx_error_put; + + /* drop the old route when skb is not shared */ + dst_release(skb->dst); + skb->dst = &rt->u.dst; + + ip_vs_nat_icmp_v6(skb, pp, cp, 0); + + /* Another hack: avoid icmp_send in ip_fragment */ + skb->local_df = 1; + + IP_VS_XMIT(PF_INET6, skb, rt); + + rc = NF_STOLEN; + goto out; + +tx_error_icmp: + dst_link_failure(skb); +tx_error: + dev_kfree_skb(skb); + rc = NF_STOLEN; +out: + LeaveFunction(10); + return rc; +tx_error_put: + dst_release(&rt->u.dst); + goto tx_error; +} +#endif diff --git a/net/netfilter/nf_conntrack_acct.c b/net/netfilter/nf_conntrack_acct.c new file mode 100644 index 000000000000..03591d37b9cc --- /dev/null +++ b/net/netfilter/nf_conntrack_acct.c @@ -0,0 +1,148 @@ +/* Accouting handling for netfilter. */ + +/* + * (C) 2008 Krzysztof Piotr Oledzki <ole@ans.pl> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +#include <linux/netfilter.h> +#include <linux/kernel.h> +#include <linux/moduleparam.h> + +#include <net/netfilter/nf_conntrack.h> +#include <net/netfilter/nf_conntrack_extend.h> +#include <net/netfilter/nf_conntrack_acct.h> + +#ifdef CONFIG_NF_CT_ACCT +#define NF_CT_ACCT_DEFAULT 1 +#else +#define NF_CT_ACCT_DEFAULT 0 +#endif + +static int nf_ct_acct __read_mostly = NF_CT_ACCT_DEFAULT; + +module_param_named(acct, nf_ct_acct, bool, 0644); +MODULE_PARM_DESC(acct, "Enable connection tracking flow accounting."); + +#ifdef CONFIG_SYSCTL +static struct ctl_table acct_sysctl_table[] = { + { + .ctl_name = CTL_UNNUMBERED, + .procname = "nf_conntrack_acct", + .data = &init_net.ct.sysctl_acct, + .maxlen = sizeof(unsigned int), + .mode = 0644, + .proc_handler = &proc_dointvec, + }, + {} +}; +#endif /* CONFIG_SYSCTL */ + +unsigned int +seq_print_acct(struct seq_file *s, const struct nf_conn *ct, int dir) +{ + struct nf_conn_counter *acct; + + acct = nf_conn_acct_find(ct); + if (!acct) + return 0; + + return seq_printf(s, "packets=%llu bytes=%llu ", + (unsigned long long)acct[dir].packets, + (unsigned long long)acct[dir].bytes); +}; +EXPORT_SYMBOL_GPL(seq_print_acct); + +static struct nf_ct_ext_type acct_extend __read_mostly = { + .len = sizeof(struct nf_conn_counter[IP_CT_DIR_MAX]), + .align = __alignof__(struct nf_conn_counter[IP_CT_DIR_MAX]), + .id = NF_CT_EXT_ACCT, +}; + +#ifdef CONFIG_SYSCTL +static int nf_conntrack_acct_init_sysctl(struct net *net) +{ + struct ctl_table *table; + + table = kmemdup(acct_sysctl_table, sizeof(acct_sysctl_table), + GFP_KERNEL); + if (!table) + goto out; + + table[0].data = &net->ct.sysctl_acct; + + net->ct.acct_sysctl_header = register_net_sysctl_table(net, + nf_net_netfilter_sysctl_path, table); + if (!net->ct.acct_sysctl_header) { + printk(KERN_ERR "nf_conntrack_acct: can't register to sysctl.\n"); + goto out_register; + } + return 0; + +out_register: + kfree(table); +out: + return -ENOMEM; +} + +static void nf_conntrack_acct_fini_sysctl(struct net *net) +{ + struct ctl_table *table; + + table = net->ct.acct_sysctl_header->ctl_table_arg; + unregister_net_sysctl_table(net->ct.acct_sysctl_header); + kfree(table); +} +#else +static int nf_conntrack_acct_init_sysctl(struct net *net) +{ + return 0; +} + +static void nf_conntrack_acct_fini_sysctl(struct net *net) +{ +} +#endif + +int nf_conntrack_acct_init(struct net *net) +{ + int ret; + + net->ct.sysctl_acct = nf_ct_acct; + + if (net_eq(net, &init_net)) { +#ifdef CONFIG_NF_CT_ACCT + printk(KERN_WARNING "CONFIG_NF_CT_ACCT is deprecated and will be removed soon. Plase use\n"); + printk(KERN_WARNING "nf_conntrack.acct=1 kernel paramater, acct=1 nf_conntrack module option or\n"); + printk(KERN_WARNING "sysctl net.netfilter.nf_conntrack_acct=1 to enable it.\n"); +#endif + + ret = nf_ct_extend_register(&acct_extend); + if (ret < 0) { + printk(KERN_ERR "nf_conntrack_acct: Unable to register extension\n"); + goto out_extend_register; + } + } + + ret = nf_conntrack_acct_init_sysctl(net); + if (ret < 0) + goto out_sysctl; + + return 0; + +out_sysctl: + if (net_eq(net, &init_net)) + nf_ct_extend_unregister(&acct_extend); +out_extend_register: + return ret; +} + +void nf_conntrack_acct_fini(struct net *net) +{ + nf_conntrack_acct_fini_sysctl(net); + if (net_eq(net, &init_net)) + nf_ct_extend_unregister(&acct_extend); +} diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c index 662c1ccfee26..27de3c7b006e 100644 --- a/net/netfilter/nf_conntrack_core.c +++ b/net/netfilter/nf_conntrack_core.c @@ -37,36 +37,24 @@ #include <net/netfilter/nf_conntrack_helper.h> #include <net/netfilter/nf_conntrack_core.h> #include <net/netfilter/nf_conntrack_extend.h> +#include <net/netfilter/nf_conntrack_acct.h> #define NF_CONNTRACK_VERSION "0.5.0" DEFINE_SPINLOCK(nf_conntrack_lock); EXPORT_SYMBOL_GPL(nf_conntrack_lock); -/* nf_conntrack_standalone needs this */ -atomic_t nf_conntrack_count = ATOMIC_INIT(0); -EXPORT_SYMBOL_GPL(nf_conntrack_count); - unsigned int nf_conntrack_htable_size __read_mostly; EXPORT_SYMBOL_GPL(nf_conntrack_htable_size); int nf_conntrack_max __read_mostly; EXPORT_SYMBOL_GPL(nf_conntrack_max); -struct hlist_head *nf_conntrack_hash __read_mostly; -EXPORT_SYMBOL_GPL(nf_conntrack_hash); - struct nf_conn nf_conntrack_untracked __read_mostly; EXPORT_SYMBOL_GPL(nf_conntrack_untracked); -unsigned int nf_ct_log_invalid __read_mostly; -HLIST_HEAD(unconfirmed); -static int nf_conntrack_vmalloc __read_mostly; static struct kmem_cache *nf_conntrack_cachep __read_mostly; -DEFINE_PER_CPU(struct ip_conntrack_stat, nf_conntrack_stat); -EXPORT_PER_CPU_SYMBOL(nf_conntrack_stat); - static int nf_conntrack_hash_rnd_initted; static unsigned int nf_conntrack_hash_rnd; @@ -179,6 +167,7 @@ static void destroy_conntrack(struct nf_conntrack *nfct) { struct nf_conn *ct = (struct nf_conn *)nfct; + struct net *net = nf_ct_net(ct); struct nf_conntrack_l4proto *l4proto; pr_debug("destroy_conntrack(%p)\n", ct); @@ -211,7 +200,7 @@ destroy_conntrack(struct nf_conntrack *nfct) hlist_del(&ct->tuplehash[IP_CT_DIR_ORIGINAL].hnode); } - NF_CT_STAT_INC(delete); + NF_CT_STAT_INC(net, delete); spin_unlock_bh(&nf_conntrack_lock); if (ct->master) @@ -224,6 +213,7 @@ destroy_conntrack(struct nf_conntrack *nfct) static void death_by_timeout(unsigned long ul_conntrack) { struct nf_conn *ct = (void *)ul_conntrack; + struct net *net = nf_ct_net(ct); struct nf_conn_help *help = nfct_help(ct); struct nf_conntrack_helper *helper; @@ -238,14 +228,14 @@ static void death_by_timeout(unsigned long ul_conntrack) spin_lock_bh(&nf_conntrack_lock); /* Inside lock so preempt is disabled on module removal path. * Otherwise we can get spurious warnings. */ - NF_CT_STAT_INC(delete_list); + NF_CT_STAT_INC(net, delete_list); clean_from_lists(ct); spin_unlock_bh(&nf_conntrack_lock); nf_ct_put(ct); } struct nf_conntrack_tuple_hash * -__nf_conntrack_find(const struct nf_conntrack_tuple *tuple) +__nf_conntrack_find(struct net *net, const struct nf_conntrack_tuple *tuple) { struct nf_conntrack_tuple_hash *h; struct hlist_node *n; @@ -255,13 +245,13 @@ __nf_conntrack_find(const struct nf_conntrack_tuple *tuple) * at least once for the stats anyway. */ local_bh_disable(); - hlist_for_each_entry_rcu(h, n, &nf_conntrack_hash[hash], hnode) { + hlist_for_each_entry_rcu(h, n, &net->ct.hash[hash], hnode) { if (nf_ct_tuple_equal(tuple, &h->tuple)) { - NF_CT_STAT_INC(found); + NF_CT_STAT_INC(net, found); local_bh_enable(); return h; } - NF_CT_STAT_INC(searched); + NF_CT_STAT_INC(net, searched); } local_bh_enable(); @@ -271,13 +261,13 @@ EXPORT_SYMBOL_GPL(__nf_conntrack_find); /* Find a connection corresponding to a tuple. */ struct nf_conntrack_tuple_hash * -nf_conntrack_find_get(const struct nf_conntrack_tuple *tuple) +nf_conntrack_find_get(struct net *net, const struct nf_conntrack_tuple *tuple) { struct nf_conntrack_tuple_hash *h; struct nf_conn *ct; rcu_read_lock(); - h = __nf_conntrack_find(tuple); + h = __nf_conntrack_find(net, tuple); if (h) { ct = nf_ct_tuplehash_to_ctrack(h); if (unlikely(!atomic_inc_not_zero(&ct->ct_general.use))) @@ -293,10 +283,12 @@ static void __nf_conntrack_hash_insert(struct nf_conn *ct, unsigned int hash, unsigned int repl_hash) { + struct net *net = nf_ct_net(ct); + hlist_add_head_rcu(&ct->tuplehash[IP_CT_DIR_ORIGINAL].hnode, - &nf_conntrack_hash[hash]); + &net->ct.hash[hash]); hlist_add_head_rcu(&ct->tuplehash[IP_CT_DIR_REPLY].hnode, - &nf_conntrack_hash[repl_hash]); + &net->ct.hash[repl_hash]); } void nf_conntrack_hash_insert(struct nf_conn *ct) @@ -322,8 +314,10 @@ __nf_conntrack_confirm(struct sk_buff *skb) struct nf_conn_help *help; struct hlist_node *n; enum ip_conntrack_info ctinfo; + struct net *net; ct = nf_ct_get(skb, &ctinfo); + net = nf_ct_net(ct); /* ipt_REJECT uses nf_conntrack_attach to attach related ICMP/TCP RST packets in other direction. Actual packet @@ -350,11 +344,11 @@ __nf_conntrack_confirm(struct sk_buff *skb) /* See if there's one in the list already, including reverse: NAT could have grabbed it without realizing, since we're not in the hash. If there is, we lost race. */ - hlist_for_each_entry(h, n, &nf_conntrack_hash[hash], hnode) + hlist_for_each_entry(h, n, &net->ct.hash[hash], hnode) if (nf_ct_tuple_equal(&ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple, &h->tuple)) goto out; - hlist_for_each_entry(h, n, &nf_conntrack_hash[repl_hash], hnode) + hlist_for_each_entry(h, n, &net->ct.hash[repl_hash], hnode) if (nf_ct_tuple_equal(&ct->tuplehash[IP_CT_DIR_REPLY].tuple, &h->tuple)) goto out; @@ -370,22 +364,22 @@ __nf_conntrack_confirm(struct sk_buff *skb) add_timer(&ct->timeout); atomic_inc(&ct->ct_general.use); set_bit(IPS_CONFIRMED_BIT, &ct->status); - NF_CT_STAT_INC(insert); + NF_CT_STAT_INC(net, insert); spin_unlock_bh(&nf_conntrack_lock); help = nfct_help(ct); if (help && help->helper) - nf_conntrack_event_cache(IPCT_HELPER, skb); + nf_conntrack_event_cache(IPCT_HELPER, ct); #ifdef CONFIG_NF_NAT_NEEDED if (test_bit(IPS_SRC_NAT_DONE_BIT, &ct->status) || test_bit(IPS_DST_NAT_DONE_BIT, &ct->status)) - nf_conntrack_event_cache(IPCT_NATINFO, skb); + nf_conntrack_event_cache(IPCT_NATINFO, ct); #endif nf_conntrack_event_cache(master_ct(ct) ? - IPCT_RELATED : IPCT_NEW, skb); + IPCT_RELATED : IPCT_NEW, ct); return NF_ACCEPT; out: - NF_CT_STAT_INC(insert_failed); + NF_CT_STAT_INC(net, insert_failed); spin_unlock_bh(&nf_conntrack_lock); return NF_DROP; } @@ -397,6 +391,7 @@ int nf_conntrack_tuple_taken(const struct nf_conntrack_tuple *tuple, const struct nf_conn *ignored_conntrack) { + struct net *net = nf_ct_net(ignored_conntrack); struct nf_conntrack_tuple_hash *h; struct hlist_node *n; unsigned int hash = hash_conntrack(tuple); @@ -405,14 +400,14 @@ nf_conntrack_tuple_taken(const struct nf_conntrack_tuple *tuple, * least once for the stats anyway. */ rcu_read_lock_bh(); - hlist_for_each_entry_rcu(h, n, &nf_conntrack_hash[hash], hnode) { + hlist_for_each_entry_rcu(h, n, &net->ct.hash[hash], hnode) { if (nf_ct_tuplehash_to_ctrack(h) != ignored_conntrack && nf_ct_tuple_equal(tuple, &h->tuple)) { - NF_CT_STAT_INC(found); + NF_CT_STAT_INC(net, found); rcu_read_unlock_bh(); return 1; } - NF_CT_STAT_INC(searched); + NF_CT_STAT_INC(net, searched); } rcu_read_unlock_bh(); @@ -424,7 +419,7 @@ EXPORT_SYMBOL_GPL(nf_conntrack_tuple_taken); /* There's a small race here where we may free a just-assured connection. Too bad: we're in trouble anyway. */ -static noinline int early_drop(unsigned int hash) +static noinline int early_drop(struct net *net, unsigned int hash) { /* Use oldest entry, which is roughly LRU */ struct nf_conntrack_tuple_hash *h; @@ -435,7 +430,7 @@ static noinline int early_drop(unsigned int hash) rcu_read_lock(); for (i = 0; i < nf_conntrack_htable_size; i++) { - hlist_for_each_entry_rcu(h, n, &nf_conntrack_hash[hash], + hlist_for_each_entry_rcu(h, n, &net->ct.hash[hash], hnode) { tmp = nf_ct_tuplehash_to_ctrack(h); if (!test_bit(IPS_ASSURED_BIT, &tmp->status)) @@ -457,14 +452,16 @@ static noinline int early_drop(unsigned int hash) if (del_timer(&ct->timeout)) { death_by_timeout((unsigned long)ct); dropped = 1; - NF_CT_STAT_INC_ATOMIC(early_drop); + NF_CT_STAT_INC_ATOMIC(net, early_drop); } nf_ct_put(ct); return dropped; } -struct nf_conn *nf_conntrack_alloc(const struct nf_conntrack_tuple *orig, - const struct nf_conntrack_tuple *repl) +struct nf_conn *nf_conntrack_alloc(struct net *net, + const struct nf_conntrack_tuple *orig, + const struct nf_conntrack_tuple *repl, + gfp_t gfp) { struct nf_conn *ct = NULL; @@ -474,13 +471,13 @@ struct nf_conn *nf_conntrack_alloc(const struct nf_conntrack_tuple *orig, } /* We don't want any race condition at early drop stage */ - atomic_inc(&nf_conntrack_count); + atomic_inc(&net->ct.count); if (nf_conntrack_max && - unlikely(atomic_read(&nf_conntrack_count) > nf_conntrack_max)) { + unlikely(atomic_read(&net->ct.count) > nf_conntrack_max)) { unsigned int hash = hash_conntrack(orig); - if (!early_drop(hash)) { - atomic_dec(&nf_conntrack_count); + if (!early_drop(net, hash)) { + atomic_dec(&net->ct.count); if (net_ratelimit()) printk(KERN_WARNING "nf_conntrack: table full, dropping" @@ -489,10 +486,10 @@ struct nf_conn *nf_conntrack_alloc(const struct nf_conntrack_tuple *orig, } } - ct = kmem_cache_zalloc(nf_conntrack_cachep, GFP_ATOMIC); + ct = kmem_cache_zalloc(nf_conntrack_cachep, gfp); if (ct == NULL) { pr_debug("nf_conntrack_alloc: Can't alloc conntrack.\n"); - atomic_dec(&nf_conntrack_count); + atomic_dec(&net->ct.count); return ERR_PTR(-ENOMEM); } @@ -501,6 +498,9 @@ struct nf_conn *nf_conntrack_alloc(const struct nf_conntrack_tuple *orig, ct->tuplehash[IP_CT_DIR_REPLY].tuple = *repl; /* Don't set timer yet: wait for confirmation */ setup_timer(&ct->timeout, death_by_timeout, (unsigned long)ct); +#ifdef CONFIG_NET_NS + ct->ct_net = net; +#endif INIT_RCU_HEAD(&ct->rcu); return ct; @@ -510,10 +510,11 @@ EXPORT_SYMBOL_GPL(nf_conntrack_alloc); static void nf_conntrack_free_rcu(struct rcu_head *head) { struct nf_conn *ct = container_of(head, struct nf_conn, rcu); + struct net *net = nf_ct_net(ct); nf_ct_ext_free(ct); kmem_cache_free(nf_conntrack_cachep, ct); - atomic_dec(&nf_conntrack_count); + atomic_dec(&net->ct.count); } void nf_conntrack_free(struct nf_conn *ct) @@ -526,7 +527,8 @@ EXPORT_SYMBOL_GPL(nf_conntrack_free); /* Allocate a new conntrack: we return -ENOMEM if classification failed due to stress. Otherwise it really is unclassifiable. */ static struct nf_conntrack_tuple_hash * -init_conntrack(const struct nf_conntrack_tuple *tuple, +init_conntrack(struct net *net, + const struct nf_conntrack_tuple *tuple, struct nf_conntrack_l3proto *l3proto, struct nf_conntrack_l4proto *l4proto, struct sk_buff *skb, @@ -542,7 +544,7 @@ init_conntrack(const struct nf_conntrack_tuple *tuple, return NULL; } - ct = nf_conntrack_alloc(tuple, &repl_tuple); + ct = nf_conntrack_alloc(net, tuple, &repl_tuple, GFP_ATOMIC); if (ct == NULL || IS_ERR(ct)) { pr_debug("Can't allocate conntrack.\n"); return (struct nf_conntrack_tuple_hash *)ct; @@ -554,8 +556,10 @@ init_conntrack(const struct nf_conntrack_tuple *tuple, return NULL; } + nf_ct_acct_ext_add(ct, GFP_ATOMIC); + spin_lock_bh(&nf_conntrack_lock); - exp = nf_ct_find_expectation(tuple); + exp = nf_ct_find_expectation(net, tuple); if (exp) { pr_debug("conntrack: expectation arrives ct=%p exp=%p\n", ct, exp); @@ -575,7 +579,7 @@ init_conntrack(const struct nf_conntrack_tuple *tuple, ct->secmark = exp->master->secmark; #endif nf_conntrack_get(&ct->master->ct_general); - NF_CT_STAT_INC(expect_new); + NF_CT_STAT_INC(net, expect_new); } else { struct nf_conntrack_helper *helper; @@ -585,11 +589,12 @@ init_conntrack(const struct nf_conntrack_tuple *tuple, if (help) rcu_assign_pointer(help->helper, helper); } - NF_CT_STAT_INC(new); + NF_CT_STAT_INC(net, new); } /* Overload tuple linked list to put us in unconfirmed list. */ - hlist_add_head(&ct->tuplehash[IP_CT_DIR_ORIGINAL].hnode, &unconfirmed); + hlist_add_head(&ct->tuplehash[IP_CT_DIR_ORIGINAL].hnode, + &net->ct.unconfirmed); spin_unlock_bh(&nf_conntrack_lock); @@ -604,7 +609,8 @@ init_conntrack(const struct nf_conntrack_tuple *tuple, /* On success, returns conntrack ptr, sets skb->nfct and ctinfo */ static inline struct nf_conn * -resolve_normal_ct(struct sk_buff *skb, +resolve_normal_ct(struct net *net, + struct sk_buff *skb, unsigned int dataoff, u_int16_t l3num, u_int8_t protonum, @@ -625,9 +631,9 @@ resolve_normal_ct(struct sk_buff *skb, } /* look for tuple match */ - h = nf_conntrack_find_get(&tuple); + h = nf_conntrack_find_get(net, &tuple); if (!h) { - h = init_conntrack(&tuple, l3proto, l4proto, skb, dataoff); + h = init_conntrack(net, &tuple, l3proto, l4proto, skb, dataoff); if (!h) return NULL; if (IS_ERR(h)) @@ -661,7 +667,8 @@ resolve_normal_ct(struct sk_buff *skb, } unsigned int -nf_conntrack_in(int pf, unsigned int hooknum, struct sk_buff *skb) +nf_conntrack_in(struct net *net, u_int8_t pf, unsigned int hooknum, + struct sk_buff *skb) { struct nf_conn *ct; enum ip_conntrack_info ctinfo; @@ -674,44 +681,46 @@ nf_conntrack_in(int pf, unsigned int hooknum, struct sk_buff *skb) /* Previously seen (loopback or untracked)? Ignore. */ if (skb->nfct) { - NF_CT_STAT_INC_ATOMIC(ignore); + NF_CT_STAT_INC_ATOMIC(net, ignore); return NF_ACCEPT; } /* rcu_read_lock()ed by nf_hook_slow */ - l3proto = __nf_ct_l3proto_find((u_int16_t)pf); + l3proto = __nf_ct_l3proto_find(pf); ret = l3proto->get_l4proto(skb, skb_network_offset(skb), &dataoff, &protonum); if (ret <= 0) { pr_debug("not prepared to track yet or error occured\n"); - NF_CT_STAT_INC_ATOMIC(error); - NF_CT_STAT_INC_ATOMIC(invalid); + NF_CT_STAT_INC_ATOMIC(net, error); + NF_CT_STAT_INC_ATOMIC(net, invalid); return -ret; } - l4proto = __nf_ct_l4proto_find((u_int16_t)pf, protonum); + l4proto = __nf_ct_l4proto_find(pf, protonum); /* It may be an special packet, error, unclean... * inverse of the return code tells to the netfilter * core what to do with the packet. */ - if (l4proto->error != NULL && - (ret = l4proto->error(skb, dataoff, &ctinfo, pf, hooknum)) <= 0) { - NF_CT_STAT_INC_ATOMIC(error); - NF_CT_STAT_INC_ATOMIC(invalid); - return -ret; + if (l4proto->error != NULL) { + ret = l4proto->error(net, skb, dataoff, &ctinfo, pf, hooknum); + if (ret <= 0) { + NF_CT_STAT_INC_ATOMIC(net, error); + NF_CT_STAT_INC_ATOMIC(net, invalid); + return -ret; + } } - ct = resolve_normal_ct(skb, dataoff, pf, protonum, l3proto, l4proto, - &set_reply, &ctinfo); + ct = resolve_normal_ct(net, skb, dataoff, pf, protonum, + l3proto, l4proto, &set_reply, &ctinfo); if (!ct) { /* Not valid part of a connection */ - NF_CT_STAT_INC_ATOMIC(invalid); + NF_CT_STAT_INC_ATOMIC(net, invalid); return NF_ACCEPT; } if (IS_ERR(ct)) { /* Too stressed to deal. */ - NF_CT_STAT_INC_ATOMIC(drop); + NF_CT_STAT_INC_ATOMIC(net, drop); return NF_DROP; } @@ -724,12 +733,12 @@ nf_conntrack_in(int pf, unsigned int hooknum, struct sk_buff *skb) pr_debug("nf_conntrack_in: Can't track with proto module\n"); nf_conntrack_put(skb->nfct); skb->nfct = NULL; - NF_CT_STAT_INC_ATOMIC(invalid); + NF_CT_STAT_INC_ATOMIC(net, invalid); return -ret; } if (set_reply && !test_and_set_bit(IPS_SEEN_REPLY_BIT, &ct->status)) - nf_conntrack_event_cache(IPCT_STATUS, skb); + nf_conntrack_event_cache(IPCT_STATUS, ct); return ret; } @@ -827,26 +836,51 @@ void __nf_ct_refresh_acct(struct nf_conn *ct, } acct: -#ifdef CONFIG_NF_CT_ACCT if (do_acct) { - ct->counters[CTINFO2DIR(ctinfo)].packets++; - ct->counters[CTINFO2DIR(ctinfo)].bytes += - skb->len - skb_network_offset(skb); + struct nf_conn_counter *acct; - if ((ct->counters[CTINFO2DIR(ctinfo)].packets & 0x80000000) - || (ct->counters[CTINFO2DIR(ctinfo)].bytes & 0x80000000)) - event |= IPCT_COUNTER_FILLING; + acct = nf_conn_acct_find(ct); + if (acct) { + acct[CTINFO2DIR(ctinfo)].packets++; + acct[CTINFO2DIR(ctinfo)].bytes += + skb->len - skb_network_offset(skb); + } } -#endif spin_unlock_bh(&nf_conntrack_lock); /* must be unlocked when calling event cache */ if (event) - nf_conntrack_event_cache(event, skb); + nf_conntrack_event_cache(event, ct); } EXPORT_SYMBOL_GPL(__nf_ct_refresh_acct); +bool __nf_ct_kill_acct(struct nf_conn *ct, + enum ip_conntrack_info ctinfo, + const struct sk_buff *skb, + int do_acct) +{ + if (do_acct) { + struct nf_conn_counter *acct; + + spin_lock_bh(&nf_conntrack_lock); + acct = nf_conn_acct_find(ct); + if (acct) { + acct[CTINFO2DIR(ctinfo)].packets++; + acct[CTINFO2DIR(ctinfo)].bytes += + skb->len - skb_network_offset(skb); + } + spin_unlock_bh(&nf_conntrack_lock); + } + + if (del_timer(&ct->timeout)) { + ct->timeout.function((unsigned long)ct); + return true; + } + return false; +} +EXPORT_SYMBOL_GPL(__nf_ct_kill_acct); + #if defined(CONFIG_NF_CT_NETLINK) || defined(CONFIG_NF_CT_NETLINK_MODULE) #include <linux/netfilter/nfnetlink.h> @@ -909,7 +943,7 @@ static void nf_conntrack_attach(struct sk_buff *nskb, struct sk_buff *skb) /* Bring out ya dead! */ static struct nf_conn * -get_next_corpse(int (*iter)(struct nf_conn *i, void *data), +get_next_corpse(struct net *net, int (*iter)(struct nf_conn *i, void *data), void *data, unsigned int *bucket) { struct nf_conntrack_tuple_hash *h; @@ -918,13 +952,13 @@ get_next_corpse(int (*iter)(struct nf_conn *i, void *data), spin_lock_bh(&nf_conntrack_lock); for (; *bucket < nf_conntrack_htable_size; (*bucket)++) { - hlist_for_each_entry(h, n, &nf_conntrack_hash[*bucket], hnode) { + hlist_for_each_entry(h, n, &net->ct.hash[*bucket], hnode) { ct = nf_ct_tuplehash_to_ctrack(h); if (iter(ct, data)) goto found; } } - hlist_for_each_entry(h, n, &unconfirmed, hnode) { + hlist_for_each_entry(h, n, &net->ct.unconfirmed, hnode) { ct = nf_ct_tuplehash_to_ctrack(h); if (iter(ct, data)) set_bit(IPS_DYING_BIT, &ct->status); @@ -937,13 +971,14 @@ found: return ct; } -void -nf_ct_iterate_cleanup(int (*iter)(struct nf_conn *i, void *data), void *data) +void nf_ct_iterate_cleanup(struct net *net, + int (*iter)(struct nf_conn *i, void *data), + void *data) { struct nf_conn *ct; unsigned int bucket = 0; - while ((ct = get_next_corpse(iter, data, &bucket)) != NULL) { + while ((ct = get_next_corpse(net, iter, data, &bucket)) != NULL) { /* Time to push up daises... */ if (del_timer(&ct->timeout)) death_by_timeout((unsigned long)ct); @@ -969,27 +1004,26 @@ void nf_ct_free_hashtable(struct hlist_head *hash, int vmalloced, unsigned int s } EXPORT_SYMBOL_GPL(nf_ct_free_hashtable); -void nf_conntrack_flush(void) +void nf_conntrack_flush(struct net *net) { - nf_ct_iterate_cleanup(kill_all, NULL); + nf_ct_iterate_cleanup(net, kill_all, NULL); } EXPORT_SYMBOL_GPL(nf_conntrack_flush); -/* Mishearing the voices in his head, our hero wonders how he's - supposed to kill the mall. */ -void nf_conntrack_cleanup(void) +static void nf_conntrack_cleanup_init_net(void) { - rcu_assign_pointer(ip_ct_attach, NULL); - - /* This makes sure all current packets have passed through - netfilter framework. Roll on, two-stage module - delete... */ - synchronize_net(); + nf_conntrack_helper_fini(); + nf_conntrack_proto_fini(); + kmem_cache_destroy(nf_conntrack_cachep); +} - nf_ct_event_cache_flush(); +static void nf_conntrack_cleanup_net(struct net *net) +{ + nf_ct_event_cache_flush(net); + nf_conntrack_ecache_fini(net); i_see_dead_people: - nf_conntrack_flush(); - if (atomic_read(&nf_conntrack_count) != 0) { + nf_conntrack_flush(net); + if (atomic_read(&net->ct.count) != 0) { schedule(); goto i_see_dead_people; } @@ -997,15 +1031,31 @@ void nf_conntrack_cleanup(void) while (atomic_read(&nf_conntrack_untracked.ct_general.use) > 1) schedule(); - rcu_assign_pointer(nf_ct_destroy, NULL); - - kmem_cache_destroy(nf_conntrack_cachep); - nf_ct_free_hashtable(nf_conntrack_hash, nf_conntrack_vmalloc, + nf_ct_free_hashtable(net->ct.hash, net->ct.hash_vmalloc, nf_conntrack_htable_size); + nf_conntrack_acct_fini(net); + nf_conntrack_expect_fini(net); + free_percpu(net->ct.stat); +} - nf_conntrack_proto_fini(); - nf_conntrack_helper_fini(); - nf_conntrack_expect_fini(); +/* Mishearing the voices in his head, our hero wonders how he's + supposed to kill the mall. */ +void nf_conntrack_cleanup(struct net *net) +{ + if (net_eq(net, &init_net)) + rcu_assign_pointer(ip_ct_attach, NULL); + + /* This makes sure all current packets have passed through + netfilter framework. Roll on, two-stage module + delete... */ + synchronize_net(); + + nf_conntrack_cleanup_net(net); + + if (net_eq(net, &init_net)) { + rcu_assign_pointer(nf_ct_destroy, NULL); + nf_conntrack_cleanup_init_net(); + } } struct hlist_head *nf_ct_alloc_hashtable(unsigned int *sizep, int *vmalloced) @@ -1064,8 +1114,8 @@ int nf_conntrack_set_hashsize(const char *val, struct kernel_param *kp) */ spin_lock_bh(&nf_conntrack_lock); for (i = 0; i < nf_conntrack_htable_size; i++) { - while (!hlist_empty(&nf_conntrack_hash[i])) { - h = hlist_entry(nf_conntrack_hash[i].first, + while (!hlist_empty(&init_net.ct.hash[i])) { + h = hlist_entry(init_net.ct.hash[i].first, struct nf_conntrack_tuple_hash, hnode); hlist_del_rcu(&h->hnode); bucket = __hash_conntrack(&h->tuple, hashsize, rnd); @@ -1073,12 +1123,12 @@ int nf_conntrack_set_hashsize(const char *val, struct kernel_param *kp) } } old_size = nf_conntrack_htable_size; - old_vmalloced = nf_conntrack_vmalloc; - old_hash = nf_conntrack_hash; + old_vmalloced = init_net.ct.hash_vmalloc; + old_hash = init_net.ct.hash; nf_conntrack_htable_size = hashsize; - nf_conntrack_vmalloc = vmalloced; - nf_conntrack_hash = hash; + init_net.ct.hash_vmalloc = vmalloced; + init_net.ct.hash = hash; nf_conntrack_hash_rnd = rnd; spin_unlock_bh(&nf_conntrack_lock); @@ -1090,7 +1140,7 @@ EXPORT_SYMBOL_GPL(nf_conntrack_set_hashsize); module_param_call(hashsize, nf_conntrack_set_hashsize, param_get_uint, &nf_conntrack_htable_size, 0600); -int __init nf_conntrack_init(void) +static int nf_conntrack_init_init_net(void) { int max_factor = 8; int ret; @@ -1112,13 +1162,6 @@ int __init nf_conntrack_init(void) * entries. */ max_factor = 4; } - nf_conntrack_hash = nf_ct_alloc_hashtable(&nf_conntrack_htable_size, - &nf_conntrack_vmalloc); - if (!nf_conntrack_hash) { - printk(KERN_ERR "Unable to create nf_conntrack_hash\n"); - goto err_out; - } - nf_conntrack_max = max_factor * nf_conntrack_htable_size; printk("nf_conntrack version %s (%u buckets, %d max)\n", @@ -1130,42 +1173,103 @@ int __init nf_conntrack_init(void) 0, 0, NULL); if (!nf_conntrack_cachep) { printk(KERN_ERR "Unable to create nf_conn slab cache\n"); - goto err_free_hash; + ret = -ENOMEM; + goto err_cache; } ret = nf_conntrack_proto_init(); if (ret < 0) - goto err_free_conntrack_slab; - - ret = nf_conntrack_expect_init(); - if (ret < 0) - goto out_fini_proto; + goto err_proto; ret = nf_conntrack_helper_init(); if (ret < 0) - goto out_fini_expect; + goto err_helper; + + return 0; - /* For use by REJECT target */ - rcu_assign_pointer(ip_ct_attach, nf_conntrack_attach); - rcu_assign_pointer(nf_ct_destroy, destroy_conntrack); +err_helper: + nf_conntrack_proto_fini(); +err_proto: + kmem_cache_destroy(nf_conntrack_cachep); +err_cache: + return ret; +} + +static int nf_conntrack_init_net(struct net *net) +{ + int ret; + + atomic_set(&net->ct.count, 0); + INIT_HLIST_HEAD(&net->ct.unconfirmed); + net->ct.stat = alloc_percpu(struct ip_conntrack_stat); + if (!net->ct.stat) { + ret = -ENOMEM; + goto err_stat; + } + ret = nf_conntrack_ecache_init(net); + if (ret < 0) + goto err_ecache; + net->ct.hash = nf_ct_alloc_hashtable(&nf_conntrack_htable_size, + &net->ct.hash_vmalloc); + if (!net->ct.hash) { + ret = -ENOMEM; + printk(KERN_ERR "Unable to create nf_conntrack_hash\n"); + goto err_hash; + } + ret = nf_conntrack_expect_init(net); + if (ret < 0) + goto err_expect; + ret = nf_conntrack_acct_init(net); + if (ret < 0) + goto err_acct; /* Set up fake conntrack: - to never be deleted, not in any hashes */ +#ifdef CONFIG_NET_NS + nf_conntrack_untracked.ct_net = &init_net; +#endif atomic_set(&nf_conntrack_untracked.ct_general.use, 1); /* - and look it like as a confirmed connection */ set_bit(IPS_CONFIRMED_BIT, &nf_conntrack_untracked.status); - return ret; + return 0; -out_fini_expect: - nf_conntrack_expect_fini(); -out_fini_proto: - nf_conntrack_proto_fini(); -err_free_conntrack_slab: - kmem_cache_destroy(nf_conntrack_cachep); -err_free_hash: - nf_ct_free_hashtable(nf_conntrack_hash, nf_conntrack_vmalloc, +err_acct: + nf_conntrack_expect_fini(net); +err_expect: + nf_ct_free_hashtable(net->ct.hash, net->ct.hash_vmalloc, nf_conntrack_htable_size); -err_out: - return -ENOMEM; +err_hash: + nf_conntrack_ecache_fini(net); +err_ecache: + free_percpu(net->ct.stat); +err_stat: + return ret; +} + +int nf_conntrack_init(struct net *net) +{ + int ret; + + if (net_eq(net, &init_net)) { + ret = nf_conntrack_init_init_net(); + if (ret < 0) + goto out_init_net; + } + ret = nf_conntrack_init_net(net); + if (ret < 0) + goto out_net; + + if (net_eq(net, &init_net)) { + /* For use by REJECT target */ + rcu_assign_pointer(ip_ct_attach, nf_conntrack_attach); + rcu_assign_pointer(nf_ct_destroy, destroy_conntrack); + } + return 0; + +out_net: + if (net_eq(net, &init_net)) + nf_conntrack_cleanup_init_net(); +out_init_net: + return ret; } diff --git a/net/netfilter/nf_conntrack_ecache.c b/net/netfilter/nf_conntrack_ecache.c index 83c41ac3505b..a5f5e2e65d13 100644 --- a/net/netfilter/nf_conntrack_ecache.c +++ b/net/netfilter/nf_conntrack_ecache.c @@ -29,9 +29,6 @@ EXPORT_SYMBOL_GPL(nf_conntrack_chain); ATOMIC_NOTIFIER_HEAD(nf_ct_expect_chain); EXPORT_SYMBOL_GPL(nf_ct_expect_chain); -DEFINE_PER_CPU(struct nf_conntrack_ecache, nf_conntrack_ecache); -EXPORT_PER_CPU_SYMBOL_GPL(nf_conntrack_ecache); - /* deliver cached events and clear cache entry - must be called with locally * disabled softirqs */ static inline void @@ -51,10 +48,11 @@ __nf_ct_deliver_cached_events(struct nf_conntrack_ecache *ecache) * by code prior to async packet handling for freeing the skb */ void nf_ct_deliver_cached_events(const struct nf_conn *ct) { + struct net *net = nf_ct_net(ct); struct nf_conntrack_ecache *ecache; local_bh_disable(); - ecache = &__get_cpu_var(nf_conntrack_ecache); + ecache = per_cpu_ptr(net->ct.ecache, raw_smp_processor_id()); if (ecache->ct == ct) __nf_ct_deliver_cached_events(ecache); local_bh_enable(); @@ -64,10 +62,11 @@ EXPORT_SYMBOL_GPL(nf_ct_deliver_cached_events); /* Deliver cached events for old pending events, if current conntrack != old */ void __nf_ct_event_cache_init(struct nf_conn *ct) { + struct net *net = nf_ct_net(ct); struct nf_conntrack_ecache *ecache; /* take care of delivering potentially old events */ - ecache = &__get_cpu_var(nf_conntrack_ecache); + ecache = per_cpu_ptr(net->ct.ecache, raw_smp_processor_id()); BUG_ON(ecache->ct == ct); if (ecache->ct) __nf_ct_deliver_cached_events(ecache); @@ -79,18 +78,31 @@ EXPORT_SYMBOL_GPL(__nf_ct_event_cache_init); /* flush the event cache - touches other CPU's data and must not be called * while packets are still passing through the code */ -void nf_ct_event_cache_flush(void) +void nf_ct_event_cache_flush(struct net *net) { struct nf_conntrack_ecache *ecache; int cpu; for_each_possible_cpu(cpu) { - ecache = &per_cpu(nf_conntrack_ecache, cpu); + ecache = per_cpu_ptr(net->ct.ecache, cpu); if (ecache->ct) nf_ct_put(ecache->ct); } } +int nf_conntrack_ecache_init(struct net *net) +{ + net->ct.ecache = alloc_percpu(struct nf_conntrack_ecache); + if (!net->ct.ecache) + return -ENOMEM; + return 0; +} + +void nf_conntrack_ecache_fini(struct net *net) +{ + free_percpu(net->ct.ecache); +} + int nf_conntrack_register_notifier(struct notifier_block *nb) { return atomic_notifier_chain_register(&nf_conntrack_chain, nb); diff --git a/net/netfilter/nf_conntrack_expect.c b/net/netfilter/nf_conntrack_expect.c index e8f0dead267f..37a703bc3b8e 100644 --- a/net/netfilter/nf_conntrack_expect.c +++ b/net/netfilter/nf_conntrack_expect.c @@ -28,17 +28,12 @@ #include <net/netfilter/nf_conntrack_helper.h> #include <net/netfilter/nf_conntrack_tuple.h> -struct hlist_head *nf_ct_expect_hash __read_mostly; -EXPORT_SYMBOL_GPL(nf_ct_expect_hash); - unsigned int nf_ct_expect_hsize __read_mostly; EXPORT_SYMBOL_GPL(nf_ct_expect_hsize); static unsigned int nf_ct_expect_hash_rnd __read_mostly; -static unsigned int nf_ct_expect_count; unsigned int nf_ct_expect_max __read_mostly; static int nf_ct_expect_hash_rnd_initted __read_mostly; -static int nf_ct_expect_vmalloc; static struct kmem_cache *nf_ct_expect_cachep __read_mostly; @@ -46,18 +41,19 @@ static struct kmem_cache *nf_ct_expect_cachep __read_mostly; void nf_ct_unlink_expect(struct nf_conntrack_expect *exp) { struct nf_conn_help *master_help = nfct_help(exp->master); + struct net *net = nf_ct_exp_net(exp); NF_CT_ASSERT(master_help); NF_CT_ASSERT(!timer_pending(&exp->timeout)); hlist_del_rcu(&exp->hnode); - nf_ct_expect_count--; + net->ct.expect_count--; hlist_del(&exp->lnode); master_help->expecting[exp->class]--; nf_ct_expect_put(exp); - NF_CT_STAT_INC(expect_delete); + NF_CT_STAT_INC(net, expect_delete); } EXPORT_SYMBOL_GPL(nf_ct_unlink_expect); @@ -87,17 +83,17 @@ static unsigned int nf_ct_expect_dst_hash(const struct nf_conntrack_tuple *tuple } struct nf_conntrack_expect * -__nf_ct_expect_find(const struct nf_conntrack_tuple *tuple) +__nf_ct_expect_find(struct net *net, const struct nf_conntrack_tuple *tuple) { struct nf_conntrack_expect *i; struct hlist_node *n; unsigned int h; - if (!nf_ct_expect_count) + if (!net->ct.expect_count) return NULL; h = nf_ct_expect_dst_hash(tuple); - hlist_for_each_entry_rcu(i, n, &nf_ct_expect_hash[h], hnode) { + hlist_for_each_entry_rcu(i, n, &net->ct.expect_hash[h], hnode) { if (nf_ct_tuple_mask_cmp(tuple, &i->tuple, &i->mask)) return i; } @@ -107,12 +103,12 @@ EXPORT_SYMBOL_GPL(__nf_ct_expect_find); /* Just find a expectation corresponding to a tuple. */ struct nf_conntrack_expect * -nf_ct_expect_find_get(const struct nf_conntrack_tuple *tuple) +nf_ct_expect_find_get(struct net *net, const struct nf_conntrack_tuple *tuple) { struct nf_conntrack_expect *i; rcu_read_lock(); - i = __nf_ct_expect_find(tuple); + i = __nf_ct_expect_find(net, tuple); if (i && !atomic_inc_not_zero(&i->use)) i = NULL; rcu_read_unlock(); @@ -124,17 +120,17 @@ EXPORT_SYMBOL_GPL(nf_ct_expect_find_get); /* If an expectation for this connection is found, it gets delete from * global list then returned. */ struct nf_conntrack_expect * -nf_ct_find_expectation(const struct nf_conntrack_tuple *tuple) +nf_ct_find_expectation(struct net *net, const struct nf_conntrack_tuple *tuple) { struct nf_conntrack_expect *i, *exp = NULL; struct hlist_node *n; unsigned int h; - if (!nf_ct_expect_count) + if (!net->ct.expect_count) return NULL; h = nf_ct_expect_dst_hash(tuple); - hlist_for_each_entry(i, n, &nf_ct_expect_hash[h], hnode) { + hlist_for_each_entry(i, n, &net->ct.expect_hash[h], hnode) { if (!(i->flags & NF_CT_EXPECT_INACTIVE) && nf_ct_tuple_mask_cmp(tuple, &i->tuple, &i->mask)) { exp = i; @@ -241,7 +237,7 @@ struct nf_conntrack_expect *nf_ct_expect_alloc(struct nf_conn *me) EXPORT_SYMBOL_GPL(nf_ct_expect_alloc); void nf_ct_expect_init(struct nf_conntrack_expect *exp, unsigned int class, - int family, + u_int8_t family, const union nf_inet_addr *saddr, const union nf_inet_addr *daddr, u_int8_t proto, const __be16 *src, const __be16 *dst) @@ -311,6 +307,7 @@ EXPORT_SYMBOL_GPL(nf_ct_expect_put); static void nf_ct_expect_insert(struct nf_conntrack_expect *exp) { struct nf_conn_help *master_help = nfct_help(exp->master); + struct net *net = nf_ct_exp_net(exp); const struct nf_conntrack_expect_policy *p; unsigned int h = nf_ct_expect_dst_hash(&exp->tuple); @@ -319,8 +316,8 @@ static void nf_ct_expect_insert(struct nf_conntrack_expect *exp) hlist_add_head(&exp->lnode, &master_help->expectations); master_help->expecting[exp->class]++; - hlist_add_head_rcu(&exp->hnode, &nf_ct_expect_hash[h]); - nf_ct_expect_count++; + hlist_add_head_rcu(&exp->hnode, &net->ct.expect_hash[h]); + net->ct.expect_count++; setup_timer(&exp->timeout, nf_ct_expectation_timed_out, (unsigned long)exp); @@ -329,7 +326,7 @@ static void nf_ct_expect_insert(struct nf_conntrack_expect *exp) add_timer(&exp->timeout); atomic_inc(&exp->use); - NF_CT_STAT_INC(expect_create); + NF_CT_STAT_INC(net, expect_create); } /* Race with expectations being used means we could have none to find; OK. */ @@ -371,6 +368,7 @@ int nf_ct_expect_related(struct nf_conntrack_expect *expect) struct nf_conntrack_expect *i; struct nf_conn *master = expect->master; struct nf_conn_help *master_help = nfct_help(master); + struct net *net = nf_ct_exp_net(expect); struct hlist_node *n; unsigned int h; int ret; @@ -383,7 +381,7 @@ int nf_ct_expect_related(struct nf_conntrack_expect *expect) goto out; } h = nf_ct_expect_dst_hash(&expect->tuple); - hlist_for_each_entry(i, n, &nf_ct_expect_hash[h], hnode) { + hlist_for_each_entry(i, n, &net->ct.expect_hash[h], hnode) { if (expect_matches(i, expect)) { /* Refresh timer: if it's dying, ignore.. */ if (refresh_timer(i)) { @@ -406,7 +404,7 @@ int nf_ct_expect_related(struct nf_conntrack_expect *expect) } } - if (nf_ct_expect_count >= nf_ct_expect_max) { + if (net->ct.expect_count >= nf_ct_expect_max) { if (net_ratelimit()) printk(KERN_WARNING "nf_conntrack: expectation table full\n"); @@ -425,16 +423,18 @@ EXPORT_SYMBOL_GPL(nf_ct_expect_related); #ifdef CONFIG_PROC_FS struct ct_expect_iter_state { + struct seq_net_private p; unsigned int bucket; }; static struct hlist_node *ct_expect_get_first(struct seq_file *seq) { + struct net *net = seq_file_net(seq); struct ct_expect_iter_state *st = seq->private; struct hlist_node *n; for (st->bucket = 0; st->bucket < nf_ct_expect_hsize; st->bucket++) { - n = rcu_dereference(nf_ct_expect_hash[st->bucket].first); + n = rcu_dereference(net->ct.expect_hash[st->bucket].first); if (n) return n; } @@ -444,13 +444,14 @@ static struct hlist_node *ct_expect_get_first(struct seq_file *seq) static struct hlist_node *ct_expect_get_next(struct seq_file *seq, struct hlist_node *head) { + struct net *net = seq_file_net(seq); struct ct_expect_iter_state *st = seq->private; head = rcu_dereference(head->next); while (head == NULL) { if (++st->bucket >= nf_ct_expect_hsize) return NULL; - head = rcu_dereference(nf_ct_expect_hash[st->bucket].first); + head = rcu_dereference(net->ct.expect_hash[st->bucket].first); } return head; } @@ -524,7 +525,7 @@ static const struct seq_operations exp_seq_ops = { static int exp_open(struct inode *inode, struct file *file) { - return seq_open_private(file, &exp_seq_ops, + return seq_open_net(inode, file, &exp_seq_ops, sizeof(struct ct_expect_iter_state)); } @@ -533,72 +534,79 @@ static const struct file_operations exp_file_ops = { .open = exp_open, .read = seq_read, .llseek = seq_lseek, - .release = seq_release_private, + .release = seq_release_net, }; #endif /* CONFIG_PROC_FS */ -static int __init exp_proc_init(void) +static int exp_proc_init(struct net *net) { #ifdef CONFIG_PROC_FS struct proc_dir_entry *proc; - proc = proc_net_fops_create(&init_net, "nf_conntrack_expect", 0440, &exp_file_ops); + proc = proc_net_fops_create(net, "nf_conntrack_expect", 0440, &exp_file_ops); if (!proc) return -ENOMEM; #endif /* CONFIG_PROC_FS */ return 0; } -static void exp_proc_remove(void) +static void exp_proc_remove(struct net *net) { #ifdef CONFIG_PROC_FS - proc_net_remove(&init_net, "nf_conntrack_expect"); + proc_net_remove(net, "nf_conntrack_expect"); #endif /* CONFIG_PROC_FS */ } module_param_named(expect_hashsize, nf_ct_expect_hsize, uint, 0600); -int __init nf_conntrack_expect_init(void) +int nf_conntrack_expect_init(struct net *net) { int err = -ENOMEM; - if (!nf_ct_expect_hsize) { - nf_ct_expect_hsize = nf_conntrack_htable_size / 256; - if (!nf_ct_expect_hsize) - nf_ct_expect_hsize = 1; + if (net_eq(net, &init_net)) { + if (!nf_ct_expect_hsize) { + nf_ct_expect_hsize = nf_conntrack_htable_size / 256; + if (!nf_ct_expect_hsize) + nf_ct_expect_hsize = 1; + } + nf_ct_expect_max = nf_ct_expect_hsize * 4; } - nf_ct_expect_max = nf_ct_expect_hsize * 4; - nf_ct_expect_hash = nf_ct_alloc_hashtable(&nf_ct_expect_hsize, - &nf_ct_expect_vmalloc); - if (nf_ct_expect_hash == NULL) + net->ct.expect_count = 0; + net->ct.expect_hash = nf_ct_alloc_hashtable(&nf_ct_expect_hsize, + &net->ct.expect_vmalloc); + if (net->ct.expect_hash == NULL) goto err1; - nf_ct_expect_cachep = kmem_cache_create("nf_conntrack_expect", + if (net_eq(net, &init_net)) { + nf_ct_expect_cachep = kmem_cache_create("nf_conntrack_expect", sizeof(struct nf_conntrack_expect), 0, 0, NULL); - if (!nf_ct_expect_cachep) - goto err2; + if (!nf_ct_expect_cachep) + goto err2; + } - err = exp_proc_init(); + err = exp_proc_init(net); if (err < 0) goto err3; return 0; err3: - kmem_cache_destroy(nf_ct_expect_cachep); + if (net_eq(net, &init_net)) + kmem_cache_destroy(nf_ct_expect_cachep); err2: - nf_ct_free_hashtable(nf_ct_expect_hash, nf_ct_expect_vmalloc, + nf_ct_free_hashtable(net->ct.expect_hash, net->ct.expect_vmalloc, nf_ct_expect_hsize); err1: return err; } -void nf_conntrack_expect_fini(void) +void nf_conntrack_expect_fini(struct net *net) { - exp_proc_remove(); - kmem_cache_destroy(nf_ct_expect_cachep); - nf_ct_free_hashtable(nf_ct_expect_hash, nf_ct_expect_vmalloc, + exp_proc_remove(net); + if (net_eq(net, &init_net)) + kmem_cache_destroy(nf_ct_expect_cachep); + nf_ct_free_hashtable(net->ct.expect_hash, net->ct.expect_vmalloc, nf_ct_expect_hsize); } diff --git a/net/netfilter/nf_conntrack_extend.c b/net/netfilter/nf_conntrack_extend.c index 8a3f8b34e466..4b2c769d555f 100644 --- a/net/netfilter/nf_conntrack_extend.c +++ b/net/netfilter/nf_conntrack_extend.c @@ -95,13 +95,11 @@ void *__nf_ct_ext_add(struct nf_conn *ct, enum nf_ct_ext_id id, gfp_t gfp) newlen = newoff + t->len; rcu_read_unlock(); - if (newlen >= ksize(ct->ext)) { - new = kmalloc(newlen, gfp); - if (!new) - return NULL; - - memcpy(new, ct->ext, ct->ext->len); + new = __krealloc(ct->ext, newlen, gfp); + if (!new) + return NULL; + if (new != ct->ext) { for (i = 0; i < NF_CT_EXT_NUM; i++) { if (!nf_ct_ext_exist(ct, i)) continue; @@ -117,10 +115,10 @@ void *__nf_ct_ext_add(struct nf_conn *ct, enum nf_ct_ext_id id, gfp_t gfp) ct->ext = new; } - ct->ext->offset[id] = newoff; - ct->ext->len = newlen; - memset((void *)ct->ext + newoff, 0, newlen - newoff); - return (void *)ct->ext + newoff; + new->offset[id] = newoff; + new->len = newlen; + memset((void *)new + newoff, 0, newlen - newoff); + return (void *)new + newoff; } EXPORT_SYMBOL(__nf_ct_ext_add); diff --git a/net/netfilter/nf_conntrack_ftp.c b/net/netfilter/nf_conntrack_ftp.c index bb20672fe036..4f7107107e99 100644 --- a/net/netfilter/nf_conntrack_ftp.c +++ b/net/netfilter/nf_conntrack_ftp.c @@ -318,7 +318,8 @@ static int find_nl_seq(u32 seq, const struct nf_ct_ftp_master *info, int dir) } /* We don't update if it's older than what we have. */ -static void update_nl_seq(u32 nl_seq, struct nf_ct_ftp_master *info, int dir, +static void update_nl_seq(struct nf_conn *ct, u32 nl_seq, + struct nf_ct_ftp_master *info, int dir, struct sk_buff *skb) { unsigned int i, oldest = NUM_SEQ_TO_REMEMBER; @@ -336,11 +337,11 @@ static void update_nl_seq(u32 nl_seq, struct nf_ct_ftp_master *info, int dir, if (info->seq_aft_nl_num[dir] < NUM_SEQ_TO_REMEMBER) { info->seq_aft_nl[dir][info->seq_aft_nl_num[dir]++] = nl_seq; - nf_conntrack_event_cache(IPCT_HELPINFO_VOLATILE, skb); + nf_conntrack_event_cache(IPCT_HELPINFO_VOLATILE, ct); } else if (oldest != NUM_SEQ_TO_REMEMBER && after(nl_seq, info->seq_aft_nl[dir][oldest])) { info->seq_aft_nl[dir][oldest] = nl_seq; - nf_conntrack_event_cache(IPCT_HELPINFO_VOLATILE, skb); + nf_conntrack_event_cache(IPCT_HELPINFO_VOLATILE, ct); } } @@ -509,7 +510,7 @@ out_update_nl: /* Now if this ends in \n, update ftp info. Seq may have been * adjusted by NAT code. */ if (ends_in_nl) - update_nl_seq(seq, ct_ftp_info, dir, skb); + update_nl_seq(ct, seq, ct_ftp_info, dir, skb); out: spin_unlock_bh(&nf_ftp_lock); return ret; diff --git a/net/netfilter/nf_conntrack_h323_main.c b/net/netfilter/nf_conntrack_h323_main.c index 2f83c158934d..c1504f71cdff 100644 --- a/net/netfilter/nf_conntrack_h323_main.c +++ b/net/netfilter/nf_conntrack_h323_main.c @@ -709,7 +709,8 @@ static int expect_h245(struct sk_buff *skb, struct nf_conn *ct, /* If the calling party is on the same side of the forward-to party, * we don't need to track the second call */ static int callforward_do_filter(const union nf_inet_addr *src, - const union nf_inet_addr *dst, int family) + const union nf_inet_addr *dst, + u_int8_t family) { const struct nf_afinfo *afinfo; struct flowi fl1, fl2; @@ -1209,6 +1210,7 @@ static struct nf_conntrack_expect *find_expect(struct nf_conn *ct, union nf_inet_addr *addr, __be16 port) { + struct net *net = nf_ct_net(ct); struct nf_conntrack_expect *exp; struct nf_conntrack_tuple tuple; @@ -1218,7 +1220,7 @@ static struct nf_conntrack_expect *find_expect(struct nf_conn *ct, tuple.dst.u.tcp.port = port; tuple.dst.protonum = IPPROTO_TCP; - exp = __nf_ct_expect_find(&tuple); + exp = __nf_ct_expect_find(net, &tuple); if (exp && exp->master == ct) return exp; return NULL; diff --git a/net/netfilter/nf_conntrack_helper.c b/net/netfilter/nf_conntrack_helper.c index 7d1b11703741..9c06b9f86ad4 100644 --- a/net/netfilter/nf_conntrack_helper.c +++ b/net/netfilter/nf_conntrack_helper.c @@ -20,6 +20,7 @@ #include <linux/err.h> #include <linux/kernel.h> #include <linux/netdevice.h> +#include <linux/rculist.h> #include <net/netfilter/nf_conntrack.h> #include <net/netfilter/nf_conntrack_l3proto.h> @@ -122,29 +123,18 @@ int nf_conntrack_helper_register(struct nf_conntrack_helper *me) } EXPORT_SYMBOL_GPL(nf_conntrack_helper_register); -void nf_conntrack_helper_unregister(struct nf_conntrack_helper *me) +static void __nf_conntrack_helper_unregister(struct nf_conntrack_helper *me, + struct net *net) { struct nf_conntrack_tuple_hash *h; struct nf_conntrack_expect *exp; const struct hlist_node *n, *next; unsigned int i; - mutex_lock(&nf_ct_helper_mutex); - hlist_del_rcu(&me->hnode); - nf_ct_helper_count--; - mutex_unlock(&nf_ct_helper_mutex); - - /* Make sure every nothing is still using the helper unless its a - * connection in the hash. - */ - synchronize_rcu(); - - spin_lock_bh(&nf_conntrack_lock); - /* Get rid of expectations */ for (i = 0; i < nf_ct_expect_hsize; i++) { hlist_for_each_entry_safe(exp, n, next, - &nf_ct_expect_hash[i], hnode) { + &net->ct.expect_hash[i], hnode) { struct nf_conn_help *help = nfct_help(exp->master); if ((help->helper == me || exp->helper == me) && del_timer(&exp->timeout)) { @@ -155,12 +145,31 @@ void nf_conntrack_helper_unregister(struct nf_conntrack_helper *me) } /* Get rid of expecteds, set helpers to NULL. */ - hlist_for_each_entry(h, n, &unconfirmed, hnode) + hlist_for_each_entry(h, n, &net->ct.unconfirmed, hnode) unhelp(h, me); for (i = 0; i < nf_conntrack_htable_size; i++) { - hlist_for_each_entry(h, n, &nf_conntrack_hash[i], hnode) + hlist_for_each_entry(h, n, &net->ct.hash[i], hnode) unhelp(h, me); } +} + +void nf_conntrack_helper_unregister(struct nf_conntrack_helper *me) +{ + struct net *net; + + mutex_lock(&nf_ct_helper_mutex); + hlist_del_rcu(&me->hnode); + nf_ct_helper_count--; + mutex_unlock(&nf_ct_helper_mutex); + + /* Make sure every nothing is still using the helper unless its a + * connection in the hash. + */ + synchronize_rcu(); + + spin_lock_bh(&nf_conntrack_lock); + for_each_net(net) + __nf_conntrack_helper_unregister(me, net); spin_unlock_bh(&nf_conntrack_lock); } EXPORT_SYMBOL_GPL(nf_conntrack_helper_unregister); diff --git a/net/netfilter/nf_conntrack_irc.c b/net/netfilter/nf_conntrack_irc.c index 1b1226d6653f..20633fdf7e6b 100644 --- a/net/netfilter/nf_conntrack_irc.c +++ b/net/netfilter/nf_conntrack_irc.c @@ -68,11 +68,21 @@ static const char *const dccprotos[] = { static int parse_dcc(char *data, const char *data_end, u_int32_t *ip, u_int16_t *port, char **ad_beg_p, char **ad_end_p) { + char *tmp; + /* at least 12: "AAAAAAAA P\1\n" */ while (*data++ != ' ') if (data > data_end - 12) return -1; + /* Make sure we have a newline character within the packet boundaries + * because simple_strtoul parses until the first invalid character. */ + for (tmp = data; tmp <= data_end; tmp++) + if (*tmp == '\n') + break; + if (tmp > data_end || *tmp != '\n') + return -1; + *ad_beg_p = data; *ip = simple_strtoul(data, &data, 10); diff --git a/net/netfilter/nf_conntrack_netlink.c b/net/netfilter/nf_conntrack_netlink.c index 0edefcfc5949..cadfd15b44f6 100644 --- a/net/netfilter/nf_conntrack_netlink.c +++ b/net/netfilter/nf_conntrack_netlink.c @@ -4,7 +4,7 @@ * (C) 2001 by Jay Schulist <jschlst@samba.org> * (C) 2002-2006 by Harald Welte <laforge@gnumonks.org> * (C) 2003 by Patrick Mchardy <kaber@trash.net> - * (C) 2005-2007 by Pablo Neira Ayuso <pablo@netfilter.org> + * (C) 2005-2008 by Pablo Neira Ayuso <pablo@netfilter.org> * * Initial connection tracking via netlink development funded and * generally made possible by Network Robots, Inc. (www.networkrobots.com) @@ -18,6 +18,7 @@ #include <linux/init.h> #include <linux/module.h> #include <linux/kernel.h> +#include <linux/rculist.h> #include <linux/types.h> #include <linux/timer.h> #include <linux/skbuff.h> @@ -36,6 +37,7 @@ #include <net/netfilter/nf_conntrack_l3proto.h> #include <net/netfilter/nf_conntrack_l4proto.h> #include <net/netfilter/nf_conntrack_tuple.h> +#include <net/netfilter/nf_conntrack_acct.h> #ifdef CONFIG_NF_NAT_NEEDED #include <net/netfilter/nf_nat_core.h> #include <net/netfilter/nf_nat_protocol.h> @@ -205,22 +207,26 @@ nla_put_failure: return -1; } -#ifdef CONFIG_NF_CT_ACCT static int ctnetlink_dump_counters(struct sk_buff *skb, const struct nf_conn *ct, enum ip_conntrack_dir dir) { enum ctattr_type type = dir ? CTA_COUNTERS_REPLY: CTA_COUNTERS_ORIG; struct nlattr *nest_count; + const struct nf_conn_counter *acct; + + acct = nf_conn_acct_find(ct); + if (!acct) + return 0; nest_count = nla_nest_start(skb, type | NLA_F_NESTED); if (!nest_count) goto nla_put_failure; - NLA_PUT_BE32(skb, CTA_COUNTERS32_PACKETS, - htonl(ct->counters[dir].packets)); - NLA_PUT_BE32(skb, CTA_COUNTERS32_BYTES, - htonl(ct->counters[dir].bytes)); + NLA_PUT_BE64(skb, CTA_COUNTERS_PACKETS, + cpu_to_be64(acct[dir].packets)); + NLA_PUT_BE64(skb, CTA_COUNTERS_BYTES, + cpu_to_be64(acct[dir].bytes)); nla_nest_end(skb, nest_count); @@ -229,9 +235,6 @@ ctnetlink_dump_counters(struct sk_buff *skb, const struct nf_conn *ct, nla_put_failure: return -1; } -#else -#define ctnetlink_dump_counters(a, b, c) (0) -#endif #ifdef CONFIG_NF_CONNTRACK_MARK static inline int @@ -475,14 +478,14 @@ static int ctnetlink_conntrack_event(struct notifier_block *this, if (ctnetlink_dump_id(skb, ct) < 0) goto nla_put_failure; + if (ctnetlink_dump_status(skb, ct) < 0) + goto nla_put_failure; + if (events & IPCT_DESTROY) { if (ctnetlink_dump_counters(skb, ct, IP_CT_DIR_ORIGINAL) < 0 || ctnetlink_dump_counters(skb, ct, IP_CT_DIR_REPLY) < 0) goto nla_put_failure; } else { - if (ctnetlink_dump_status(skb, ct) < 0) - goto nla_put_failure; - if (ctnetlink_dump_timeout(skb, ct) < 0) goto nla_put_failure; @@ -500,11 +503,6 @@ static int ctnetlink_conntrack_event(struct notifier_block *this, goto nla_put_failure; #endif - if (events & IPCT_COUNTER_FILLING && - (ctnetlink_dump_counters(skb, ct, IP_CT_DIR_ORIGINAL) < 0 || - ctnetlink_dump_counters(skb, ct, IP_CT_DIR_REPLY) < 0)) - goto nla_put_failure; - if (events & IPCT_RELATED && ctnetlink_dump_master(skb, ct) < 0) goto nla_put_failure; @@ -551,7 +549,7 @@ ctnetlink_dump_table(struct sk_buff *skb, struct netlink_callback *cb) last = (struct nf_conn *)cb->args[1]; for (; cb->args[0] < nf_conntrack_htable_size; cb->args[0]++) { restart: - hlist_for_each_entry_rcu(h, n, &nf_conntrack_hash[cb->args[0]], + hlist_for_each_entry_rcu(h, n, &init_net.ct.hash[cb->args[0]], hnode) { if (NF_CT_DIRECTION(h) != IP_CT_DIR_ORIGINAL) continue; @@ -575,11 +573,15 @@ restart: cb->args[1] = (unsigned long)ct; goto out; } -#ifdef CONFIG_NF_CT_ACCT + if (NFNL_MSG_TYPE(cb->nlh->nlmsg_type) == - IPCTNL_MSG_CT_GET_CTRZERO) - memset(&ct->counters, 0, sizeof(ct->counters)); -#endif + IPCTNL_MSG_CT_GET_CTRZERO) { + struct nf_conn_counter *acct; + + acct = nf_conn_acct_find(ct); + if (acct) + memset(acct, 0, sizeof(struct nf_conn_counter[IP_CT_DIR_MAX])); + } } if (cb->args[1]) { cb->args[1] = 0; @@ -792,14 +794,14 @@ ctnetlink_del_conntrack(struct sock *ctnl, struct sk_buff *skb, err = ctnetlink_parse_tuple(cda, &tuple, CTA_TUPLE_REPLY, u3); else { /* Flush the whole table */ - nf_conntrack_flush(); + nf_conntrack_flush(&init_net); return 0; } if (err < 0) return err; - h = nf_conntrack_find_get(&tuple); + h = nf_conntrack_find_get(&init_net, &tuple); if (!h) return -ENOENT; @@ -812,9 +814,8 @@ ctnetlink_del_conntrack(struct sock *ctnl, struct sk_buff *skb, return -ENOENT; } } - if (del_timer(&ct->timeout)) - ct->timeout.function((unsigned long)ct); + nf_ct_kill(ct); nf_ct_put(ct); return 0; @@ -832,14 +833,9 @@ ctnetlink_get_conntrack(struct sock *ctnl, struct sk_buff *skb, u_int8_t u3 = nfmsg->nfgen_family; int err = 0; - if (nlh->nlmsg_flags & NLM_F_DUMP) { -#ifndef CONFIG_NF_CT_ACCT - if (NFNL_MSG_TYPE(nlh->nlmsg_type) == IPCTNL_MSG_CT_GET_CTRZERO) - return -ENOTSUPP; -#endif + if (nlh->nlmsg_flags & NLM_F_DUMP) return netlink_dump_start(ctnl, skb, nlh, ctnetlink_dump_table, ctnetlink_done); - } if (cda[CTA_TUPLE_ORIG]) err = ctnetlink_parse_tuple(cda, &tuple, CTA_TUPLE_ORIG, u3); @@ -851,7 +847,7 @@ ctnetlink_get_conntrack(struct sock *ctnl, struct sk_buff *skb, if (err < 0) return err; - h = nf_conntrack_find_get(&tuple); + h = nf_conntrack_find_get(&init_net, &tuple); if (!h) return -ENOENT; @@ -891,20 +887,19 @@ ctnetlink_change_status(struct nf_conn *ct, struct nlattr *cda[]) if (d & (IPS_EXPECTED|IPS_CONFIRMED|IPS_DYING)) /* unchangeable */ - return -EINVAL; + return -EBUSY; if (d & IPS_SEEN_REPLY && !(status & IPS_SEEN_REPLY)) /* SEEN_REPLY bit can only be set */ - return -EINVAL; - + return -EBUSY; if (d & IPS_ASSURED && !(status & IPS_ASSURED)) /* ASSURED bit can only be set */ - return -EINVAL; + return -EBUSY; if (cda[CTA_NAT_SRC] || cda[CTA_NAT_DST]) { #ifndef CONFIG_NF_NAT_NEEDED - return -EINVAL; + return -EOPNOTSUPP; #else struct nf_nat_range range; @@ -945,7 +940,7 @@ ctnetlink_change_helper(struct nf_conn *ct, struct nlattr *cda[]) /* don't change helper of sibling connections */ if (ct->master) - return -EINVAL; + return -EBUSY; err = ctnetlink_parse_help(cda[CTA_HELP], &helpname); if (err < 0) @@ -963,7 +958,7 @@ ctnetlink_change_helper(struct nf_conn *ct, struct nlattr *cda[]) helper = __nf_conntrack_helper_find_byname(helpname); if (helper == NULL) - return -EINVAL; + return -EOPNOTSUPP; if (help) { if (help->helper == helper) @@ -973,7 +968,7 @@ ctnetlink_change_helper(struct nf_conn *ct, struct nlattr *cda[]) /* need to zero data of old helper */ memset(&help->help, 0, sizeof(help->help)); } else { - help = nf_ct_helper_ext_add(ct, GFP_KERNEL); + help = nf_ct_helper_ext_add(ct, GFP_ATOMIC); if (help == NULL) return -ENOMEM; } @@ -1130,7 +1125,7 @@ ctnetlink_create_conntrack(struct nlattr *cda[], struct nf_conn_help *help; struct nf_conntrack_helper *helper; - ct = nf_conntrack_alloc(otuple, rtuple); + ct = nf_conntrack_alloc(&init_net, otuple, rtuple, GFP_KERNEL); if (ct == NULL || IS_ERR(ct)) return -ENOMEM; @@ -1141,36 +1136,42 @@ ctnetlink_create_conntrack(struct nlattr *cda[], ct->timeout.expires = jiffies + ct->timeout.expires * HZ; ct->status |= IPS_CONFIRMED; + rcu_read_lock(); + helper = __nf_ct_helper_find(rtuple); + if (helper) { + help = nf_ct_helper_ext_add(ct, GFP_ATOMIC); + if (help == NULL) { + rcu_read_unlock(); + err = -ENOMEM; + goto err; + } + /* not in hash table yet so not strictly necessary */ + rcu_assign_pointer(help->helper, helper); + } + if (cda[CTA_STATUS]) { err = ctnetlink_change_status(ct, cda); - if (err < 0) + if (err < 0) { + rcu_read_unlock(); goto err; + } } if (cda[CTA_PROTOINFO]) { err = ctnetlink_change_protoinfo(ct, cda); - if (err < 0) + if (err < 0) { + rcu_read_unlock(); goto err; + } } + nf_ct_acct_ext_add(ct, GFP_KERNEL); + #if defined(CONFIG_NF_CONNTRACK_MARK) if (cda[CTA_MARK]) ct->mark = ntohl(nla_get_be32(cda[CTA_MARK])); #endif - rcu_read_lock(); - helper = __nf_ct_helper_find(rtuple); - if (helper) { - help = nf_ct_helper_ext_add(ct, GFP_KERNEL); - if (help == NULL) { - rcu_read_unlock(); - err = -ENOMEM; - goto err; - } - /* not in hash table yet so not strictly necessary */ - rcu_assign_pointer(help->helper, helper); - } - /* setup master conntrack: this is a confirmed expectation */ if (master_ct) { __set_bit(IPS_EXPECTED_BIT, &ct->status); @@ -1212,9 +1213,9 @@ ctnetlink_new_conntrack(struct sock *ctnl, struct sk_buff *skb, spin_lock_bh(&nf_conntrack_lock); if (cda[CTA_TUPLE_ORIG]) - h = __nf_conntrack_find(&otuple); + h = __nf_conntrack_find(&init_net, &otuple); else if (cda[CTA_TUPLE_REPLY]) - h = __nf_conntrack_find(&rtuple); + h = __nf_conntrack_find(&init_net, &rtuple); if (h == NULL) { struct nf_conntrack_tuple master; @@ -1229,7 +1230,7 @@ ctnetlink_new_conntrack(struct sock *ctnl, struct sk_buff *skb, if (err < 0) goto out_unlock; - master_h = __nf_conntrack_find(&master); + master_h = __nf_conntrack_find(&init_net, &master); if (master_h == NULL) { err = -ENOENT; goto out_unlock; @@ -1258,12 +1259,12 @@ ctnetlink_new_conntrack(struct sock *ctnl, struct sk_buff *skb, if (!(nlh->nlmsg_flags & NLM_F_EXCL)) { /* we only allow nat config for new conntracks */ if (cda[CTA_NAT_SRC] || cda[CTA_NAT_DST]) { - err = -EINVAL; + err = -EOPNOTSUPP; goto out_unlock; } /* can't link an existing conntrack to a master */ if (cda[CTA_TUPLE_MASTER]) { - err = -EINVAL; + err = -EOPNOTSUPP; goto out_unlock; } err = ctnetlink_change_conntrack(nf_ct_tuplehash_to_ctrack(h), @@ -1457,6 +1458,7 @@ static int ctnetlink_exp_done(struct netlink_callback *cb) static int ctnetlink_exp_dump_table(struct sk_buff *skb, struct netlink_callback *cb) { + struct net *net = &init_net; struct nf_conntrack_expect *exp, *last; struct nfgenmsg *nfmsg = NLMSG_DATA(cb->nlh); struct hlist_node *n; @@ -1466,7 +1468,7 @@ ctnetlink_exp_dump_table(struct sk_buff *skb, struct netlink_callback *cb) last = (struct nf_conntrack_expect *)cb->args[1]; for (; cb->args[0] < nf_ct_expect_hsize; cb->args[0]++) { restart: - hlist_for_each_entry(exp, n, &nf_ct_expect_hash[cb->args[0]], + hlist_for_each_entry(exp, n, &net->ct.expect_hash[cb->args[0]], hnode) { if (l3proto && exp->tuple.src.l3num != l3proto) continue; @@ -1528,7 +1530,7 @@ ctnetlink_get_expect(struct sock *ctnl, struct sk_buff *skb, if (err < 0) return err; - exp = nf_ct_expect_find_get(&tuple); + exp = nf_ct_expect_find_get(&init_net, &tuple); if (!exp) return -ENOENT; @@ -1582,7 +1584,7 @@ ctnetlink_del_expect(struct sock *ctnl, struct sk_buff *skb, return err; /* bump usage count to 2 */ - exp = nf_ct_expect_find_get(&tuple); + exp = nf_ct_expect_find_get(&init_net, &tuple); if (!exp) return -ENOENT; @@ -1608,11 +1610,11 @@ ctnetlink_del_expect(struct sock *ctnl, struct sk_buff *skb, h = __nf_conntrack_helper_find_byname(name); if (!h) { spin_unlock_bh(&nf_conntrack_lock); - return -EINVAL; + return -EOPNOTSUPP; } for (i = 0; i < nf_ct_expect_hsize; i++) { hlist_for_each_entry_safe(exp, n, next, - &nf_ct_expect_hash[i], + &init_net.ct.expect_hash[i], hnode) { m_help = nfct_help(exp->master); if (m_help->helper == h @@ -1628,7 +1630,7 @@ ctnetlink_del_expect(struct sock *ctnl, struct sk_buff *skb, spin_lock_bh(&nf_conntrack_lock); for (i = 0; i < nf_ct_expect_hsize; i++) { hlist_for_each_entry_safe(exp, n, next, - &nf_ct_expect_hash[i], + &init_net.ct.expect_hash[i], hnode) { if (del_timer(&exp->timeout)) { nf_ct_unlink_expect(exp); @@ -1669,7 +1671,7 @@ ctnetlink_create_expect(struct nlattr *cda[], u_int8_t u3) return err; /* Look for master conntrack of this expectation */ - h = nf_conntrack_find_get(&master_tuple); + h = nf_conntrack_find_get(&init_net, &master_tuple); if (!h) return -ENOENT; ct = nf_ct_tuplehash_to_ctrack(h); @@ -1723,7 +1725,7 @@ ctnetlink_new_expect(struct sock *ctnl, struct sk_buff *skb, return err; spin_lock_bh(&nf_conntrack_lock); - exp = __nf_ct_expect_find(&tuple); + exp = __nf_ct_expect_find(&init_net, &tuple); if (!exp) { spin_unlock_bh(&nf_conntrack_lock); diff --git a/net/netfilter/nf_conntrack_pptp.c b/net/netfilter/nf_conntrack_pptp.c index 97e54b0e43a3..373e51e91ce5 100644 --- a/net/netfilter/nf_conntrack_pptp.c +++ b/net/netfilter/nf_conntrack_pptp.c @@ -98,6 +98,7 @@ EXPORT_SYMBOL(pptp_msg_name); static void pptp_expectfn(struct nf_conn *ct, struct nf_conntrack_expect *exp) { + struct net *net = nf_ct_net(ct); typeof(nf_nat_pptp_hook_expectfn) nf_nat_pptp_expectfn; pr_debug("increasing timeouts\n"); @@ -121,7 +122,7 @@ static void pptp_expectfn(struct nf_conn *ct, pr_debug("trying to unexpect other dir: "); nf_ct_dump_tuple(&inv_t); - exp_other = nf_ct_expect_find_get(&inv_t); + exp_other = nf_ct_expect_find_get(net, &inv_t); if (exp_other) { /* delete other expectation. */ pr_debug("found\n"); @@ -134,7 +135,8 @@ static void pptp_expectfn(struct nf_conn *ct, rcu_read_unlock(); } -static int destroy_sibling_or_exp(const struct nf_conntrack_tuple *t) +static int destroy_sibling_or_exp(struct net *net, + const struct nf_conntrack_tuple *t) { const struct nf_conntrack_tuple_hash *h; struct nf_conntrack_expect *exp; @@ -143,7 +145,7 @@ static int destroy_sibling_or_exp(const struct nf_conntrack_tuple *t) pr_debug("trying to timeout ct or exp for tuple "); nf_ct_dump_tuple(t); - h = nf_conntrack_find_get(t); + h = nf_conntrack_find_get(net, t); if (h) { sibling = nf_ct_tuplehash_to_ctrack(h); pr_debug("setting timeout of conntrack %p to 0\n", sibling); @@ -154,7 +156,7 @@ static int destroy_sibling_or_exp(const struct nf_conntrack_tuple *t) nf_ct_put(sibling); return 1; } else { - exp = nf_ct_expect_find_get(t); + exp = nf_ct_expect_find_get(net, t); if (exp) { pr_debug("unexpect_related of expect %p\n", exp); nf_ct_unexpect_related(exp); @@ -168,6 +170,7 @@ static int destroy_sibling_or_exp(const struct nf_conntrack_tuple *t) /* timeout GRE data connections */ static void pptp_destroy_siblings(struct nf_conn *ct) { + struct net *net = nf_ct_net(ct); const struct nf_conn_help *help = nfct_help(ct); struct nf_conntrack_tuple t; @@ -178,7 +181,7 @@ static void pptp_destroy_siblings(struct nf_conn *ct) t.dst.protonum = IPPROTO_GRE; t.src.u.gre.key = help->help.ct_pptp_info.pns_call_id; t.dst.u.gre.key = help->help.ct_pptp_info.pac_call_id; - if (!destroy_sibling_or_exp(&t)) + if (!destroy_sibling_or_exp(net, &t)) pr_debug("failed to timeout original pns->pac ct/exp\n"); /* try reply (pac->pns) tuple */ @@ -186,7 +189,7 @@ static void pptp_destroy_siblings(struct nf_conn *ct) t.dst.protonum = IPPROTO_GRE; t.src.u.gre.key = help->help.ct_pptp_info.pac_call_id; t.dst.u.gre.key = help->help.ct_pptp_info.pns_call_id; - if (!destroy_sibling_or_exp(&t)) + if (!destroy_sibling_or_exp(net, &t)) pr_debug("failed to timeout reply pac->pns ct/exp\n"); } @@ -594,15 +597,32 @@ static struct nf_conntrack_helper pptp __read_mostly = { .expect_policy = &pptp_exp_policy, }; +static void nf_conntrack_pptp_net_exit(struct net *net) +{ + nf_ct_gre_keymap_flush(net); +} + +static struct pernet_operations nf_conntrack_pptp_net_ops = { + .exit = nf_conntrack_pptp_net_exit, +}; + static int __init nf_conntrack_pptp_init(void) { - return nf_conntrack_helper_register(&pptp); + int rv; + + rv = nf_conntrack_helper_register(&pptp); + if (rv < 0) + return rv; + rv = register_pernet_subsys(&nf_conntrack_pptp_net_ops); + if (rv < 0) + nf_conntrack_helper_unregister(&pptp); + return rv; } static void __exit nf_conntrack_pptp_fini(void) { nf_conntrack_helper_unregister(&pptp); - nf_ct_gre_keymap_flush(); + unregister_pernet_subsys(&nf_conntrack_pptp_net_ops); } module_init(nf_conntrack_pptp_init); diff --git a/net/netfilter/nf_conntrack_proto.c b/net/netfilter/nf_conntrack_proto.c index a49fc932629b..a59a307e685d 100644 --- a/net/netfilter/nf_conntrack_proto.c +++ b/net/netfilter/nf_conntrack_proto.c @@ -207,6 +207,8 @@ EXPORT_SYMBOL_GPL(nf_conntrack_l3proto_register); void nf_conntrack_l3proto_unregister(struct nf_conntrack_l3proto *proto) { + struct net *net; + BUG_ON(proto->l3proto >= AF_MAX); mutex_lock(&nf_ct_proto_mutex); @@ -219,7 +221,8 @@ void nf_conntrack_l3proto_unregister(struct nf_conntrack_l3proto *proto) synchronize_rcu(); /* Remove all contrack entries for this protocol */ - nf_ct_iterate_cleanup(kill_l3proto, proto); + for_each_net(net) + nf_ct_iterate_cleanup(net, kill_l3proto, proto); } EXPORT_SYMBOL_GPL(nf_conntrack_l3proto_unregister); @@ -316,6 +319,8 @@ EXPORT_SYMBOL_GPL(nf_conntrack_l4proto_register); void nf_conntrack_l4proto_unregister(struct nf_conntrack_l4proto *l4proto) { + struct net *net; + BUG_ON(l4proto->l3proto >= PF_MAX); mutex_lock(&nf_ct_proto_mutex); @@ -328,7 +333,8 @@ void nf_conntrack_l4proto_unregister(struct nf_conntrack_l4proto *l4proto) synchronize_rcu(); /* Remove all contrack entries for this protocol */ - nf_ct_iterate_cleanup(kill_l4proto, l4proto); + for_each_net(net) + nf_ct_iterate_cleanup(net, kill_l4proto, l4proto); } EXPORT_SYMBOL_GPL(nf_conntrack_l4proto_unregister); diff --git a/net/netfilter/nf_conntrack_proto_dccp.c b/net/netfilter/nf_conntrack_proto_dccp.c index afb4a1861d2c..8fcf1762fabf 100644 --- a/net/netfilter/nf_conntrack_proto_dccp.c +++ b/net/netfilter/nf_conntrack_proto_dccp.c @@ -418,6 +418,7 @@ static bool dccp_invert_tuple(struct nf_conntrack_tuple *inv, static bool dccp_new(struct nf_conn *ct, const struct sk_buff *skb, unsigned int dataoff) { + struct net *net = nf_ct_net(ct); struct dccp_hdr _dh, *dh; const char *msg; u_int8_t state; @@ -445,7 +446,7 @@ static bool dccp_new(struct nf_conn *ct, const struct sk_buff *skb, return true; out_invalid: - if (LOG_INVALID(IPPROTO_DCCP)) + if (LOG_INVALID(net, IPPROTO_DCCP)) nf_log_packet(nf_ct_l3num(ct), 0, skb, NULL, NULL, NULL, msg); return false; } @@ -461,8 +462,9 @@ static u64 dccp_ack_seq(const struct dccp_hdr *dh) static int dccp_packet(struct nf_conn *ct, const struct sk_buff *skb, unsigned int dataoff, enum ip_conntrack_info ctinfo, - int pf, unsigned int hooknum) + u_int8_t pf, unsigned int hooknum) { + struct net *net = nf_ct_net(ct); enum ip_conntrack_dir dir = CTINFO2DIR(ctinfo); struct dccp_hdr _dh, *dh; u_int8_t type, old_state, new_state; @@ -475,8 +477,7 @@ static int dccp_packet(struct nf_conn *ct, const struct sk_buff *skb, if (type == DCCP_PKT_RESET && !test_bit(IPS_SEEN_REPLY_BIT, &ct->status)) { /* Tear down connection immediately if only reply is a RESET */ - if (del_timer(&ct->timeout)) - ct->timeout.function((unsigned long)ct); + nf_ct_kill_acct(ct, ctinfo, skb); return NF_ACCEPT; } @@ -525,13 +526,13 @@ static int dccp_packet(struct nf_conn *ct, const struct sk_buff *skb, ct->proto.dccp.last_pkt = type; write_unlock_bh(&dccp_lock); - if (LOG_INVALID(IPPROTO_DCCP)) + if (LOG_INVALID(net, IPPROTO_DCCP)) nf_log_packet(pf, 0, skb, NULL, NULL, NULL, "nf_ct_dccp: invalid packet ignored "); return NF_ACCEPT; case CT_DCCP_INVALID: write_unlock_bh(&dccp_lock); - if (LOG_INVALID(IPPROTO_DCCP)) + if (LOG_INVALID(net, IPPROTO_DCCP)) nf_log_packet(pf, 0, skb, NULL, NULL, NULL, "nf_ct_dccp: invalid state transition "); return -NF_ACCEPT; @@ -546,9 +547,9 @@ static int dccp_packet(struct nf_conn *ct, const struct sk_buff *skb, return NF_ACCEPT; } -static int dccp_error(struct sk_buff *skb, unsigned int dataoff, - enum ip_conntrack_info *ctinfo, int pf, - unsigned int hooknum) +static int dccp_error(struct net *net, struct sk_buff *skb, + unsigned int dataoff, enum ip_conntrack_info *ctinfo, + u_int8_t pf, unsigned int hooknum) { struct dccp_hdr _dh, *dh; unsigned int dccp_len = skb->len - dataoff; @@ -576,7 +577,7 @@ static int dccp_error(struct sk_buff *skb, unsigned int dataoff, } } - if (nf_conntrack_checksum && hooknum == NF_INET_PRE_ROUTING && + if (net->ct.sysctl_checksum && hooknum == NF_INET_PRE_ROUTING && nf_checksum_partial(skb, hooknum, dataoff, cscov, IPPROTO_DCCP, pf)) { msg = "nf_ct_dccp: bad checksum "; @@ -591,7 +592,7 @@ static int dccp_error(struct sk_buff *skb, unsigned int dataoff, return NF_ACCEPT; out_invalid: - if (LOG_INVALID(IPPROTO_DCCP)) + if (LOG_INVALID(net, IPPROTO_DCCP)) nf_log_packet(pf, 0, skb, NULL, NULL, NULL, msg); return -NF_ACCEPT; } diff --git a/net/netfilter/nf_conntrack_proto_generic.c b/net/netfilter/nf_conntrack_proto_generic.c index e31b0e7bd0b1..dbe680af85d2 100644 --- a/net/netfilter/nf_conntrack_proto_generic.c +++ b/net/netfilter/nf_conntrack_proto_generic.c @@ -45,7 +45,7 @@ static int packet(struct nf_conn *ct, const struct sk_buff *skb, unsigned int dataoff, enum ip_conntrack_info ctinfo, - int pf, + u_int8_t pf, unsigned int hooknum) { nf_ct_refresh_acct(ct, ctinfo, skb, nf_ct_generic_timeout); diff --git a/net/netfilter/nf_conntrack_proto_gre.c b/net/netfilter/nf_conntrack_proto_gre.c index 654a4f7f12c6..a2cdbcbf64c4 100644 --- a/net/netfilter/nf_conntrack_proto_gre.c +++ b/net/netfilter/nf_conntrack_proto_gre.c @@ -29,8 +29,11 @@ #include <linux/list.h> #include <linux/seq_file.h> #include <linux/in.h> +#include <linux/netdevice.h> #include <linux/skbuff.h> - +#include <net/dst.h> +#include <net/net_namespace.h> +#include <net/netns/generic.h> #include <net/netfilter/nf_conntrack_l4proto.h> #include <net/netfilter/nf_conntrack_helper.h> #include <net/netfilter/nf_conntrack_core.h> @@ -40,19 +43,23 @@ #define GRE_TIMEOUT (30 * HZ) #define GRE_STREAM_TIMEOUT (180 * HZ) -static DEFINE_RWLOCK(nf_ct_gre_lock); -static LIST_HEAD(gre_keymap_list); +static int proto_gre_net_id; +struct netns_proto_gre { + rwlock_t keymap_lock; + struct list_head keymap_list; +}; -void nf_ct_gre_keymap_flush(void) +void nf_ct_gre_keymap_flush(struct net *net) { - struct list_head *pos, *n; + struct netns_proto_gre *net_gre = net_generic(net, proto_gre_net_id); + struct nf_ct_gre_keymap *km, *tmp; - write_lock_bh(&nf_ct_gre_lock); - list_for_each_safe(pos, n, &gre_keymap_list) { - list_del(pos); - kfree(pos); + write_lock_bh(&net_gre->keymap_lock); + list_for_each_entry_safe(km, tmp, &net_gre->keymap_list, list) { + list_del(&km->list); + kfree(km); } - write_unlock_bh(&nf_ct_gre_lock); + write_unlock_bh(&net_gre->keymap_lock); } EXPORT_SYMBOL(nf_ct_gre_keymap_flush); @@ -67,19 +74,20 @@ static inline int gre_key_cmpfn(const struct nf_ct_gre_keymap *km, } /* look up the source key for a given tuple */ -static __be16 gre_keymap_lookup(struct nf_conntrack_tuple *t) +static __be16 gre_keymap_lookup(struct net *net, struct nf_conntrack_tuple *t) { + struct netns_proto_gre *net_gre = net_generic(net, proto_gre_net_id); struct nf_ct_gre_keymap *km; __be16 key = 0; - read_lock_bh(&nf_ct_gre_lock); - list_for_each_entry(km, &gre_keymap_list, list) { + read_lock_bh(&net_gre->keymap_lock); + list_for_each_entry(km, &net_gre->keymap_list, list) { if (gre_key_cmpfn(km, t)) { key = km->tuple.src.u.gre.key; break; } } - read_unlock_bh(&nf_ct_gre_lock); + read_unlock_bh(&net_gre->keymap_lock); pr_debug("lookup src key 0x%x for ", key); nf_ct_dump_tuple(t); @@ -91,16 +99,22 @@ static __be16 gre_keymap_lookup(struct nf_conntrack_tuple *t) int nf_ct_gre_keymap_add(struct nf_conn *ct, enum ip_conntrack_dir dir, struct nf_conntrack_tuple *t) { + struct net *net = nf_ct_net(ct); + struct netns_proto_gre *net_gre = net_generic(net, proto_gre_net_id); struct nf_conn_help *help = nfct_help(ct); struct nf_ct_gre_keymap **kmp, *km; kmp = &help->help.ct_pptp_info.keymap[dir]; if (*kmp) { /* check whether it's a retransmission */ - list_for_each_entry(km, &gre_keymap_list, list) { - if (gre_key_cmpfn(km, t) && km == *kmp) + read_lock_bh(&net_gre->keymap_lock); + list_for_each_entry(km, &net_gre->keymap_list, list) { + if (gre_key_cmpfn(km, t) && km == *kmp) { + read_unlock_bh(&net_gre->keymap_lock); return 0; + } } + read_unlock_bh(&net_gre->keymap_lock); pr_debug("trying to override keymap_%s for ct %p\n", dir == IP_CT_DIR_REPLY ? "reply" : "orig", ct); return -EEXIST; @@ -115,9 +129,9 @@ int nf_ct_gre_keymap_add(struct nf_conn *ct, enum ip_conntrack_dir dir, pr_debug("adding new entry %p: ", km); nf_ct_dump_tuple(&km->tuple); - write_lock_bh(&nf_ct_gre_lock); - list_add_tail(&km->list, &gre_keymap_list); - write_unlock_bh(&nf_ct_gre_lock); + write_lock_bh(&net_gre->keymap_lock); + list_add_tail(&km->list, &net_gre->keymap_list); + write_unlock_bh(&net_gre->keymap_lock); return 0; } @@ -126,12 +140,14 @@ EXPORT_SYMBOL_GPL(nf_ct_gre_keymap_add); /* destroy the keymap entries associated with specified master ct */ void nf_ct_gre_keymap_destroy(struct nf_conn *ct) { + struct net *net = nf_ct_net(ct); + struct netns_proto_gre *net_gre = net_generic(net, proto_gre_net_id); struct nf_conn_help *help = nfct_help(ct); enum ip_conntrack_dir dir; pr_debug("entering for ct %p\n", ct); - write_lock_bh(&nf_ct_gre_lock); + write_lock_bh(&net_gre->keymap_lock); for (dir = IP_CT_DIR_ORIGINAL; dir < IP_CT_DIR_MAX; dir++) { if (help->help.ct_pptp_info.keymap[dir]) { pr_debug("removing %p from list\n", @@ -141,7 +157,7 @@ void nf_ct_gre_keymap_destroy(struct nf_conn *ct) help->help.ct_pptp_info.keymap[dir] = NULL; } } - write_unlock_bh(&nf_ct_gre_lock); + write_unlock_bh(&net_gre->keymap_lock); } EXPORT_SYMBOL_GPL(nf_ct_gre_keymap_destroy); @@ -160,6 +176,7 @@ static bool gre_invert_tuple(struct nf_conntrack_tuple *tuple, static bool gre_pkt_to_tuple(const struct sk_buff *skb, unsigned int dataoff, struct nf_conntrack_tuple *tuple) { + struct net *net = dev_net(skb->dev ? skb->dev : skb->dst->dev); const struct gre_hdr_pptp *pgrehdr; struct gre_hdr_pptp _pgrehdr; __be16 srckey; @@ -186,7 +203,7 @@ static bool gre_pkt_to_tuple(const struct sk_buff *skb, unsigned int dataoff, } tuple->dst.u.gre.key = pgrehdr->call_id; - srckey = gre_keymap_lookup(tuple); + srckey = gre_keymap_lookup(net, tuple); tuple->src.u.gre.key = srckey; return true; @@ -215,7 +232,7 @@ static int gre_packet(struct nf_conn *ct, const struct sk_buff *skb, unsigned int dataoff, enum ip_conntrack_info ctinfo, - int pf, + u_int8_t pf, unsigned int hooknum) { /* If we've seen traffic both ways, this is a GRE connection. @@ -225,7 +242,7 @@ static int gre_packet(struct nf_conn *ct, ct->proto.gre.stream_timeout); /* Also, more likely to be important, and not a probe. */ set_bit(IPS_ASSURED_BIT, &ct->status); - nf_conntrack_event_cache(IPCT_STATUS, skb); + nf_conntrack_event_cache(IPCT_STATUS, ct); } else nf_ct_refresh_acct(ct, ctinfo, skb, ct->proto.gre.timeout); @@ -281,15 +298,53 @@ static struct nf_conntrack_l4proto nf_conntrack_l4proto_gre4 __read_mostly = { #endif }; +static int proto_gre_net_init(struct net *net) +{ + struct netns_proto_gre *net_gre; + int rv; + + net_gre = kmalloc(sizeof(struct netns_proto_gre), GFP_KERNEL); + if (!net_gre) + return -ENOMEM; + rwlock_init(&net_gre->keymap_lock); + INIT_LIST_HEAD(&net_gre->keymap_list); + + rv = net_assign_generic(net, proto_gre_net_id, net_gre); + if (rv < 0) + kfree(net_gre); + return rv; +} + +static void proto_gre_net_exit(struct net *net) +{ + struct netns_proto_gre *net_gre = net_generic(net, proto_gre_net_id); + + nf_ct_gre_keymap_flush(net); + kfree(net_gre); +} + +static struct pernet_operations proto_gre_net_ops = { + .init = proto_gre_net_init, + .exit = proto_gre_net_exit, +}; + static int __init nf_ct_proto_gre_init(void) { - return nf_conntrack_l4proto_register(&nf_conntrack_l4proto_gre4); + int rv; + + rv = nf_conntrack_l4proto_register(&nf_conntrack_l4proto_gre4); + if (rv < 0) + return rv; + rv = register_pernet_gen_device(&proto_gre_net_id, &proto_gre_net_ops); + if (rv < 0) + nf_conntrack_l4proto_unregister(&nf_conntrack_l4proto_gre4); + return rv; } static void nf_ct_proto_gre_fini(void) { nf_conntrack_l4proto_unregister(&nf_conntrack_l4proto_gre4); - nf_ct_gre_keymap_flush(); + unregister_pernet_gen_device(proto_gre_net_id, &proto_gre_net_ops); } module_init(nf_ct_proto_gre_init); diff --git a/net/netfilter/nf_conntrack_proto_sctp.c b/net/netfilter/nf_conntrack_proto_sctp.c index cbf2e27a22b2..ae8c2609e230 100644 --- a/net/netfilter/nf_conntrack_proto_sctp.c +++ b/net/netfilter/nf_conntrack_proto_sctp.c @@ -287,7 +287,7 @@ static int sctp_packet(struct nf_conn *ct, const struct sk_buff *skb, unsigned int dataoff, enum ip_conntrack_info ctinfo, - int pf, + u_int8_t pf, unsigned int hooknum) { enum sctp_conntrack new_state, old_state; @@ -369,7 +369,7 @@ static int sctp_packet(struct nf_conn *ct, ct->proto.sctp.state = new_state; if (old_state != new_state) - nf_conntrack_event_cache(IPCT_PROTOINFO, skb); + nf_conntrack_event_cache(IPCT_PROTOINFO, ct); } write_unlock_bh(&sctp_lock); @@ -380,7 +380,7 @@ static int sctp_packet(struct nf_conn *ct, new_state == SCTP_CONNTRACK_ESTABLISHED) { pr_debug("Setting assured bit\n"); set_bit(IPS_ASSURED_BIT, &ct->status); - nf_conntrack_event_cache(IPCT_STATUS, skb); + nf_conntrack_event_cache(IPCT_STATUS, ct); } return NF_ACCEPT; @@ -463,6 +463,82 @@ static bool sctp_new(struct nf_conn *ct, const struct sk_buff *skb, return true; } +#if defined(CONFIG_NF_CT_NETLINK) || defined(CONFIG_NF_CT_NETLINK_MODULE) + +#include <linux/netfilter/nfnetlink.h> +#include <linux/netfilter/nfnetlink_conntrack.h> + +static int sctp_to_nlattr(struct sk_buff *skb, struct nlattr *nla, + const struct nf_conn *ct) +{ + struct nlattr *nest_parms; + + read_lock_bh(&sctp_lock); + nest_parms = nla_nest_start(skb, CTA_PROTOINFO_SCTP | NLA_F_NESTED); + if (!nest_parms) + goto nla_put_failure; + + NLA_PUT_U8(skb, CTA_PROTOINFO_SCTP_STATE, ct->proto.sctp.state); + + NLA_PUT_BE32(skb, + CTA_PROTOINFO_SCTP_VTAG_ORIGINAL, + ct->proto.sctp.vtag[IP_CT_DIR_ORIGINAL]); + + NLA_PUT_BE32(skb, + CTA_PROTOINFO_SCTP_VTAG_REPLY, + ct->proto.sctp.vtag[IP_CT_DIR_REPLY]); + + read_unlock_bh(&sctp_lock); + + nla_nest_end(skb, nest_parms); + + return 0; + +nla_put_failure: + read_unlock_bh(&sctp_lock); + return -1; +} + +static const struct nla_policy sctp_nla_policy[CTA_PROTOINFO_SCTP_MAX+1] = { + [CTA_PROTOINFO_SCTP_STATE] = { .type = NLA_U8 }, + [CTA_PROTOINFO_SCTP_VTAG_ORIGINAL] = { .type = NLA_U32 }, + [CTA_PROTOINFO_SCTP_VTAG_REPLY] = { .type = NLA_U32 }, +}; + +static int nlattr_to_sctp(struct nlattr *cda[], struct nf_conn *ct) +{ + struct nlattr *attr = cda[CTA_PROTOINFO_SCTP]; + struct nlattr *tb[CTA_PROTOINFO_SCTP_MAX+1]; + int err; + + /* updates may not contain the internal protocol info, skip parsing */ + if (!attr) + return 0; + + err = nla_parse_nested(tb, + CTA_PROTOINFO_SCTP_MAX, + attr, + sctp_nla_policy); + if (err < 0) + return err; + + if (!tb[CTA_PROTOINFO_SCTP_STATE] || + !tb[CTA_PROTOINFO_SCTP_VTAG_ORIGINAL] || + !tb[CTA_PROTOINFO_SCTP_VTAG_REPLY]) + return -EINVAL; + + write_lock_bh(&sctp_lock); + ct->proto.sctp.state = nla_get_u8(tb[CTA_PROTOINFO_SCTP_STATE]); + ct->proto.sctp.vtag[IP_CT_DIR_ORIGINAL] = + nla_get_be32(tb[CTA_PROTOINFO_SCTP_VTAG_ORIGINAL]); + ct->proto.sctp.vtag[IP_CT_DIR_REPLY] = + nla_get_be32(tb[CTA_PROTOINFO_SCTP_VTAG_REPLY]); + write_unlock_bh(&sctp_lock); + + return 0; +} +#endif + #ifdef CONFIG_SYSCTL static unsigned int sctp_sysctl_table_users; static struct ctl_table_header *sctp_sysctl_header; @@ -591,6 +667,8 @@ static struct nf_conntrack_l4proto nf_conntrack_l4proto_sctp4 __read_mostly = { .new = sctp_new, .me = THIS_MODULE, #if defined(CONFIG_NF_CT_NETLINK) || defined(CONFIG_NF_CT_NETLINK_MODULE) + .to_nlattr = sctp_to_nlattr, + .from_nlattr = nlattr_to_sctp, .tuple_to_nlattr = nf_ct_port_tuple_to_nlattr, .nlattr_to_tuple = nf_ct_port_nlattr_to_tuple, .nla_policy = nf_ct_port_nla_policy, @@ -617,6 +695,8 @@ static struct nf_conntrack_l4proto nf_conntrack_l4proto_sctp6 __read_mostly = { .new = sctp_new, .me = THIS_MODULE, #if defined(CONFIG_NF_CT_NETLINK) || defined(CONFIG_NF_CT_NETLINK_MODULE) + .to_nlattr = sctp_to_nlattr, + .from_nlattr = nlattr_to_sctp, .tuple_to_nlattr = nf_ct_port_tuple_to_nlattr, .nlattr_to_tuple = nf_ct_port_nlattr_to_tuple, .nla_policy = nf_ct_port_nla_policy, diff --git a/net/netfilter/nf_conntrack_proto_tcp.c b/net/netfilter/nf_conntrack_proto_tcp.c index ba94004fe323..f947ec41e391 100644 --- a/net/netfilter/nf_conntrack_proto_tcp.c +++ b/net/netfilter/nf_conntrack_proto_tcp.c @@ -67,7 +67,8 @@ static const char *const tcp_conntrack_names[] = { /* RFC1122 says the R2 limit should be at least 100 seconds. Linux uses 15 packets as limit, which corresponds to ~13-30min depending on RTO. */ -static unsigned int nf_ct_tcp_timeout_max_retrans __read_mostly = 5 MINS; +static unsigned int nf_ct_tcp_timeout_max_retrans __read_mostly = 5 MINS; +static unsigned int nf_ct_tcp_timeout_unacknowledged __read_mostly = 5 MINS; static unsigned int tcp_timeouts[TCP_CONNTRACK_MAX] __read_mostly = { [TCP_CONNTRACK_SYN_SENT] = 2 MINS, @@ -331,12 +332,13 @@ static unsigned int get_conntrack_index(const struct tcphdr *tcph) I. Upper bound for valid data: seq <= sender.td_maxend II. Lower bound for valid data: seq + len >= sender.td_end - receiver.td_maxwin - III. Upper bound for valid ack: sack <= receiver.td_end - IV. Lower bound for valid ack: ack >= receiver.td_end - MAXACKWINDOW + III. Upper bound for valid (s)ack: sack <= receiver.td_end + IV. Lower bound for valid (s)ack: sack >= receiver.td_end - MAXACKWINDOW - where sack is the highest right edge of sack block found in the packet. + where sack is the highest right edge of sack block found in the packet + or ack in the case of packet without SACK option. - The upper bound limit for a valid ack is not ignored - + The upper bound limit for a valid (s)ack is not ignored - we doesn't have to deal with fragments. */ @@ -484,8 +486,9 @@ static bool tcp_in_window(const struct nf_conn *ct, const struct sk_buff *skb, unsigned int dataoff, const struct tcphdr *tcph, - int pf) + u_int8_t pf) { + struct net *net = nf_ct_net(ct); struct ip_ct_tcp_state *sender = &state->seen[dir]; struct ip_ct_tcp_state *receiver = &state->seen[!dir]; const struct nf_conntrack_tuple *tuple = &ct->tuplehash[dir].tuple; @@ -606,12 +609,12 @@ static bool tcp_in_window(const struct nf_conn *ct, before(seq, sender->td_maxend + 1), after(end, sender->td_end - receiver->td_maxwin - 1), before(sack, receiver->td_end + 1), - after(ack, receiver->td_end - MAXACKWINDOW(sender))); + after(sack, receiver->td_end - MAXACKWINDOW(sender) - 1)); if (before(seq, sender->td_maxend + 1) && after(end, sender->td_end - receiver->td_maxwin - 1) && before(sack, receiver->td_end + 1) && - after(ack, receiver->td_end - MAXACKWINDOW(sender))) { + after(sack, receiver->td_end - MAXACKWINDOW(sender) - 1)) { /* * Take into account window scaling (RFC 1323). */ @@ -624,8 +627,10 @@ static bool tcp_in_window(const struct nf_conn *ct, swin = win + (sack - ack); if (sender->td_maxwin < swin) sender->td_maxwin = swin; - if (after(end, sender->td_end)) + if (after(end, sender->td_end)) { sender->td_end = end; + sender->flags |= IP_CT_TCP_FLAG_DATA_UNACKNOWLEDGED; + } /* * Update receiver data. */ @@ -636,6 +641,8 @@ static bool tcp_in_window(const struct nf_conn *ct, if (win == 0) receiver->td_maxend++; } + if (ack == receiver->td_end) + receiver->flags &= ~IP_CT_TCP_FLAG_DATA_UNACKNOWLEDGED; /* * Check retransmissions. @@ -662,7 +669,7 @@ static bool tcp_in_window(const struct nf_conn *ct, if (sender->flags & IP_CT_TCP_FLAG_BE_LIBERAL || nf_ct_tcp_be_liberal) res = true; - if (!res && LOG_INVALID(IPPROTO_TCP)) + if (!res && LOG_INVALID(net, IPPROTO_TCP)) nf_log_packet(pf, 0, skb, NULL, NULL, NULL, "nf_ct_tcp: %s ", before(seq, sender->td_maxend + 1) ? @@ -740,10 +747,11 @@ static const u8 tcp_valid_flags[(TH_FIN|TH_SYN|TH_RST|TH_ACK|TH_URG) + 1] = }; /* Protect conntrack agaist broken packets. Code taken from ipt_unclean.c. */ -static int tcp_error(struct sk_buff *skb, +static int tcp_error(struct net *net, + struct sk_buff *skb, unsigned int dataoff, enum ip_conntrack_info *ctinfo, - int pf, + u_int8_t pf, unsigned int hooknum) { const struct tcphdr *th; @@ -754,7 +762,7 @@ static int tcp_error(struct sk_buff *skb, /* Smaller that minimal TCP header? */ th = skb_header_pointer(skb, dataoff, sizeof(_tcph), &_tcph); if (th == NULL) { - if (LOG_INVALID(IPPROTO_TCP)) + if (LOG_INVALID(net, IPPROTO_TCP)) nf_log_packet(pf, 0, skb, NULL, NULL, NULL, "nf_ct_tcp: short packet "); return -NF_ACCEPT; @@ -762,7 +770,7 @@ static int tcp_error(struct sk_buff *skb, /* Not whole TCP header or malformed packet */ if (th->doff*4 < sizeof(struct tcphdr) || tcplen < th->doff*4) { - if (LOG_INVALID(IPPROTO_TCP)) + if (LOG_INVALID(net, IPPROTO_TCP)) nf_log_packet(pf, 0, skb, NULL, NULL, NULL, "nf_ct_tcp: truncated/malformed packet "); return -NF_ACCEPT; @@ -773,9 +781,9 @@ static int tcp_error(struct sk_buff *skb, * because the checksum is assumed to be correct. */ /* FIXME: Source route IP option packets --RR */ - if (nf_conntrack_checksum && hooknum == NF_INET_PRE_ROUTING && + if (net->ct.sysctl_checksum && hooknum == NF_INET_PRE_ROUTING && nf_checksum(skb, hooknum, dataoff, IPPROTO_TCP, pf)) { - if (LOG_INVALID(IPPROTO_TCP)) + if (LOG_INVALID(net, IPPROTO_TCP)) nf_log_packet(pf, 0, skb, NULL, NULL, NULL, "nf_ct_tcp: bad TCP checksum "); return -NF_ACCEPT; @@ -784,7 +792,7 @@ static int tcp_error(struct sk_buff *skb, /* Check TCP flags. */ tcpflags = (((u_int8_t *)th)[13] & ~(TH_ECE|TH_CWR|TH_PUSH)); if (!tcp_valid_flags[tcpflags]) { - if (LOG_INVALID(IPPROTO_TCP)) + if (LOG_INVALID(net, IPPROTO_TCP)) nf_log_packet(pf, 0, skb, NULL, NULL, NULL, "nf_ct_tcp: invalid TCP flag combination "); return -NF_ACCEPT; @@ -798,9 +806,10 @@ static int tcp_packet(struct nf_conn *ct, const struct sk_buff *skb, unsigned int dataoff, enum ip_conntrack_info ctinfo, - int pf, + u_int8_t pf, unsigned int hooknum) { + struct net *net = nf_ct_net(ct); struct nf_conntrack_tuple *tuple; enum tcp_conntrack new_state, old_state; enum ip_conntrack_dir dir; @@ -843,9 +852,14 @@ static int tcp_packet(struct nf_conn *ct, /* Attempt to reopen a closed/aborted connection. * Delete this connection and look up again. */ write_unlock_bh(&tcp_lock); - if (del_timer(&ct->timeout)) - ct->timeout.function((unsigned long)ct); - return -NF_REPEAT; + + /* Only repeat if we can actually remove the timer. + * Destruction may already be in progress in process + * context and we must give it a chance to terminate. + */ + if (nf_ct_kill(ct)) + return -NF_REPEAT; + return -NF_DROP; } /* Fall through */ case TCP_CONNTRACK_IGNORE: @@ -874,11 +888,10 @@ static int tcp_packet(struct nf_conn *ct, * thus initiate a clean new session. */ write_unlock_bh(&tcp_lock); - if (LOG_INVALID(IPPROTO_TCP)) + if (LOG_INVALID(net, IPPROTO_TCP)) nf_log_packet(pf, 0, skb, NULL, NULL, NULL, "nf_ct_tcp: killing out of sync session "); - if (del_timer(&ct->timeout)) - ct->timeout.function((unsigned long)ct); + nf_ct_kill(ct); return -NF_DROP; } ct->proto.tcp.last_index = index; @@ -888,7 +901,7 @@ static int tcp_packet(struct nf_conn *ct, segment_seq_plus_len(ntohl(th->seq), skb->len, dataoff, th); write_unlock_bh(&tcp_lock); - if (LOG_INVALID(IPPROTO_TCP)) + if (LOG_INVALID(net, IPPROTO_TCP)) nf_log_packet(pf, 0, skb, NULL, NULL, NULL, "nf_ct_tcp: invalid packet ignored "); return NF_ACCEPT; @@ -897,7 +910,7 @@ static int tcp_packet(struct nf_conn *ct, pr_debug("nf_ct_tcp: Invalid dir=%i index=%u ostate=%u\n", dir, get_conntrack_index(th), old_state); write_unlock_bh(&tcp_lock); - if (LOG_INVALID(IPPROTO_TCP)) + if (LOG_INVALID(net, IPPROTO_TCP)) nf_log_packet(pf, 0, skb, NULL, NULL, NULL, "nf_ct_tcp: invalid state "); return -NF_ACCEPT; @@ -946,14 +959,21 @@ static int tcp_packet(struct nf_conn *ct, if (old_state != new_state && new_state == TCP_CONNTRACK_FIN_WAIT) ct->proto.tcp.seen[dir].flags |= IP_CT_TCP_FLAG_CLOSE_INIT; - timeout = ct->proto.tcp.retrans >= nf_ct_tcp_max_retrans - && tcp_timeouts[new_state] > nf_ct_tcp_timeout_max_retrans - ? nf_ct_tcp_timeout_max_retrans : tcp_timeouts[new_state]; + + if (ct->proto.tcp.retrans >= nf_ct_tcp_max_retrans && + tcp_timeouts[new_state] > nf_ct_tcp_timeout_max_retrans) + timeout = nf_ct_tcp_timeout_max_retrans; + else if ((ct->proto.tcp.seen[0].flags | ct->proto.tcp.seen[1].flags) & + IP_CT_TCP_FLAG_DATA_UNACKNOWLEDGED && + tcp_timeouts[new_state] > nf_ct_tcp_timeout_unacknowledged) + timeout = nf_ct_tcp_timeout_unacknowledged; + else + timeout = tcp_timeouts[new_state]; write_unlock_bh(&tcp_lock); - nf_conntrack_event_cache(IPCT_PROTOINFO_VOLATILE, skb); + nf_conntrack_event_cache(IPCT_PROTOINFO_VOLATILE, ct); if (new_state != old_state) - nf_conntrack_event_cache(IPCT_PROTOINFO, skb); + nf_conntrack_event_cache(IPCT_PROTOINFO, ct); if (!test_bit(IPS_SEEN_REPLY_BIT, &ct->status)) { /* If only reply is a RST, we can consider ourselves not to @@ -961,8 +981,7 @@ static int tcp_packet(struct nf_conn *ct, problem case, so we can delete the conntrack immediately. --RR */ if (th->rst) { - if (del_timer(&ct->timeout)) - ct->timeout.function((unsigned long)ct); + nf_ct_kill_acct(ct, ctinfo, skb); return NF_ACCEPT; } } else if (!test_bit(IPS_ASSURED_BIT, &ct->status) @@ -973,7 +992,7 @@ static int tcp_packet(struct nf_conn *ct, after SYN_RECV or a valid answer for a picked up connection. */ set_bit(IPS_ASSURED_BIT, &ct->status); - nf_conntrack_event_cache(IPCT_STATUS, skb); + nf_conntrack_event_cache(IPCT_STATUS, ct); } nf_ct_refresh_acct(ct, ctinfo, skb, timeout); @@ -1232,6 +1251,13 @@ static struct ctl_table tcp_sysctl_table[] = { .proc_handler = &proc_dointvec_jiffies, }, { + .procname = "nf_conntrack_tcp_timeout_unacknowledged", + .data = &nf_ct_tcp_timeout_unacknowledged, + .maxlen = sizeof(unsigned int), + .mode = 0644, + .proc_handler = &proc_dointvec_jiffies, + }, + { .ctl_name = NET_NF_CONNTRACK_TCP_LOOSE, .procname = "nf_conntrack_tcp_loose", .data = &nf_ct_tcp_loose, diff --git a/net/netfilter/nf_conntrack_proto_udp.c b/net/netfilter/nf_conntrack_proto_udp.c index 8b21762e65de..7c2ca48698be 100644 --- a/net/netfilter/nf_conntrack_proto_udp.c +++ b/net/netfilter/nf_conntrack_proto_udp.c @@ -66,7 +66,7 @@ static int udp_packet(struct nf_conn *ct, const struct sk_buff *skb, unsigned int dataoff, enum ip_conntrack_info ctinfo, - int pf, + u_int8_t pf, unsigned int hooknum) { /* If we've seen traffic both ways, this is some kind of UDP @@ -75,7 +75,7 @@ static int udp_packet(struct nf_conn *ct, nf_ct_refresh_acct(ct, ctinfo, skb, nf_ct_udp_timeout_stream); /* Also, more likely to be important, and not a probe */ if (!test_and_set_bit(IPS_ASSURED_BIT, &ct->status)) - nf_conntrack_event_cache(IPCT_STATUS, skb); + nf_conntrack_event_cache(IPCT_STATUS, ct); } else nf_ct_refresh_acct(ct, ctinfo, skb, nf_ct_udp_timeout); @@ -89,9 +89,9 @@ static bool udp_new(struct nf_conn *ct, const struct sk_buff *skb, return true; } -static int udp_error(struct sk_buff *skb, unsigned int dataoff, +static int udp_error(struct net *net, struct sk_buff *skb, unsigned int dataoff, enum ip_conntrack_info *ctinfo, - int pf, + u_int8_t pf, unsigned int hooknum) { unsigned int udplen = skb->len - dataoff; @@ -101,7 +101,7 @@ static int udp_error(struct sk_buff *skb, unsigned int dataoff, /* Header is too small? */ hdr = skb_header_pointer(skb, dataoff, sizeof(_hdr), &_hdr); if (hdr == NULL) { - if (LOG_INVALID(IPPROTO_UDP)) + if (LOG_INVALID(net, IPPROTO_UDP)) nf_log_packet(pf, 0, skb, NULL, NULL, NULL, "nf_ct_udp: short packet "); return -NF_ACCEPT; @@ -109,7 +109,7 @@ static int udp_error(struct sk_buff *skb, unsigned int dataoff, /* Truncated/malformed packets */ if (ntohs(hdr->len) > udplen || ntohs(hdr->len) < sizeof(*hdr)) { - if (LOG_INVALID(IPPROTO_UDP)) + if (LOG_INVALID(net, IPPROTO_UDP)) nf_log_packet(pf, 0, skb, NULL, NULL, NULL, "nf_ct_udp: truncated/malformed packet "); return -NF_ACCEPT; @@ -123,9 +123,9 @@ static int udp_error(struct sk_buff *skb, unsigned int dataoff, * We skip checking packets on the outgoing path * because the checksum is assumed to be correct. * FIXME: Source route IP option packets --RR */ - if (nf_conntrack_checksum && hooknum == NF_INET_PRE_ROUTING && + if (net->ct.sysctl_checksum && hooknum == NF_INET_PRE_ROUTING && nf_checksum(skb, hooknum, dataoff, IPPROTO_UDP, pf)) { - if (LOG_INVALID(IPPROTO_UDP)) + if (LOG_INVALID(net, IPPROTO_UDP)) nf_log_packet(pf, 0, skb, NULL, NULL, NULL, "nf_ct_udp: bad UDP checksum "); return -NF_ACCEPT; diff --git a/net/netfilter/nf_conntrack_proto_udplite.c b/net/netfilter/nf_conntrack_proto_udplite.c index 1fa62f3c24f1..d22d839e4f94 100644 --- a/net/netfilter/nf_conntrack_proto_udplite.c +++ b/net/netfilter/nf_conntrack_proto_udplite.c @@ -65,7 +65,7 @@ static int udplite_packet(struct nf_conn *ct, const struct sk_buff *skb, unsigned int dataoff, enum ip_conntrack_info ctinfo, - int pf, + u_int8_t pf, unsigned int hooknum) { /* If we've seen traffic both ways, this is some kind of UDP @@ -75,7 +75,7 @@ static int udplite_packet(struct nf_conn *ct, nf_ct_udplite_timeout_stream); /* Also, more likely to be important, and not a probe */ if (!test_and_set_bit(IPS_ASSURED_BIT, &ct->status)) - nf_conntrack_event_cache(IPCT_STATUS, skb); + nf_conntrack_event_cache(IPCT_STATUS, ct); } else nf_ct_refresh_acct(ct, ctinfo, skb, nf_ct_udplite_timeout); @@ -89,9 +89,11 @@ static bool udplite_new(struct nf_conn *ct, const struct sk_buff *skb, return true; } -static int udplite_error(struct sk_buff *skb, unsigned int dataoff, +static int udplite_error(struct net *net, + struct sk_buff *skb, + unsigned int dataoff, enum ip_conntrack_info *ctinfo, - int pf, + u_int8_t pf, unsigned int hooknum) { unsigned int udplen = skb->len - dataoff; @@ -102,7 +104,7 @@ static int udplite_error(struct sk_buff *skb, unsigned int dataoff, /* Header is too small? */ hdr = skb_header_pointer(skb, dataoff, sizeof(_hdr), &_hdr); if (hdr == NULL) { - if (LOG_INVALID(IPPROTO_UDPLITE)) + if (LOG_INVALID(net, IPPROTO_UDPLITE)) nf_log_packet(pf, 0, skb, NULL, NULL, NULL, "nf_ct_udplite: short packet "); return -NF_ACCEPT; @@ -112,7 +114,7 @@ static int udplite_error(struct sk_buff *skb, unsigned int dataoff, if (cscov == 0) cscov = udplen; else if (cscov < sizeof(*hdr) || cscov > udplen) { - if (LOG_INVALID(IPPROTO_UDPLITE)) + if (LOG_INVALID(net, IPPROTO_UDPLITE)) nf_log_packet(pf, 0, skb, NULL, NULL, NULL, "nf_ct_udplite: invalid checksum coverage "); return -NF_ACCEPT; @@ -120,17 +122,17 @@ static int udplite_error(struct sk_buff *skb, unsigned int dataoff, /* UDPLITE mandates checksums */ if (!hdr->check) { - if (LOG_INVALID(IPPROTO_UDPLITE)) + if (LOG_INVALID(net, IPPROTO_UDPLITE)) nf_log_packet(pf, 0, skb, NULL, NULL, NULL, "nf_ct_udplite: checksum missing "); return -NF_ACCEPT; } /* Checksum invalid? Ignore. */ - if (nf_conntrack_checksum && hooknum == NF_INET_PRE_ROUTING && + if (net->ct.sysctl_checksum && hooknum == NF_INET_PRE_ROUTING && nf_checksum_partial(skb, hooknum, dataoff, cscov, IPPROTO_UDP, pf)) { - if (LOG_INVALID(IPPROTO_UDPLITE)) + if (LOG_INVALID(net, IPPROTO_UDPLITE)) nf_log_packet(pf, 0, skb, NULL, NULL, NULL, "nf_ct_udplite: bad UDPLite checksum "); return -NF_ACCEPT; diff --git a/net/netfilter/nf_conntrack_sip.c b/net/netfilter/nf_conntrack_sip.c index 2f9bbc058b48..6813f1c8863f 100644 --- a/net/netfilter/nf_conntrack_sip.c +++ b/net/netfilter/nf_conntrack_sip.c @@ -736,6 +736,7 @@ static int set_expected_rtp_rtcp(struct sk_buff *skb, struct nf_conntrack_expect *exp, *rtp_exp, *rtcp_exp; enum ip_conntrack_info ctinfo; struct nf_conn *ct = nf_ct_get(skb, &ctinfo); + struct net *net = nf_ct_net(ct); enum ip_conntrack_dir dir = CTINFO2DIR(ctinfo); union nf_inet_addr *saddr; struct nf_conntrack_tuple tuple; @@ -775,7 +776,7 @@ static int set_expected_rtp_rtcp(struct sk_buff *skb, rcu_read_lock(); do { - exp = __nf_ct_expect_find(&tuple); + exp = __nf_ct_expect_find(net, &tuple); if (!exp || exp->master == ct || nfct_help(exp->master)->helper != nfct_help(ct)->helper || @@ -1193,7 +1194,6 @@ static const struct sip_handler sip_handlers[] = { static int process_sip_response(struct sk_buff *skb, const char **dptr, unsigned int *datalen) { - static const struct sip_handler *handler; enum ip_conntrack_info ctinfo; struct nf_conn *ct = nf_ct_get(skb, &ctinfo); unsigned int matchoff, matchlen; @@ -1214,6 +1214,8 @@ static int process_sip_response(struct sk_buff *skb, dataoff = matchoff + matchlen + 1; for (i = 0; i < ARRAY_SIZE(sip_handlers); i++) { + const struct sip_handler *handler; + handler = &sip_handlers[i]; if (handler->response == NULL) continue; @@ -1228,13 +1230,14 @@ static int process_sip_response(struct sk_buff *skb, static int process_sip_request(struct sk_buff *skb, const char **dptr, unsigned int *datalen) { - static const struct sip_handler *handler; enum ip_conntrack_info ctinfo; struct nf_conn *ct = nf_ct_get(skb, &ctinfo); unsigned int matchoff, matchlen; unsigned int cseq, i; for (i = 0; i < ARRAY_SIZE(sip_handlers); i++) { + const struct sip_handler *handler; + handler = &sip_handlers[i]; if (handler->request == NULL) continue; diff --git a/net/netfilter/nf_conntrack_standalone.c b/net/netfilter/nf_conntrack_standalone.c index 46ea542d0df9..98106d4e89f0 100644 --- a/net/netfilter/nf_conntrack_standalone.c +++ b/net/netfilter/nf_conntrack_standalone.c @@ -25,6 +25,7 @@ #include <net/netfilter/nf_conntrack_l4proto.h> #include <net/netfilter/nf_conntrack_expect.h> #include <net/netfilter/nf_conntrack_helper.h> +#include <net/netfilter/nf_conntrack_acct.h> MODULE_LICENSE("GPL"); @@ -38,32 +39,21 @@ print_tuple(struct seq_file *s, const struct nf_conntrack_tuple *tuple, } EXPORT_SYMBOL_GPL(print_tuple); -#ifdef CONFIG_NF_CT_ACCT -static unsigned int -seq_print_counters(struct seq_file *s, - const struct ip_conntrack_counter *counter) -{ - return seq_printf(s, "packets=%llu bytes=%llu ", - (unsigned long long)counter->packets, - (unsigned long long)counter->bytes); -} -#else -#define seq_print_counters(x, y) 0 -#endif - struct ct_iter_state { + struct seq_net_private p; unsigned int bucket; }; static struct hlist_node *ct_get_first(struct seq_file *seq) { + struct net *net = seq_file_net(seq); struct ct_iter_state *st = seq->private; struct hlist_node *n; for (st->bucket = 0; st->bucket < nf_conntrack_htable_size; st->bucket++) { - n = rcu_dereference(nf_conntrack_hash[st->bucket].first); + n = rcu_dereference(net->ct.hash[st->bucket].first); if (n) return n; } @@ -73,13 +63,14 @@ static struct hlist_node *ct_get_first(struct seq_file *seq) static struct hlist_node *ct_get_next(struct seq_file *seq, struct hlist_node *head) { + struct net *net = seq_file_net(seq); struct ct_iter_state *st = seq->private; head = rcu_dereference(head->next); while (head == NULL) { if (++st->bucket >= nf_conntrack_htable_size) return NULL; - head = rcu_dereference(nf_conntrack_hash[st->bucket].first); + head = rcu_dereference(net->ct.hash[st->bucket].first); } return head; } @@ -146,7 +137,7 @@ static int ct_seq_show(struct seq_file *s, void *v) l3proto, l4proto)) return -ENOSPC; - if (seq_print_counters(s, &ct->counters[IP_CT_DIR_ORIGINAL])) + if (seq_print_acct(s, ct, IP_CT_DIR_ORIGINAL)) return -ENOSPC; if (!(test_bit(IPS_SEEN_REPLY_BIT, &ct->status))) @@ -157,7 +148,7 @@ static int ct_seq_show(struct seq_file *s, void *v) l3proto, l4proto)) return -ENOSPC; - if (seq_print_counters(s, &ct->counters[IP_CT_DIR_REPLY])) + if (seq_print_acct(s, ct, IP_CT_DIR_REPLY)) return -ENOSPC; if (test_bit(IPS_ASSURED_BIT, &ct->status)) @@ -189,7 +180,7 @@ static const struct seq_operations ct_seq_ops = { static int ct_open(struct inode *inode, struct file *file) { - return seq_open_private(file, &ct_seq_ops, + return seq_open_net(inode, file, &ct_seq_ops, sizeof(struct ct_iter_state)); } @@ -198,11 +189,12 @@ static const struct file_operations ct_file_ops = { .open = ct_open, .read = seq_read, .llseek = seq_lseek, - .release = seq_release_private, + .release = seq_release_net, }; static void *ct_cpu_seq_start(struct seq_file *seq, loff_t *pos) { + struct net *net = seq_file_net(seq); int cpu; if (*pos == 0) @@ -212,7 +204,7 @@ static void *ct_cpu_seq_start(struct seq_file *seq, loff_t *pos) if (!cpu_possible(cpu)) continue; *pos = cpu + 1; - return &per_cpu(nf_conntrack_stat, cpu); + return per_cpu_ptr(net->ct.stat, cpu); } return NULL; @@ -220,13 +212,14 @@ static void *ct_cpu_seq_start(struct seq_file *seq, loff_t *pos) static void *ct_cpu_seq_next(struct seq_file *seq, void *v, loff_t *pos) { + struct net *net = seq_file_net(seq); int cpu; for (cpu = *pos; cpu < NR_CPUS; ++cpu) { if (!cpu_possible(cpu)) continue; *pos = cpu + 1; - return &per_cpu(nf_conntrack_stat, cpu); + return per_cpu_ptr(net->ct.stat, cpu); } return NULL; @@ -238,7 +231,8 @@ static void ct_cpu_seq_stop(struct seq_file *seq, void *v) static int ct_cpu_seq_show(struct seq_file *seq, void *v) { - unsigned int nr_conntracks = atomic_read(&nf_conntrack_count); + struct net *net = seq_file_net(seq); + unsigned int nr_conntracks = atomic_read(&net->ct.count); const struct ip_conntrack_stat *st = v; if (v == SEQ_START_TOKEN) { @@ -278,7 +272,8 @@ static const struct seq_operations ct_cpu_seq_ops = { static int ct_cpu_seq_open(struct inode *inode, struct file *file) { - return seq_open(file, &ct_cpu_seq_ops); + return seq_open_net(inode, file, &ct_cpu_seq_ops, + sizeof(struct seq_net_private)); } static const struct file_operations ct_cpu_seq_fops = { @@ -286,56 +281,53 @@ static const struct file_operations ct_cpu_seq_fops = { .open = ct_cpu_seq_open, .read = seq_read, .llseek = seq_lseek, - .release = seq_release, + .release = seq_release_net, }; -static int nf_conntrack_standalone_init_proc(void) +static int nf_conntrack_standalone_init_proc(struct net *net) { struct proc_dir_entry *pde; - pde = proc_net_fops_create(&init_net, "nf_conntrack", 0440, &ct_file_ops); + pde = proc_net_fops_create(net, "nf_conntrack", 0440, &ct_file_ops); if (!pde) goto out_nf_conntrack; - pde = proc_create("nf_conntrack", S_IRUGO, init_net.proc_net_stat, + pde = proc_create("nf_conntrack", S_IRUGO, net->proc_net_stat, &ct_cpu_seq_fops); if (!pde) goto out_stat_nf_conntrack; return 0; out_stat_nf_conntrack: - proc_net_remove(&init_net, "nf_conntrack"); + proc_net_remove(net, "nf_conntrack"); out_nf_conntrack: return -ENOMEM; } -static void nf_conntrack_standalone_fini_proc(void) +static void nf_conntrack_standalone_fini_proc(struct net *net) { - remove_proc_entry("nf_conntrack", init_net.proc_net_stat); - proc_net_remove(&init_net, "nf_conntrack"); + remove_proc_entry("nf_conntrack", net->proc_net_stat); + proc_net_remove(net, "nf_conntrack"); } #else -static int nf_conntrack_standalone_init_proc(void) +static int nf_conntrack_standalone_init_proc(struct net *net) { return 0; } -static void nf_conntrack_standalone_fini_proc(void) +static void nf_conntrack_standalone_fini_proc(struct net *net) { } #endif /* CONFIG_PROC_FS */ /* Sysctl support */ -int nf_conntrack_checksum __read_mostly = 1; -EXPORT_SYMBOL_GPL(nf_conntrack_checksum); - #ifdef CONFIG_SYSCTL /* Log invalid packets of a given protocol */ static int log_invalid_proto_min = 0; static int log_invalid_proto_max = 255; -static struct ctl_table_header *nf_ct_sysctl_header; +static struct ctl_table_header *nf_ct_netfilter_header; static ctl_table nf_ct_sysctl_table[] = { { @@ -349,7 +341,7 @@ static ctl_table nf_ct_sysctl_table[] = { { .ctl_name = NET_NF_CONNTRACK_COUNT, .procname = "nf_conntrack_count", - .data = &nf_conntrack_count, + .data = &init_net.ct.count, .maxlen = sizeof(int), .mode = 0444, .proc_handler = &proc_dointvec, @@ -365,7 +357,7 @@ static ctl_table nf_ct_sysctl_table[] = { { .ctl_name = NET_NF_CONNTRACK_CHECKSUM, .procname = "nf_conntrack_checksum", - .data = &nf_conntrack_checksum, + .data = &init_net.ct.sysctl_checksum, .maxlen = sizeof(unsigned int), .mode = 0644, .proc_handler = &proc_dointvec, @@ -373,7 +365,7 @@ static ctl_table nf_ct_sysctl_table[] = { { .ctl_name = NET_NF_CONNTRACK_LOG_INVALID, .procname = "nf_conntrack_log_invalid", - .data = &nf_ct_log_invalid, + .data = &init_net.ct.sysctl_log_invalid, .maxlen = sizeof(unsigned int), .mode = 0644, .proc_handler = &proc_dointvec_minmax, @@ -396,12 +388,6 @@ static ctl_table nf_ct_sysctl_table[] = { static ctl_table nf_ct_netfilter_table[] = { { - .ctl_name = NET_NETFILTER, - .procname = "netfilter", - .mode = 0555, - .child = nf_ct_sysctl_table, - }, - { .ctl_name = NET_NF_CONNTRACK_MAX, .procname = "nf_conntrack_max", .data = &nf_conntrack_max, @@ -417,63 +403,109 @@ static struct ctl_path nf_ct_path[] = { { } }; -EXPORT_SYMBOL_GPL(nf_ct_log_invalid); - -static int nf_conntrack_standalone_init_sysctl(void) +static int nf_conntrack_standalone_init_sysctl(struct net *net) { - nf_ct_sysctl_header = - register_sysctl_paths(nf_ct_path, nf_ct_netfilter_table); - if (nf_ct_sysctl_header == NULL) { - printk("nf_conntrack: can't register to sysctl.\n"); - return -ENOMEM; + struct ctl_table *table; + + if (net_eq(net, &init_net)) { + nf_ct_netfilter_header = + register_sysctl_paths(nf_ct_path, nf_ct_netfilter_table); + if (!nf_ct_netfilter_header) + goto out; } + + table = kmemdup(nf_ct_sysctl_table, sizeof(nf_ct_sysctl_table), + GFP_KERNEL); + if (!table) + goto out_kmemdup; + + table[1].data = &net->ct.count; + table[3].data = &net->ct.sysctl_checksum; + table[4].data = &net->ct.sysctl_log_invalid; + + net->ct.sysctl_header = register_net_sysctl_table(net, + nf_net_netfilter_sysctl_path, table); + if (!net->ct.sysctl_header) + goto out_unregister_netfilter; + return 0; +out_unregister_netfilter: + kfree(table); +out_kmemdup: + if (net_eq(net, &init_net)) + unregister_sysctl_table(nf_ct_netfilter_header); +out: + printk("nf_conntrack: can't register to sysctl.\n"); + return -ENOMEM; } -static void nf_conntrack_standalone_fini_sysctl(void) +static void nf_conntrack_standalone_fini_sysctl(struct net *net) { - unregister_sysctl_table(nf_ct_sysctl_header); + struct ctl_table *table; + + if (net_eq(net, &init_net)) + unregister_sysctl_table(nf_ct_netfilter_header); + table = net->ct.sysctl_header->ctl_table_arg; + unregister_net_sysctl_table(net->ct.sysctl_header); + kfree(table); } #else -static int nf_conntrack_standalone_init_sysctl(void) +static int nf_conntrack_standalone_init_sysctl(struct net *net) { return 0; } -static void nf_conntrack_standalone_fini_sysctl(void) +static void nf_conntrack_standalone_fini_sysctl(struct net *net) { } #endif /* CONFIG_SYSCTL */ -static int __init nf_conntrack_standalone_init(void) +static int nf_conntrack_net_init(struct net *net) { int ret; - ret = nf_conntrack_init(); + ret = nf_conntrack_init(net); if (ret < 0) - goto out; - ret = nf_conntrack_standalone_init_proc(); + goto out_init; + ret = nf_conntrack_standalone_init_proc(net); if (ret < 0) goto out_proc; - ret = nf_conntrack_standalone_init_sysctl(); + net->ct.sysctl_checksum = 1; + net->ct.sysctl_log_invalid = 0; + ret = nf_conntrack_standalone_init_sysctl(net); if (ret < 0) goto out_sysctl; return 0; out_sysctl: - nf_conntrack_standalone_fini_proc(); + nf_conntrack_standalone_fini_proc(net); out_proc: - nf_conntrack_cleanup(); -out: + nf_conntrack_cleanup(net); +out_init: return ret; } +static void nf_conntrack_net_exit(struct net *net) +{ + nf_conntrack_standalone_fini_sysctl(net); + nf_conntrack_standalone_fini_proc(net); + nf_conntrack_cleanup(net); +} + +static struct pernet_operations nf_conntrack_net_ops = { + .init = nf_conntrack_net_init, + .exit = nf_conntrack_net_exit, +}; + +static int __init nf_conntrack_standalone_init(void) +{ + return register_pernet_subsys(&nf_conntrack_net_ops); +} + static void __exit nf_conntrack_standalone_fini(void) { - nf_conntrack_standalone_fini_sysctl(); - nf_conntrack_standalone_fini_proc(); - nf_conntrack_cleanup(); + unregister_pernet_subsys(&nf_conntrack_net_ops); } module_init(nf_conntrack_standalone_init); diff --git a/net/netfilter/nf_internals.h b/net/netfilter/nf_internals.h index 196269c1e586..bf6609978af7 100644 --- a/net/netfilter/nf_internals.h +++ b/net/netfilter/nf_internals.h @@ -15,7 +15,7 @@ /* core.c */ extern unsigned int nf_iterate(struct list_head *head, struct sk_buff *skb, - int hook, + unsigned int hook, const struct net_device *indev, const struct net_device *outdev, struct list_head **i, @@ -25,7 +25,7 @@ extern unsigned int nf_iterate(struct list_head *head, /* nf_queue.c */ extern int nf_queue(struct sk_buff *skb, struct list_head *elem, - int pf, unsigned int hook, + u_int8_t pf, unsigned int hook, struct net_device *indev, struct net_device *outdev, int (*okfn)(struct sk_buff *), diff --git a/net/netfilter/nf_log.c b/net/netfilter/nf_log.c index 9fda6ee95a31..fa8ae5d2659c 100644 --- a/net/netfilter/nf_log.c +++ b/net/netfilter/nf_log.c @@ -15,16 +15,16 @@ #define NF_LOG_PREFIXLEN 128 -static const struct nf_logger *nf_loggers[NPROTO] __read_mostly; +static const struct nf_logger *nf_loggers[NFPROTO_NUMPROTO] __read_mostly; static DEFINE_MUTEX(nf_log_mutex); /* return EBUSY if somebody else is registered, EEXIST if the same logger * is registred, 0 on success. */ -int nf_log_register(int pf, const struct nf_logger *logger) +int nf_log_register(u_int8_t pf, const struct nf_logger *logger) { int ret; - if (pf >= NPROTO) + if (pf >= ARRAY_SIZE(nf_loggers)) return -EINVAL; /* Any setup of logging members must be done before @@ -45,9 +45,9 @@ int nf_log_register(int pf, const struct nf_logger *logger) } EXPORT_SYMBOL(nf_log_register); -void nf_log_unregister_pf(int pf) +void nf_log_unregister_pf(u_int8_t pf) { - if (pf >= NPROTO) + if (pf >= ARRAY_SIZE(nf_loggers)) return; mutex_lock(&nf_log_mutex); rcu_assign_pointer(nf_loggers[pf], NULL); @@ -63,7 +63,7 @@ void nf_log_unregister(const struct nf_logger *logger) int i; mutex_lock(&nf_log_mutex); - for (i = 0; i < NPROTO; i++) { + for (i = 0; i < ARRAY_SIZE(nf_loggers); i++) { if (nf_loggers[i] == logger) rcu_assign_pointer(nf_loggers[i], NULL); } @@ -73,7 +73,7 @@ void nf_log_unregister(const struct nf_logger *logger) } EXPORT_SYMBOL(nf_log_unregister); -void nf_log_packet(int pf, +void nf_log_packet(u_int8_t pf, unsigned int hooknum, const struct sk_buff *skb, const struct net_device *in, @@ -103,7 +103,7 @@ static void *seq_start(struct seq_file *seq, loff_t *pos) { rcu_read_lock(); - if (*pos >= NPROTO) + if (*pos >= ARRAY_SIZE(nf_loggers)) return NULL; return pos; @@ -113,7 +113,7 @@ static void *seq_next(struct seq_file *s, void *v, loff_t *pos) { (*pos)++; - if (*pos >= NPROTO) + if (*pos >= ARRAY_SIZE(nf_loggers)) return NULL; return pos; diff --git a/net/netfilter/nf_queue.c b/net/netfilter/nf_queue.c index 582ec3efc8a5..4f2310c93e01 100644 --- a/net/netfilter/nf_queue.c +++ b/net/netfilter/nf_queue.c @@ -16,17 +16,17 @@ * long term mutex. The handler must provide an an outfn() to accept packets * for queueing and must reinject all packets it receives, no matter what. */ -static const struct nf_queue_handler *queue_handler[NPROTO]; +static const struct nf_queue_handler *queue_handler[NFPROTO_NUMPROTO] __read_mostly; static DEFINE_MUTEX(queue_handler_mutex); /* return EBUSY when somebody else is registered, return EEXIST if the * same handler is registered, return 0 in case of success. */ -int nf_register_queue_handler(int pf, const struct nf_queue_handler *qh) +int nf_register_queue_handler(u_int8_t pf, const struct nf_queue_handler *qh) { int ret; - if (pf >= NPROTO) + if (pf >= ARRAY_SIZE(queue_handler)) return -EINVAL; mutex_lock(&queue_handler_mutex); @@ -45,9 +45,9 @@ int nf_register_queue_handler(int pf, const struct nf_queue_handler *qh) EXPORT_SYMBOL(nf_register_queue_handler); /* The caller must flush their queue before this */ -int nf_unregister_queue_handler(int pf, const struct nf_queue_handler *qh) +int nf_unregister_queue_handler(u_int8_t pf, const struct nf_queue_handler *qh) { - if (pf >= NPROTO) + if (pf >= ARRAY_SIZE(queue_handler)) return -EINVAL; mutex_lock(&queue_handler_mutex); @@ -67,10 +67,10 @@ EXPORT_SYMBOL(nf_unregister_queue_handler); void nf_unregister_queue_handlers(const struct nf_queue_handler *qh) { - int pf; + u_int8_t pf; mutex_lock(&queue_handler_mutex); - for (pf = 0; pf < NPROTO; pf++) { + for (pf = 0; pf < ARRAY_SIZE(queue_handler); pf++) { if (queue_handler[pf] == qh) rcu_assign_pointer(queue_handler[pf], NULL); } @@ -107,7 +107,7 @@ static void nf_queue_entry_release_refs(struct nf_queue_entry *entry) */ static int __nf_queue(struct sk_buff *skb, struct list_head *elem, - int pf, unsigned int hook, + u_int8_t pf, unsigned int hook, struct net_device *indev, struct net_device *outdev, int (*okfn)(struct sk_buff *), @@ -191,7 +191,7 @@ err: int nf_queue(struct sk_buff *skb, struct list_head *elem, - int pf, unsigned int hook, + u_int8_t pf, unsigned int hook, struct net_device *indev, struct net_device *outdev, int (*okfn)(struct sk_buff *), @@ -285,7 +285,7 @@ EXPORT_SYMBOL(nf_reinject); #ifdef CONFIG_PROC_FS static void *seq_start(struct seq_file *seq, loff_t *pos) { - if (*pos >= NPROTO) + if (*pos >= ARRAY_SIZE(queue_handler)) return NULL; return pos; @@ -295,7 +295,7 @@ static void *seq_next(struct seq_file *s, void *v, loff_t *pos) { (*pos)++; - if (*pos >= NPROTO) + if (*pos >= ARRAY_SIZE(queue_handler)) return NULL; return pos; diff --git a/net/netfilter/nf_sockopt.c b/net/netfilter/nf_sockopt.c index 69d699f95f4c..8ab829f86574 100644 --- a/net/netfilter/nf_sockopt.c +++ b/net/netfilter/nf_sockopt.c @@ -60,14 +60,11 @@ void nf_unregister_sockopt(struct nf_sockopt_ops *reg) } EXPORT_SYMBOL(nf_unregister_sockopt); -static struct nf_sockopt_ops *nf_sockopt_find(struct sock *sk, int pf, +static struct nf_sockopt_ops *nf_sockopt_find(struct sock *sk, u_int8_t pf, int val, int get) { struct nf_sockopt_ops *ops; - if (sock_net(sk) != &init_net) - return ERR_PTR(-ENOPROTOOPT); - if (mutex_lock_interruptible(&nf_sockopt_mutex) != 0) return ERR_PTR(-EINTR); @@ -96,7 +93,7 @@ out: } /* Call get/setsockopt() */ -static int nf_sockopt(struct sock *sk, int pf, int val, +static int nf_sockopt(struct sock *sk, u_int8_t pf, int val, char __user *opt, int *len, int get) { struct nf_sockopt_ops *ops; @@ -115,21 +112,22 @@ static int nf_sockopt(struct sock *sk, int pf, int val, return ret; } -int nf_setsockopt(struct sock *sk, int pf, int val, char __user *opt, +int nf_setsockopt(struct sock *sk, u_int8_t pf, int val, char __user *opt, int len) { return nf_sockopt(sk, pf, val, opt, &len, 0); } EXPORT_SYMBOL(nf_setsockopt); -int nf_getsockopt(struct sock *sk, int pf, int val, char __user *opt, int *len) +int nf_getsockopt(struct sock *sk, u_int8_t pf, int val, char __user *opt, + int *len) { return nf_sockopt(sk, pf, val, opt, len, 1); } EXPORT_SYMBOL(nf_getsockopt); #ifdef CONFIG_COMPAT -static int compat_nf_sockopt(struct sock *sk, int pf, int val, +static int compat_nf_sockopt(struct sock *sk, u_int8_t pf, int val, char __user *opt, int *len, int get) { struct nf_sockopt_ops *ops; @@ -155,14 +153,14 @@ static int compat_nf_sockopt(struct sock *sk, int pf, int val, return ret; } -int compat_nf_setsockopt(struct sock *sk, int pf, +int compat_nf_setsockopt(struct sock *sk, u_int8_t pf, int val, char __user *opt, int len) { return compat_nf_sockopt(sk, pf, val, opt, &len, 0); } EXPORT_SYMBOL(compat_nf_setsockopt); -int compat_nf_getsockopt(struct sock *sk, int pf, +int compat_nf_getsockopt(struct sock *sk, u_int8_t pf, int val, char __user *opt, int *len) { return compat_nf_sockopt(sk, pf, val, opt, len, 1); diff --git a/net/netfilter/nf_tproxy_core.c b/net/netfilter/nf_tproxy_core.c new file mode 100644 index 000000000000..cdc97f3105a3 --- /dev/null +++ b/net/netfilter/nf_tproxy_core.c @@ -0,0 +1,95 @@ +/* + * Transparent proxy support for Linux/iptables + * + * Copyright (c) 2006-2007 BalaBit IT Ltd. + * Author: Balazs Scheidler, Krisztian Kovacs + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + */ + +#include <linux/module.h> + +#include <linux/net.h> +#include <linux/if.h> +#include <linux/netdevice.h> +#include <net/udp.h> +#include <net/netfilter/nf_tproxy_core.h> + +struct sock * +nf_tproxy_get_sock_v4(struct net *net, const u8 protocol, + const __be32 saddr, const __be32 daddr, + const __be16 sport, const __be16 dport, + const struct net_device *in, bool listening_only) +{ + struct sock *sk; + + /* look up socket */ + switch (protocol) { + case IPPROTO_TCP: + if (listening_only) + sk = __inet_lookup_listener(net, &tcp_hashinfo, + daddr, ntohs(dport), + in->ifindex); + else + sk = __inet_lookup(net, &tcp_hashinfo, + saddr, sport, daddr, dport, + in->ifindex); + break; + case IPPROTO_UDP: + sk = udp4_lib_lookup(net, saddr, sport, daddr, dport, + in->ifindex); + break; + default: + WARN_ON(1); + sk = NULL; + } + + pr_debug("tproxy socket lookup: proto %u %08x:%u -> %08x:%u, listener only: %d, sock %p\n", + protocol, ntohl(saddr), ntohs(sport), ntohl(daddr), ntohs(dport), listening_only, sk); + + return sk; +} +EXPORT_SYMBOL_GPL(nf_tproxy_get_sock_v4); + +static void +nf_tproxy_destructor(struct sk_buff *skb) +{ + struct sock *sk = skb->sk; + + skb->sk = NULL; + skb->destructor = NULL; + + if (sk) + nf_tproxy_put_sock(sk); +} + +/* consumes sk */ +int +nf_tproxy_assign_sock(struct sk_buff *skb, struct sock *sk) +{ + if (inet_sk(sk)->transparent) { + skb->sk = sk; + skb->destructor = nf_tproxy_destructor; + return 1; + } else + nf_tproxy_put_sock(sk); + + return 0; +} +EXPORT_SYMBOL_GPL(nf_tproxy_assign_sock); + +static int __init nf_tproxy_init(void) +{ + pr_info("NF_TPROXY: Transparent proxy support initialized, version 4.1.0\n"); + pr_info("NF_TPROXY: Copyright (c) 2006-2007 BalaBit IT Ltd.\n"); + return 0; +} + +module_init(nf_tproxy_init); + +MODULE_LICENSE("GPL"); +MODULE_AUTHOR("Krisztian Kovacs"); +MODULE_DESCRIPTION("Transparent proxy support core routines"); diff --git a/net/netfilter/nfnetlink_log.c b/net/netfilter/nfnetlink_log.c index b8173af8c24a..41e0105d3828 100644 --- a/net/netfilter/nfnetlink_log.c +++ b/net/netfilter/nfnetlink_log.c @@ -359,7 +359,7 @@ static inline int __build_packet_message(struct nfulnl_instance *inst, const struct sk_buff *skb, unsigned int data_len, - unsigned int pf, + u_int8_t pf, unsigned int hooknum, const struct net_device *indev, const struct net_device *outdev, @@ -453,6 +453,14 @@ __build_packet_message(struct nfulnl_instance *inst, } } + if (indev && skb_mac_header_was_set(skb)) { + NLA_PUT_BE16(inst->skb, NFULA_HWTYPE, htons(skb->dev->type)); + NLA_PUT_BE16(inst->skb, NFULA_HWLEN, + htons(skb->dev->hard_header_len)); + NLA_PUT(inst->skb, NFULA_HWHEADER, skb->dev->hard_header_len, + skb_mac_header(skb)); + } + if (skb->tstamp.tv64) { struct nfulnl_msg_packet_timestamp ts; struct timeval tv = ktime_to_timeval(skb->tstamp); @@ -526,7 +534,7 @@ static struct nf_loginfo default_loginfo = { /* log handler for internal netfilter logging api */ static void -nfulnl_log_packet(unsigned int pf, +nfulnl_log_packet(u_int8_t pf, unsigned int hooknum, const struct sk_buff *skb, const struct net_device *in, diff --git a/net/netfilter/nfnetlink_queue.c b/net/netfilter/nfnetlink_queue.c index 3447025ce068..8c860112ce05 100644 --- a/net/netfilter/nfnetlink_queue.c +++ b/net/netfilter/nfnetlink_queue.c @@ -243,7 +243,6 @@ nfqnl_build_packet_message(struct nfqnl_instance *queue, switch ((enum nfqnl_config_mode)queue->copy_mode) { case NFQNL_COPY_META: case NFQNL_COPY_NONE: - data_len = 0; break; case NFQNL_COPY_PACKET: @@ -556,7 +555,7 @@ nfqnl_rcv_dev_event(struct notifier_block *this, { struct net_device *dev = ptr; - if (dev_net(dev) != &init_net) + if (!net_eq(dev_net(dev), &init_net)) return NOTIFY_DONE; /* Drop any packets associated with the downed device */ diff --git a/net/netfilter/x_tables.c b/net/netfilter/x_tables.c index 5d75cd86ebb3..89837a4eef76 100644 --- a/net/netfilter/x_tables.c +++ b/net/netfilter/x_tables.c @@ -30,7 +30,7 @@ MODULE_LICENSE("GPL"); MODULE_AUTHOR("Harald Welte <laforge@netfilter.org>"); -MODULE_DESCRIPTION("[ip,ip6,arp]_tables backend module"); +MODULE_DESCRIPTION("{ip,ip6,arp,eb}_tables backend module"); #define SMP_ALIGN(x) (((x) + SMP_CACHE_BYTES-1) & ~(SMP_CACHE_BYTES-1)) @@ -58,17 +58,20 @@ static struct xt_af *xt; #define duprintf(format, args...) #endif -static const char *const xt_prefix[NPROTO] = { - [AF_INET] = "ip", - [AF_INET6] = "ip6", - [NF_ARP] = "arp", +static const char *const xt_prefix[NFPROTO_NUMPROTO] = { + [NFPROTO_UNSPEC] = "x", + [NFPROTO_IPV4] = "ip", + [NFPROTO_ARP] = "arp", + [NFPROTO_BRIDGE] = "eb", + [NFPROTO_IPV6] = "ip6", }; /* Registration hooks for targets. */ int xt_register_target(struct xt_target *target) { - int ret, af = target->family; + u_int8_t af = target->family; + int ret; ret = mutex_lock_interruptible(&xt[af].mutex); if (ret != 0) @@ -82,7 +85,7 @@ EXPORT_SYMBOL(xt_register_target); void xt_unregister_target(struct xt_target *target) { - int af = target->family; + u_int8_t af = target->family; mutex_lock(&xt[af].mutex); list_del(&target->list); @@ -123,7 +126,8 @@ EXPORT_SYMBOL(xt_unregister_targets); int xt_register_match(struct xt_match *match) { - int ret, af = match->family; + u_int8_t af = match->family; + int ret; ret = mutex_lock_interruptible(&xt[af].mutex); if (ret != 0) @@ -139,7 +143,7 @@ EXPORT_SYMBOL(xt_register_match); void xt_unregister_match(struct xt_match *match) { - int af = match->family; + u_int8_t af = match->family; mutex_lock(&xt[af].mutex); list_del(&match->list); @@ -185,7 +189,7 @@ EXPORT_SYMBOL(xt_unregister_matches); */ /* Find match, grabs ref. Returns ERR_PTR() on error. */ -struct xt_match *xt_find_match(int af, const char *name, u8 revision) +struct xt_match *xt_find_match(u8 af, const char *name, u8 revision) { struct xt_match *m; int err = 0; @@ -205,12 +209,17 @@ struct xt_match *xt_find_match(int af, const char *name, u8 revision) } } mutex_unlock(&xt[af].mutex); + + if (af != NFPROTO_UNSPEC) + /* Try searching again in the family-independent list */ + return xt_find_match(NFPROTO_UNSPEC, name, revision); + return ERR_PTR(err); } EXPORT_SYMBOL(xt_find_match); /* Find target, grabs ref. Returns ERR_PTR() on error. */ -struct xt_target *xt_find_target(int af, const char *name, u8 revision) +struct xt_target *xt_find_target(u8 af, const char *name, u8 revision) { struct xt_target *t; int err = 0; @@ -230,11 +239,16 @@ struct xt_target *xt_find_target(int af, const char *name, u8 revision) } } mutex_unlock(&xt[af].mutex); + + if (af != NFPROTO_UNSPEC) + /* Try searching again in the family-independent list */ + return xt_find_target(NFPROTO_UNSPEC, name, revision); + return ERR_PTR(err); } EXPORT_SYMBOL(xt_find_target); -struct xt_target *xt_request_find_target(int af, const char *name, u8 revision) +struct xt_target *xt_request_find_target(u8 af, const char *name, u8 revision) { struct xt_target *target; @@ -246,7 +260,7 @@ struct xt_target *xt_request_find_target(int af, const char *name, u8 revision) } EXPORT_SYMBOL_GPL(xt_request_find_target); -static int match_revfn(int af, const char *name, u8 revision, int *bestp) +static int match_revfn(u8 af, const char *name, u8 revision, int *bestp) { const struct xt_match *m; int have_rev = 0; @@ -262,7 +276,7 @@ static int match_revfn(int af, const char *name, u8 revision, int *bestp) return have_rev; } -static int target_revfn(int af, const char *name, u8 revision, int *bestp) +static int target_revfn(u8 af, const char *name, u8 revision, int *bestp) { const struct xt_target *t; int have_rev = 0; @@ -279,7 +293,7 @@ static int target_revfn(int af, const char *name, u8 revision, int *bestp) } /* Returns true or false (if no such extension at all) */ -int xt_find_revision(int af, const char *name, u8 revision, int target, +int xt_find_revision(u8 af, const char *name, u8 revision, int target, int *err) { int have_rev, best = -1; @@ -307,37 +321,47 @@ int xt_find_revision(int af, const char *name, u8 revision, int target, } EXPORT_SYMBOL_GPL(xt_find_revision); -int xt_check_match(const struct xt_match *match, unsigned short family, - unsigned int size, const char *table, unsigned int hook_mask, - unsigned short proto, int inv_proto) +int xt_check_match(struct xt_mtchk_param *par, + unsigned int size, u_int8_t proto, bool inv_proto) { - if (XT_ALIGN(match->matchsize) != size) { + if (XT_ALIGN(par->match->matchsize) != size && + par->match->matchsize != -1) { + /* + * ebt_among is exempt from centralized matchsize checking + * because it uses a dynamic-size data set. + */ printk("%s_tables: %s match: invalid size %Zu != %u\n", - xt_prefix[family], match->name, - XT_ALIGN(match->matchsize), size); + xt_prefix[par->family], par->match->name, + XT_ALIGN(par->match->matchsize), size); return -EINVAL; } - if (match->table && strcmp(match->table, table)) { + if (par->match->table != NULL && + strcmp(par->match->table, par->table) != 0) { printk("%s_tables: %s match: only valid in %s table, not %s\n", - xt_prefix[family], match->name, match->table, table); + xt_prefix[par->family], par->match->name, + par->match->table, par->table); return -EINVAL; } - if (match->hooks && (hook_mask & ~match->hooks) != 0) { - printk("%s_tables: %s match: bad hook_mask %u/%u\n", - xt_prefix[family], match->name, hook_mask, match->hooks); + if (par->match->hooks && (par->hook_mask & ~par->match->hooks) != 0) { + printk("%s_tables: %s match: bad hook_mask %#x/%#x\n", + xt_prefix[par->family], par->match->name, + par->hook_mask, par->match->hooks); return -EINVAL; } - if (match->proto && (match->proto != proto || inv_proto)) { + if (par->match->proto && (par->match->proto != proto || inv_proto)) { printk("%s_tables: %s match: only valid for protocol %u\n", - xt_prefix[family], match->name, match->proto); + xt_prefix[par->family], par->match->name, + par->match->proto); return -EINVAL; } + if (par->match->checkentry != NULL && !par->match->checkentry(par)) + return -EINVAL; return 0; } EXPORT_SYMBOL_GPL(xt_check_match); #ifdef CONFIG_COMPAT -int xt_compat_add_offset(int af, unsigned int offset, short delta) +int xt_compat_add_offset(u_int8_t af, unsigned int offset, short delta) { struct compat_delta *tmp; @@ -359,7 +383,7 @@ int xt_compat_add_offset(int af, unsigned int offset, short delta) } EXPORT_SYMBOL_GPL(xt_compat_add_offset); -void xt_compat_flush_offsets(int af) +void xt_compat_flush_offsets(u_int8_t af) { struct compat_delta *tmp, *next; @@ -373,7 +397,7 @@ void xt_compat_flush_offsets(int af) } EXPORT_SYMBOL_GPL(xt_compat_flush_offsets); -short xt_compat_calc_jump(int af, unsigned int offset) +short xt_compat_calc_jump(u_int8_t af, unsigned int offset) { struct compat_delta *tmp; short delta; @@ -448,32 +472,36 @@ int xt_compat_match_to_user(struct xt_entry_match *m, void __user **dstptr, EXPORT_SYMBOL_GPL(xt_compat_match_to_user); #endif /* CONFIG_COMPAT */ -int xt_check_target(const struct xt_target *target, unsigned short family, - unsigned int size, const char *table, unsigned int hook_mask, - unsigned short proto, int inv_proto) +int xt_check_target(struct xt_tgchk_param *par, + unsigned int size, u_int8_t proto, bool inv_proto) { - if (XT_ALIGN(target->targetsize) != size) { + if (XT_ALIGN(par->target->targetsize) != size) { printk("%s_tables: %s target: invalid size %Zu != %u\n", - xt_prefix[family], target->name, - XT_ALIGN(target->targetsize), size); + xt_prefix[par->family], par->target->name, + XT_ALIGN(par->target->targetsize), size); return -EINVAL; } - if (target->table && strcmp(target->table, table)) { + if (par->target->table != NULL && + strcmp(par->target->table, par->table) != 0) { printk("%s_tables: %s target: only valid in %s table, not %s\n", - xt_prefix[family], target->name, target->table, table); + xt_prefix[par->family], par->target->name, + par->target->table, par->table); return -EINVAL; } - if (target->hooks && (hook_mask & ~target->hooks) != 0) { - printk("%s_tables: %s target: bad hook_mask %u/%u\n", - xt_prefix[family], target->name, hook_mask, - target->hooks); + if (par->target->hooks && (par->hook_mask & ~par->target->hooks) != 0) { + printk("%s_tables: %s target: bad hook_mask %#x/%#x\n", + xt_prefix[par->family], par->target->name, + par->hook_mask, par->target->hooks); return -EINVAL; } - if (target->proto && (target->proto != proto || inv_proto)) { + if (par->target->proto && (par->target->proto != proto || inv_proto)) { printk("%s_tables: %s target: only valid for protocol %u\n", - xt_prefix[family], target->name, target->proto); + xt_prefix[par->family], par->target->name, + par->target->proto); return -EINVAL; } + if (par->target->checkentry != NULL && !par->target->checkentry(par)) + return -EINVAL; return 0; } EXPORT_SYMBOL_GPL(xt_check_target); @@ -590,7 +618,8 @@ void xt_free_table_info(struct xt_table_info *info) EXPORT_SYMBOL(xt_free_table_info); /* Find table by name, grabs mutex & ref. Returns ERR_PTR() on error. */ -struct xt_table *xt_find_table_lock(struct net *net, int af, const char *name) +struct xt_table *xt_find_table_lock(struct net *net, u_int8_t af, + const char *name) { struct xt_table *t; @@ -612,13 +641,13 @@ void xt_table_unlock(struct xt_table *table) EXPORT_SYMBOL_GPL(xt_table_unlock); #ifdef CONFIG_COMPAT -void xt_compat_lock(int af) +void xt_compat_lock(u_int8_t af) { mutex_lock(&xt[af].compat_mutex); } EXPORT_SYMBOL_GPL(xt_compat_lock); -void xt_compat_unlock(int af) +void xt_compat_unlock(u_int8_t af) { mutex_unlock(&xt[af].compat_mutex); } @@ -722,13 +751,13 @@ EXPORT_SYMBOL_GPL(xt_unregister_table); #ifdef CONFIG_PROC_FS struct xt_names_priv { struct seq_net_private p; - int af; + u_int8_t af; }; static void *xt_table_seq_start(struct seq_file *seq, loff_t *pos) { struct xt_names_priv *priv = seq->private; struct net *net = seq_file_net(seq); - int af = priv->af; + u_int8_t af = priv->af; mutex_lock(&xt[af].mutex); return seq_list_start(&net->xt.tables[af], *pos); @@ -738,7 +767,7 @@ static void *xt_table_seq_next(struct seq_file *seq, void *v, loff_t *pos) { struct xt_names_priv *priv = seq->private; struct net *net = seq_file_net(seq); - int af = priv->af; + u_int8_t af = priv->af; return seq_list_next(v, &net->xt.tables[af], pos); } @@ -746,7 +775,7 @@ static void *xt_table_seq_next(struct seq_file *seq, void *v, loff_t *pos) static void xt_table_seq_stop(struct seq_file *seq, void *v) { struct xt_names_priv *priv = seq->private; - int af = priv->af; + u_int8_t af = priv->af; mutex_unlock(&xt[af].mutex); } @@ -922,14 +951,14 @@ static const struct file_operations xt_target_ops = { #endif /* CONFIG_PROC_FS */ -int xt_proto_init(struct net *net, int af) +int xt_proto_init(struct net *net, u_int8_t af) { #ifdef CONFIG_PROC_FS char buf[XT_FUNCTION_MAXNAMELEN]; struct proc_dir_entry *proc; #endif - if (af >= NPROTO) + if (af >= ARRAY_SIZE(xt_prefix)) return -EINVAL; @@ -974,7 +1003,7 @@ out: } EXPORT_SYMBOL_GPL(xt_proto_init); -void xt_proto_fini(struct net *net, int af) +void xt_proto_fini(struct net *net, u_int8_t af) { #ifdef CONFIG_PROC_FS char buf[XT_FUNCTION_MAXNAMELEN]; @@ -998,7 +1027,7 @@ static int __net_init xt_net_init(struct net *net) { int i; - for (i = 0; i < NPROTO; i++) + for (i = 0; i < NFPROTO_NUMPROTO; i++) INIT_LIST_HEAD(&net->xt.tables[i]); return 0; } @@ -1011,11 +1040,11 @@ static int __init xt_init(void) { int i, rv; - xt = kmalloc(sizeof(struct xt_af) * NPROTO, GFP_KERNEL); + xt = kmalloc(sizeof(struct xt_af) * NFPROTO_NUMPROTO, GFP_KERNEL); if (!xt) return -ENOMEM; - for (i = 0; i < NPROTO; i++) { + for (i = 0; i < NFPROTO_NUMPROTO; i++) { mutex_init(&xt[i].mutex); #ifdef CONFIG_COMPAT mutex_init(&xt[i].compat_mutex); diff --git a/net/netfilter/xt_CLASSIFY.c b/net/netfilter/xt_CLASSIFY.c index 77a52bf83225..011bc80dd2a1 100644 --- a/net/netfilter/xt_CLASSIFY.c +++ b/net/netfilter/xt_CLASSIFY.c @@ -27,50 +27,34 @@ MODULE_ALIAS("ipt_CLASSIFY"); MODULE_ALIAS("ip6t_CLASSIFY"); static unsigned int -classify_tg(struct sk_buff *skb, const struct net_device *in, - const struct net_device *out, unsigned int hooknum, - const struct xt_target *target, const void *targinfo) +classify_tg(struct sk_buff *skb, const struct xt_target_param *par) { - const struct xt_classify_target_info *clinfo = targinfo; + const struct xt_classify_target_info *clinfo = par->targinfo; skb->priority = clinfo->priority; return XT_CONTINUE; } -static struct xt_target classify_tg_reg[] __read_mostly = { - { - .family = AF_INET, - .name = "CLASSIFY", - .target = classify_tg, - .targetsize = sizeof(struct xt_classify_target_info), - .table = "mangle", - .hooks = (1 << NF_INET_LOCAL_OUT) | - (1 << NF_INET_FORWARD) | - (1 << NF_INET_POST_ROUTING), - .me = THIS_MODULE, - }, - { - .name = "CLASSIFY", - .family = AF_INET6, - .target = classify_tg, - .targetsize = sizeof(struct xt_classify_target_info), - .table = "mangle", - .hooks = (1 << NF_INET_LOCAL_OUT) | - (1 << NF_INET_FORWARD) | - (1 << NF_INET_POST_ROUTING), - .me = THIS_MODULE, - }, +static struct xt_target classify_tg_reg __read_mostly = { + .name = "CLASSIFY", + .revision = 0, + .family = NFPROTO_UNSPEC, + .table = "mangle", + .hooks = (1 << NF_INET_LOCAL_OUT) | (1 << NF_INET_FORWARD) | + (1 << NF_INET_POST_ROUTING), + .target = classify_tg, + .targetsize = sizeof(struct xt_classify_target_info), + .me = THIS_MODULE, }; static int __init classify_tg_init(void) { - return xt_register_targets(classify_tg_reg, - ARRAY_SIZE(classify_tg_reg)); + return xt_register_target(&classify_tg_reg); } static void __exit classify_tg_exit(void) { - xt_unregister_targets(classify_tg_reg, ARRAY_SIZE(classify_tg_reg)); + xt_unregister_target(&classify_tg_reg); } module_init(classify_tg_init); diff --git a/net/netfilter/xt_CONNMARK.c b/net/netfilter/xt_CONNMARK.c index 5fecfb4794b1..d6e5ab463277 100644 --- a/net/netfilter/xt_CONNMARK.c +++ b/net/netfilter/xt_CONNMARK.c @@ -36,11 +36,9 @@ MODULE_ALIAS("ip6t_CONNMARK"); #include <net/netfilter/nf_conntrack_ecache.h> static unsigned int -connmark_tg_v0(struct sk_buff *skb, const struct net_device *in, - const struct net_device *out, unsigned int hooknum, - const struct xt_target *target, const void *targinfo) +connmark_tg_v0(struct sk_buff *skb, const struct xt_target_param *par) { - const struct xt_connmark_target_info *markinfo = targinfo; + const struct xt_connmark_target_info *markinfo = par->targinfo; struct nf_conn *ct; enum ip_conntrack_info ctinfo; u_int32_t diff; @@ -54,7 +52,7 @@ connmark_tg_v0(struct sk_buff *skb, const struct net_device *in, newmark = (ct->mark & ~markinfo->mask) | markinfo->mark; if (newmark != ct->mark) { ct->mark = newmark; - nf_conntrack_event_cache(IPCT_MARK, skb); + nf_conntrack_event_cache(IPCT_MARK, ct); } break; case XT_CONNMARK_SAVE: @@ -62,7 +60,7 @@ connmark_tg_v0(struct sk_buff *skb, const struct net_device *in, (skb->mark & markinfo->mask); if (ct->mark != newmark) { ct->mark = newmark; - nf_conntrack_event_cache(IPCT_MARK, skb); + nf_conntrack_event_cache(IPCT_MARK, ct); } break; case XT_CONNMARK_RESTORE: @@ -77,11 +75,9 @@ connmark_tg_v0(struct sk_buff *skb, const struct net_device *in, } static unsigned int -connmark_tg(struct sk_buff *skb, const struct net_device *in, - const struct net_device *out, unsigned int hooknum, - const struct xt_target *target, const void *targinfo) +connmark_tg(struct sk_buff *skb, const struct xt_target_param *par) { - const struct xt_connmark_tginfo1 *info = targinfo; + const struct xt_connmark_tginfo1 *info = par->targinfo; enum ip_conntrack_info ctinfo; struct nf_conn *ct; u_int32_t newmark; @@ -95,7 +91,7 @@ connmark_tg(struct sk_buff *skb, const struct net_device *in, newmark = (ct->mark & ~info->ctmask) ^ info->ctmark; if (ct->mark != newmark) { ct->mark = newmark; - nf_conntrack_event_cache(IPCT_MARK, skb); + nf_conntrack_event_cache(IPCT_MARK, ct); } break; case XT_CONNMARK_SAVE: @@ -103,7 +99,7 @@ connmark_tg(struct sk_buff *skb, const struct net_device *in, (skb->mark & info->nfmask); if (ct->mark != newmark) { ct->mark = newmark; - nf_conntrack_event_cache(IPCT_MARK, skb); + nf_conntrack_event_cache(IPCT_MARK, ct); } break; case XT_CONNMARK_RESTORE: @@ -116,18 +112,15 @@ connmark_tg(struct sk_buff *skb, const struct net_device *in, return XT_CONTINUE; } -static bool -connmark_tg_check_v0(const char *tablename, const void *entry, - const struct xt_target *target, void *targinfo, - unsigned int hook_mask) +static bool connmark_tg_check_v0(const struct xt_tgchk_param *par) { - const struct xt_connmark_target_info *matchinfo = targinfo; + const struct xt_connmark_target_info *matchinfo = par->targinfo; if (matchinfo->mode == XT_CONNMARK_RESTORE) { - if (strcmp(tablename, "mangle") != 0) { + if (strcmp(par->table, "mangle") != 0) { printk(KERN_WARNING "CONNMARK: restore can only be " "called from \"mangle\" table, not \"%s\"\n", - tablename); + par->table); return false; } } @@ -135,31 +128,27 @@ connmark_tg_check_v0(const char *tablename, const void *entry, printk(KERN_WARNING "CONNMARK: Only supports 32bit mark\n"); return false; } - if (nf_ct_l3proto_try_module_get(target->family) < 0) { + if (nf_ct_l3proto_try_module_get(par->family) < 0) { printk(KERN_WARNING "can't load conntrack support for " - "proto=%u\n", target->family); + "proto=%u\n", par->family); return false; } return true; } -static bool -connmark_tg_check(const char *tablename, const void *entry, - const struct xt_target *target, void *targinfo, - unsigned int hook_mask) +static bool connmark_tg_check(const struct xt_tgchk_param *par) { - if (nf_ct_l3proto_try_module_get(target->family) < 0) { + if (nf_ct_l3proto_try_module_get(par->family) < 0) { printk(KERN_WARNING "cannot load conntrack support for " - "proto=%u\n", target->family); + "proto=%u\n", par->family); return false; } return true; } -static void -connmark_tg_destroy(const struct xt_target *target, void *targinfo) +static void connmark_tg_destroy(const struct xt_tgdtor_param *par) { - nf_ct_l3proto_module_put(target->family); + nf_ct_l3proto_module_put(par->family); } #ifdef CONFIG_COMPAT @@ -197,7 +186,7 @@ static struct xt_target connmark_tg_reg[] __read_mostly = { { .name = "CONNMARK", .revision = 0, - .family = AF_INET, + .family = NFPROTO_UNSPEC, .checkentry = connmark_tg_check_v0, .destroy = connmark_tg_destroy, .target = connmark_tg_v0, @@ -210,34 +199,9 @@ static struct xt_target connmark_tg_reg[] __read_mostly = { .me = THIS_MODULE }, { - .name = "CONNMARK", - .revision = 0, - .family = AF_INET6, - .checkentry = connmark_tg_check_v0, - .destroy = connmark_tg_destroy, - .target = connmark_tg_v0, - .targetsize = sizeof(struct xt_connmark_target_info), -#ifdef CONFIG_COMPAT - .compatsize = sizeof(struct compat_xt_connmark_target_info), - .compat_from_user = connmark_tg_compat_from_user_v0, - .compat_to_user = connmark_tg_compat_to_user_v0, -#endif - .me = THIS_MODULE - }, - { - .name = "CONNMARK", - .revision = 1, - .family = AF_INET, - .checkentry = connmark_tg_check, - .target = connmark_tg, - .targetsize = sizeof(struct xt_connmark_tginfo1), - .destroy = connmark_tg_destroy, - .me = THIS_MODULE, - }, - { .name = "CONNMARK", .revision = 1, - .family = AF_INET6, + .family = NFPROTO_UNSPEC, .checkentry = connmark_tg_check, .target = connmark_tg, .targetsize = sizeof(struct xt_connmark_tginfo1), diff --git a/net/netfilter/xt_CONNSECMARK.c b/net/netfilter/xt_CONNSECMARK.c index 211189eb2b67..b54c3756fdc3 100644 --- a/net/netfilter/xt_CONNSECMARK.c +++ b/net/netfilter/xt_CONNSECMARK.c @@ -8,7 +8,7 @@ * Copyright (C) 2002,2004 MARA Systems AB <http://www.marasystems.com> * by Henrik Nordstrom <hno@marasystems.com> * - * (C) 2006 Red Hat, Inc., James Morris <jmorris@redhat.com> + * (C) 2006,2008 Red Hat, Inc., James Morris <jmorris@redhat.com> * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License version 2 as @@ -43,7 +43,7 @@ static void secmark_save(const struct sk_buff *skb) ct = nf_ct_get(skb, &ctinfo); if (ct && !ct->secmark) { ct->secmark = skb->secmark; - nf_conntrack_event_cache(IPCT_SECMARK, skb); + nf_conntrack_event_cache(IPCT_SECMARK, ct); } } } @@ -65,11 +65,9 @@ static void secmark_restore(struct sk_buff *skb) } static unsigned int -connsecmark_tg(struct sk_buff *skb, const struct net_device *in, - const struct net_device *out, unsigned int hooknum, - const struct xt_target *target, const void *targinfo) +connsecmark_tg(struct sk_buff *skb, const struct xt_target_param *par) { - const struct xt_connsecmark_target_info *info = targinfo; + const struct xt_connsecmark_target_info *info = par->targinfo; switch (info->mode) { case CONNSECMARK_SAVE: @@ -87,12 +85,16 @@ connsecmark_tg(struct sk_buff *skb, const struct net_device *in, return XT_CONTINUE; } -static bool -connsecmark_tg_check(const char *tablename, const void *entry, - const struct xt_target *target, void *targinfo, - unsigned int hook_mask) +static bool connsecmark_tg_check(const struct xt_tgchk_param *par) { - const struct xt_connsecmark_target_info *info = targinfo; + const struct xt_connsecmark_target_info *info = par->targinfo; + + if (strcmp(par->table, "mangle") != 0 && + strcmp(par->table, "security") != 0) { + printk(KERN_INFO PFX "target only valid in the \'mangle\' " + "or \'security\' tables, not \'%s\'.\n", par->table); + return false; + } switch (info->mode) { case CONNSECMARK_SAVE: @@ -104,53 +106,38 @@ connsecmark_tg_check(const char *tablename, const void *entry, return false; } - if (nf_ct_l3proto_try_module_get(target->family) < 0) { + if (nf_ct_l3proto_try_module_get(par->family) < 0) { printk(KERN_WARNING "can't load conntrack support for " - "proto=%u\n", target->family); + "proto=%u\n", par->family); return false; } return true; } -static void -connsecmark_tg_destroy(const struct xt_target *target, void *targinfo) +static void connsecmark_tg_destroy(const struct xt_tgdtor_param *par) { - nf_ct_l3proto_module_put(target->family); + nf_ct_l3proto_module_put(par->family); } -static struct xt_target connsecmark_tg_reg[] __read_mostly = { - { - .name = "CONNSECMARK", - .family = AF_INET, - .checkentry = connsecmark_tg_check, - .destroy = connsecmark_tg_destroy, - .target = connsecmark_tg, - .targetsize = sizeof(struct xt_connsecmark_target_info), - .table = "mangle", - .me = THIS_MODULE, - }, - { - .name = "CONNSECMARK", - .family = AF_INET6, - .checkentry = connsecmark_tg_check, - .destroy = connsecmark_tg_destroy, - .target = connsecmark_tg, - .targetsize = sizeof(struct xt_connsecmark_target_info), - .table = "mangle", - .me = THIS_MODULE, - }, +static struct xt_target connsecmark_tg_reg __read_mostly = { + .name = "CONNSECMARK", + .revision = 0, + .family = NFPROTO_UNSPEC, + .checkentry = connsecmark_tg_check, + .destroy = connsecmark_tg_destroy, + .target = connsecmark_tg, + .targetsize = sizeof(struct xt_connsecmark_target_info), + .me = THIS_MODULE, }; static int __init connsecmark_tg_init(void) { - return xt_register_targets(connsecmark_tg_reg, - ARRAY_SIZE(connsecmark_tg_reg)); + return xt_register_target(&connsecmark_tg_reg); } static void __exit connsecmark_tg_exit(void) { - xt_unregister_targets(connsecmark_tg_reg, - ARRAY_SIZE(connsecmark_tg_reg)); + xt_unregister_target(&connsecmark_tg_reg); } module_init(connsecmark_tg_init); diff --git a/net/netfilter/xt_DSCP.c b/net/netfilter/xt_DSCP.c index 97efd74c04fe..6a347e768f86 100644 --- a/net/netfilter/xt_DSCP.c +++ b/net/netfilter/xt_DSCP.c @@ -29,11 +29,9 @@ MODULE_ALIAS("ipt_TOS"); MODULE_ALIAS("ip6t_TOS"); static unsigned int -dscp_tg(struct sk_buff *skb, const struct net_device *in, - const struct net_device *out, unsigned int hooknum, - const struct xt_target *target, const void *targinfo) +dscp_tg(struct sk_buff *skb, const struct xt_target_param *par) { - const struct xt_DSCP_info *dinfo = targinfo; + const struct xt_DSCP_info *dinfo = par->targinfo; u_int8_t dscp = ipv4_get_dsfield(ip_hdr(skb)) >> XT_DSCP_SHIFT; if (dscp != dinfo->dscp) { @@ -48,11 +46,9 @@ dscp_tg(struct sk_buff *skb, const struct net_device *in, } static unsigned int -dscp_tg6(struct sk_buff *skb, const struct net_device *in, - const struct net_device *out, unsigned int hooknum, - const struct xt_target *target, const void *targinfo) +dscp_tg6(struct sk_buff *skb, const struct xt_target_param *par) { - const struct xt_DSCP_info *dinfo = targinfo; + const struct xt_DSCP_info *dinfo = par->targinfo; u_int8_t dscp = ipv6_get_dsfield(ipv6_hdr(skb)) >> XT_DSCP_SHIFT; if (dscp != dinfo->dscp) { @@ -65,26 +61,21 @@ dscp_tg6(struct sk_buff *skb, const struct net_device *in, return XT_CONTINUE; } -static bool -dscp_tg_check(const char *tablename, const void *e_void, - const struct xt_target *target, void *targinfo, - unsigned int hook_mask) +static bool dscp_tg_check(const struct xt_tgchk_param *par) { - const u_int8_t dscp = ((struct xt_DSCP_info *)targinfo)->dscp; + const struct xt_DSCP_info *info = par->targinfo; - if (dscp > XT_DSCP_MAX) { - printk(KERN_WARNING "DSCP: dscp %x out of range\n", dscp); + if (info->dscp > XT_DSCP_MAX) { + printk(KERN_WARNING "DSCP: dscp %x out of range\n", info->dscp); return false; } return true; } static unsigned int -tos_tg_v0(struct sk_buff *skb, const struct net_device *in, - const struct net_device *out, unsigned int hooknum, - const struct xt_target *target, const void *targinfo) +tos_tg_v0(struct sk_buff *skb, const struct xt_target_param *par) { - const struct ipt_tos_target_info *info = targinfo; + const struct ipt_tos_target_info *info = par->targinfo; struct iphdr *iph = ip_hdr(skb); u_int8_t oldtos; @@ -101,12 +92,10 @@ tos_tg_v0(struct sk_buff *skb, const struct net_device *in, return XT_CONTINUE; } -static bool -tos_tg_check_v0(const char *tablename, const void *e_void, - const struct xt_target *target, void *targinfo, - unsigned int hook_mask) +static bool tos_tg_check_v0(const struct xt_tgchk_param *par) { - const u_int8_t tos = ((struct ipt_tos_target_info *)targinfo)->tos; + const struct ipt_tos_target_info *info = par->targinfo; + const uint8_t tos = info->tos; if (tos != IPTOS_LOWDELAY && tos != IPTOS_THROUGHPUT && tos != IPTOS_RELIABILITY && tos != IPTOS_MINCOST && @@ -119,11 +108,9 @@ tos_tg_check_v0(const char *tablename, const void *e_void, } static unsigned int -tos_tg(struct sk_buff *skb, const struct net_device *in, - const struct net_device *out, unsigned int hooknum, - const struct xt_target *target, const void *targinfo) +tos_tg(struct sk_buff *skb, const struct xt_target_param *par) { - const struct xt_tos_target_info *info = targinfo; + const struct xt_tos_target_info *info = par->targinfo; struct iphdr *iph = ip_hdr(skb); u_int8_t orig, nv; @@ -141,11 +128,9 @@ tos_tg(struct sk_buff *skb, const struct net_device *in, } static unsigned int -tos_tg6(struct sk_buff *skb, const struct net_device *in, - const struct net_device *out, unsigned int hooknum, - const struct xt_target *target, const void *targinfo) +tos_tg6(struct sk_buff *skb, const struct xt_target_param *par) { - const struct xt_tos_target_info *info = targinfo; + const struct xt_tos_target_info *info = par->targinfo; struct ipv6hdr *iph = ipv6_hdr(skb); u_int8_t orig, nv; @@ -165,7 +150,7 @@ tos_tg6(struct sk_buff *skb, const struct net_device *in, static struct xt_target dscp_tg_reg[] __read_mostly = { { .name = "DSCP", - .family = AF_INET, + .family = NFPROTO_IPV4, .checkentry = dscp_tg_check, .target = dscp_tg, .targetsize = sizeof(struct xt_DSCP_info), @@ -174,7 +159,7 @@ static struct xt_target dscp_tg_reg[] __read_mostly = { }, { .name = "DSCP", - .family = AF_INET6, + .family = NFPROTO_IPV6, .checkentry = dscp_tg_check, .target = dscp_tg6, .targetsize = sizeof(struct xt_DSCP_info), @@ -184,7 +169,7 @@ static struct xt_target dscp_tg_reg[] __read_mostly = { { .name = "TOS", .revision = 0, - .family = AF_INET, + .family = NFPROTO_IPV4, .table = "mangle", .target = tos_tg_v0, .targetsize = sizeof(struct ipt_tos_target_info), @@ -194,7 +179,7 @@ static struct xt_target dscp_tg_reg[] __read_mostly = { { .name = "TOS", .revision = 1, - .family = AF_INET, + .family = NFPROTO_IPV4, .table = "mangle", .target = tos_tg, .targetsize = sizeof(struct xt_tos_target_info), @@ -203,7 +188,7 @@ static struct xt_target dscp_tg_reg[] __read_mostly = { { .name = "TOS", .revision = 1, - .family = AF_INET6, + .family = NFPROTO_IPV6, .table = "mangle", .target = tos_tg6, .targetsize = sizeof(struct xt_tos_target_info), diff --git a/net/netfilter/xt_MARK.c b/net/netfilter/xt_MARK.c index f9ce20b58981..67574bcfb8ac 100644 --- a/net/netfilter/xt_MARK.c +++ b/net/netfilter/xt_MARK.c @@ -25,22 +25,18 @@ MODULE_ALIAS("ipt_MARK"); MODULE_ALIAS("ip6t_MARK"); static unsigned int -mark_tg_v0(struct sk_buff *skb, const struct net_device *in, - const struct net_device *out, unsigned int hooknum, - const struct xt_target *target, const void *targinfo) +mark_tg_v0(struct sk_buff *skb, const struct xt_target_param *par) { - const struct xt_mark_target_info *markinfo = targinfo; + const struct xt_mark_target_info *markinfo = par->targinfo; skb->mark = markinfo->mark; return XT_CONTINUE; } static unsigned int -mark_tg_v1(struct sk_buff *skb, const struct net_device *in, - const struct net_device *out, unsigned int hooknum, - const struct xt_target *target, const void *targinfo) +mark_tg_v1(struct sk_buff *skb, const struct xt_target_param *par) { - const struct xt_mark_target_info_v1 *markinfo = targinfo; + const struct xt_mark_target_info_v1 *markinfo = par->targinfo; int mark = 0; switch (markinfo->mode) { @@ -62,22 +58,17 @@ mark_tg_v1(struct sk_buff *skb, const struct net_device *in, } static unsigned int -mark_tg(struct sk_buff *skb, const struct net_device *in, - const struct net_device *out, unsigned int hooknum, - const struct xt_target *target, const void *targinfo) +mark_tg(struct sk_buff *skb, const struct xt_target_param *par) { - const struct xt_mark_tginfo2 *info = targinfo; + const struct xt_mark_tginfo2 *info = par->targinfo; skb->mark = (skb->mark & ~info->mask) ^ info->mark; return XT_CONTINUE; } -static bool -mark_tg_check_v0(const char *tablename, const void *entry, - const struct xt_target *target, void *targinfo, - unsigned int hook_mask) +static bool mark_tg_check_v0(const struct xt_tgchk_param *par) { - const struct xt_mark_target_info *markinfo = targinfo; + const struct xt_mark_target_info *markinfo = par->targinfo; if (markinfo->mark > 0xffffffff) { printk(KERN_WARNING "MARK: Only supports 32bit wide mark\n"); @@ -86,12 +77,9 @@ mark_tg_check_v0(const char *tablename, const void *entry, return true; } -static bool -mark_tg_check_v1(const char *tablename, const void *entry, - const struct xt_target *target, void *targinfo, - unsigned int hook_mask) +static bool mark_tg_check_v1(const struct xt_tgchk_param *par) { - const struct xt_mark_target_info_v1 *markinfo = targinfo; + const struct xt_mark_target_info_v1 *markinfo = par->targinfo; if (markinfo->mode != XT_MARK_SET && markinfo->mode != XT_MARK_AND @@ -161,7 +149,7 @@ static int mark_tg_compat_to_user_v1(void __user *dst, void *src) static struct xt_target mark_tg_reg[] __read_mostly = { { .name = "MARK", - .family = AF_INET, + .family = NFPROTO_UNSPEC, .revision = 0, .checkentry = mark_tg_check_v0, .target = mark_tg_v0, @@ -176,7 +164,7 @@ static struct xt_target mark_tg_reg[] __read_mostly = { }, { .name = "MARK", - .family = AF_INET, + .family = NFPROTO_UNSPEC, .revision = 1, .checkentry = mark_tg_check_v1, .target = mark_tg_v1, @@ -190,47 +178,9 @@ static struct xt_target mark_tg_reg[] __read_mostly = { .me = THIS_MODULE, }, { - .name = "MARK", - .family = AF_INET6, - .revision = 0, - .checkentry = mark_tg_check_v0, - .target = mark_tg_v0, - .targetsize = sizeof(struct xt_mark_target_info), -#ifdef CONFIG_COMPAT - .compatsize = sizeof(struct compat_xt_mark_target_info), - .compat_from_user = mark_tg_compat_from_user_v0, - .compat_to_user = mark_tg_compat_to_user_v0, -#endif - .table = "mangle", - .me = THIS_MODULE, - }, - { - .name = "MARK", - .family = AF_INET6, - .revision = 1, - .checkentry = mark_tg_check_v1, - .target = mark_tg_v1, - .targetsize = sizeof(struct xt_mark_target_info_v1), -#ifdef CONFIG_COMPAT - .compatsize = sizeof(struct compat_xt_mark_target_info_v1), - .compat_from_user = mark_tg_compat_from_user_v1, - .compat_to_user = mark_tg_compat_to_user_v1, -#endif - .table = "mangle", - .me = THIS_MODULE, - }, - { - .name = "MARK", - .revision = 2, - .family = AF_INET, - .target = mark_tg, - .targetsize = sizeof(struct xt_mark_tginfo2), - .me = THIS_MODULE, - }, - { .name = "MARK", .revision = 2, - .family = AF_INET6, + .family = NFPROTO_UNSPEC, .target = mark_tg, .targetsize = sizeof(struct xt_mark_tginfo2), .me = THIS_MODULE, diff --git a/net/netfilter/xt_NFLOG.c b/net/netfilter/xt_NFLOG.c index 19ae8efae655..50e3a52d3b31 100644 --- a/net/netfilter/xt_NFLOG.c +++ b/net/netfilter/xt_NFLOG.c @@ -21,11 +21,9 @@ MODULE_ALIAS("ipt_NFLOG"); MODULE_ALIAS("ip6t_NFLOG"); static unsigned int -nflog_tg(struct sk_buff *skb, const struct net_device *in, - const struct net_device *out, unsigned int hooknum, - const struct xt_target *target, const void *targinfo) +nflog_tg(struct sk_buff *skb, const struct xt_target_param *par) { - const struct xt_nflog_info *info = targinfo; + const struct xt_nflog_info *info = par->targinfo; struct nf_loginfo li; li.type = NF_LOG_TYPE_ULOG; @@ -33,17 +31,14 @@ nflog_tg(struct sk_buff *skb, const struct net_device *in, li.u.ulog.group = info->group; li.u.ulog.qthreshold = info->threshold; - nf_log_packet(target->family, hooknum, skb, in, out, &li, - "%s", info->prefix); + nf_log_packet(par->family, par->hooknum, skb, par->in, + par->out, &li, "%s", info->prefix); return XT_CONTINUE; } -static bool -nflog_tg_check(const char *tablename, const void *entry, - const struct xt_target *target, void *targetinfo, - unsigned int hookmask) +static bool nflog_tg_check(const struct xt_tgchk_param *par) { - const struct xt_nflog_info *info = targetinfo; + const struct xt_nflog_info *info = par->targinfo; if (info->flags & ~XT_NFLOG_MASK) return false; @@ -52,33 +47,24 @@ nflog_tg_check(const char *tablename, const void *entry, return true; } -static struct xt_target nflog_tg_reg[] __read_mostly = { - { - .name = "NFLOG", - .family = AF_INET, - .checkentry = nflog_tg_check, - .target = nflog_tg, - .targetsize = sizeof(struct xt_nflog_info), - .me = THIS_MODULE, - }, - { - .name = "NFLOG", - .family = AF_INET6, - .checkentry = nflog_tg_check, - .target = nflog_tg, - .targetsize = sizeof(struct xt_nflog_info), - .me = THIS_MODULE, - }, +static struct xt_target nflog_tg_reg __read_mostly = { + .name = "NFLOG", + .revision = 0, + .family = NFPROTO_UNSPEC, + .checkentry = nflog_tg_check, + .target = nflog_tg, + .targetsize = sizeof(struct xt_nflog_info), + .me = THIS_MODULE, }; static int __init nflog_tg_init(void) { - return xt_register_targets(nflog_tg_reg, ARRAY_SIZE(nflog_tg_reg)); + return xt_register_target(&nflog_tg_reg); } static void __exit nflog_tg_exit(void) { - xt_unregister_targets(nflog_tg_reg, ARRAY_SIZE(nflog_tg_reg)); + xt_unregister_target(&nflog_tg_reg); } module_init(nflog_tg_init); diff --git a/net/netfilter/xt_NFQUEUE.c b/net/netfilter/xt_NFQUEUE.c index beb24d19a56f..2cc1fff49307 100644 --- a/net/netfilter/xt_NFQUEUE.c +++ b/net/netfilter/xt_NFQUEUE.c @@ -24,11 +24,9 @@ MODULE_ALIAS("ip6t_NFQUEUE"); MODULE_ALIAS("arpt_NFQUEUE"); static unsigned int -nfqueue_tg(struct sk_buff *skb, const struct net_device *in, - const struct net_device *out, unsigned int hooknum, - const struct xt_target *target, const void *targinfo) +nfqueue_tg(struct sk_buff *skb, const struct xt_target_param *par) { - const struct xt_NFQ_info *tinfo = targinfo; + const struct xt_NFQ_info *tinfo = par->targinfo; return NF_QUEUE_NR(tinfo->queuenum); } @@ -36,14 +34,14 @@ nfqueue_tg(struct sk_buff *skb, const struct net_device *in, static struct xt_target nfqueue_tg_reg[] __read_mostly = { { .name = "NFQUEUE", - .family = AF_INET, + .family = NFPROTO_IPV4, .target = nfqueue_tg, .targetsize = sizeof(struct xt_NFQ_info), .me = THIS_MODULE, }, { .name = "NFQUEUE", - .family = AF_INET6, + .family = NFPROTO_IPV6, .target = nfqueue_tg, .targetsize = sizeof(struct xt_NFQ_info), .me = THIS_MODULE, diff --git a/net/netfilter/xt_NOTRACK.c b/net/netfilter/xt_NOTRACK.c index 6c9de611eb8d..e7a0a54fd4ea 100644 --- a/net/netfilter/xt_NOTRACK.c +++ b/net/netfilter/xt_NOTRACK.c @@ -13,9 +13,7 @@ MODULE_ALIAS("ipt_NOTRACK"); MODULE_ALIAS("ip6t_NOTRACK"); static unsigned int -notrack_tg(struct sk_buff *skb, const struct net_device *in, - const struct net_device *out, unsigned int hooknum, - const struct xt_target *target, const void *targinfo) +notrack_tg(struct sk_buff *skb, const struct xt_target_param *par) { /* Previously seen (loopback)? Ignore. */ if (skb->nfct != NULL) @@ -32,31 +30,23 @@ notrack_tg(struct sk_buff *skb, const struct net_device *in, return XT_CONTINUE; } -static struct xt_target notrack_tg_reg[] __read_mostly = { - { - .name = "NOTRACK", - .family = AF_INET, - .target = notrack_tg, - .table = "raw", - .me = THIS_MODULE, - }, - { - .name = "NOTRACK", - .family = AF_INET6, - .target = notrack_tg, - .table = "raw", - .me = THIS_MODULE, - }, +static struct xt_target notrack_tg_reg __read_mostly = { + .name = "NOTRACK", + .revision = 0, + .family = NFPROTO_UNSPEC, + .target = notrack_tg, + .table = "raw", + .me = THIS_MODULE, }; static int __init notrack_tg_init(void) { - return xt_register_targets(notrack_tg_reg, ARRAY_SIZE(notrack_tg_reg)); + return xt_register_target(¬rack_tg_reg); } static void __exit notrack_tg_exit(void) { - xt_unregister_targets(notrack_tg_reg, ARRAY_SIZE(notrack_tg_reg)); + xt_unregister_target(¬rack_tg_reg); } module_init(notrack_tg_init); diff --git a/net/netfilter/xt_RATEEST.c b/net/netfilter/xt_RATEEST.c index 64d6ad380293..43f5676b1af4 100644 --- a/net/netfilter/xt_RATEEST.c +++ b/net/netfilter/xt_RATEEST.c @@ -71,14 +71,9 @@ void xt_rateest_put(struct xt_rateest *est) EXPORT_SYMBOL_GPL(xt_rateest_put); static unsigned int -xt_rateest_tg(struct sk_buff *skb, - const struct net_device *in, - const struct net_device *out, - unsigned int hooknum, - const struct xt_target *target, - const void *targinfo) +xt_rateest_tg(struct sk_buff *skb, const struct xt_target_param *par) { - const struct xt_rateest_target_info *info = targinfo; + const struct xt_rateest_target_info *info = par->targinfo; struct gnet_stats_basic *stats = &info->est->bstats; spin_lock_bh(&info->est->lock); @@ -89,14 +84,9 @@ xt_rateest_tg(struct sk_buff *skb, return XT_CONTINUE; } -static bool -xt_rateest_tg_checkentry(const char *tablename, - const void *entry, - const struct xt_target *target, - void *targinfo, - unsigned int hook_mask) +static bool xt_rateest_tg_checkentry(const struct xt_tgchk_param *par) { - struct xt_rateest_target_info *info = targinfo; + struct xt_rateest_target_info *info = par->targinfo; struct xt_rateest *est; struct { struct nlattr opt; @@ -149,33 +139,22 @@ err1: return false; } -static void xt_rateest_tg_destroy(const struct xt_target *target, - void *targinfo) +static void xt_rateest_tg_destroy(const struct xt_tgdtor_param *par) { - struct xt_rateest_target_info *info = targinfo; + struct xt_rateest_target_info *info = par->targinfo; xt_rateest_put(info->est); } -static struct xt_target xt_rateest_target[] __read_mostly = { - { - .family = AF_INET, - .name = "RATEEST", - .target = xt_rateest_tg, - .checkentry = xt_rateest_tg_checkentry, - .destroy = xt_rateest_tg_destroy, - .targetsize = sizeof(struct xt_rateest_target_info), - .me = THIS_MODULE, - }, - { - .family = AF_INET6, - .name = "RATEEST", - .target = xt_rateest_tg, - .checkentry = xt_rateest_tg_checkentry, - .destroy = xt_rateest_tg_destroy, - .targetsize = sizeof(struct xt_rateest_target_info), - .me = THIS_MODULE, - }, +static struct xt_target xt_rateest_tg_reg __read_mostly = { + .name = "RATEEST", + .revision = 0, + .family = NFPROTO_UNSPEC, + .target = xt_rateest_tg, + .checkentry = xt_rateest_tg_checkentry, + .destroy = xt_rateest_tg_destroy, + .targetsize = sizeof(struct xt_rateest_target_info), + .me = THIS_MODULE, }; static int __init xt_rateest_tg_init(void) @@ -186,13 +165,12 @@ static int __init xt_rateest_tg_init(void) INIT_HLIST_HEAD(&rateest_hash[i]); get_random_bytes(&jhash_rnd, sizeof(jhash_rnd)); - return xt_register_targets(xt_rateest_target, - ARRAY_SIZE(xt_rateest_target)); + return xt_register_target(&xt_rateest_tg_reg); } static void __exit xt_rateest_tg_fini(void) { - xt_unregister_targets(xt_rateest_target, ARRAY_SIZE(xt_rateest_target)); + xt_unregister_target(&xt_rateest_tg_reg); } diff --git a/net/netfilter/xt_SECMARK.c b/net/netfilter/xt_SECMARK.c index c0284856ccd4..7a6f9e6f5dfa 100644 --- a/net/netfilter/xt_SECMARK.c +++ b/net/netfilter/xt_SECMARK.c @@ -5,7 +5,7 @@ * Based on the nfmark match by: * (C) 1999-2001 Marc Boucher <marc@mbsi.ca> * - * (C) 2006 Red Hat, Inc., James Morris <jmorris@redhat.com> + * (C) 2006,2008 Red Hat, Inc., James Morris <jmorris@redhat.com> * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License version 2 as @@ -29,12 +29,10 @@ MODULE_ALIAS("ip6t_SECMARK"); static u8 mode; static unsigned int -secmark_tg(struct sk_buff *skb, const struct net_device *in, - const struct net_device *out, unsigned int hooknum, - const struct xt_target *target, const void *targinfo) +secmark_tg(struct sk_buff *skb, const struct xt_target_param *par) { u32 secmark = 0; - const struct xt_secmark_target_info *info = targinfo; + const struct xt_secmark_target_info *info = par->targinfo; BUG_ON(info->mode != mode); @@ -82,12 +80,16 @@ static bool checkentry_selinux(struct xt_secmark_target_info *info) return true; } -static bool -secmark_tg_check(const char *tablename, const void *entry, - const struct xt_target *target, void *targinfo, - unsigned int hook_mask) +static bool secmark_tg_check(const struct xt_tgchk_param *par) { - struct xt_secmark_target_info *info = targinfo; + struct xt_secmark_target_info *info = par->targinfo; + + if (strcmp(par->table, "mangle") != 0 && + strcmp(par->table, "security") != 0) { + printk(KERN_INFO PFX "target only valid in the \'mangle\' " + "or \'security\' tables, not \'%s\'.\n", par->table); + return false; + } if (mode && mode != info->mode) { printk(KERN_INFO PFX "mode already set to %hu cannot mix with " @@ -111,7 +113,7 @@ secmark_tg_check(const char *tablename, const void *entry, return true; } -static void secmark_tg_destroy(const struct xt_target *target, void *targinfo) +static void secmark_tg_destroy(const struct xt_tgdtor_param *par) { switch (mode) { case SECMARK_MODE_SEL: @@ -119,37 +121,25 @@ static void secmark_tg_destroy(const struct xt_target *target, void *targinfo) } } -static struct xt_target secmark_tg_reg[] __read_mostly = { - { - .name = "SECMARK", - .family = AF_INET, - .checkentry = secmark_tg_check, - .destroy = secmark_tg_destroy, - .target = secmark_tg, - .targetsize = sizeof(struct xt_secmark_target_info), - .table = "mangle", - .me = THIS_MODULE, - }, - { - .name = "SECMARK", - .family = AF_INET6, - .checkentry = secmark_tg_check, - .destroy = secmark_tg_destroy, - .target = secmark_tg, - .targetsize = sizeof(struct xt_secmark_target_info), - .table = "mangle", - .me = THIS_MODULE, - }, +static struct xt_target secmark_tg_reg __read_mostly = { + .name = "SECMARK", + .revision = 0, + .family = NFPROTO_UNSPEC, + .checkentry = secmark_tg_check, + .destroy = secmark_tg_destroy, + .target = secmark_tg, + .targetsize = sizeof(struct xt_secmark_target_info), + .me = THIS_MODULE, }; static int __init secmark_tg_init(void) { - return xt_register_targets(secmark_tg_reg, ARRAY_SIZE(secmark_tg_reg)); + return xt_register_target(&secmark_tg_reg); } static void __exit secmark_tg_exit(void) { - xt_unregister_targets(secmark_tg_reg, ARRAY_SIZE(secmark_tg_reg)); + xt_unregister_target(&secmark_tg_reg); } module_init(secmark_tg_init); diff --git a/net/netfilter/xt_TCPMSS.c b/net/netfilter/xt_TCPMSS.c index 217e2b686322..4f3b1f808795 100644 --- a/net/netfilter/xt_TCPMSS.c +++ b/net/netfilter/xt_TCPMSS.c @@ -147,17 +147,21 @@ tcpmss_mangle_packet(struct sk_buff *skb, return TCPOLEN_MSS; } -static u_int32_t tcpmss_reverse_mtu4(const struct iphdr *iph) +static u_int32_t tcpmss_reverse_mtu(const struct sk_buff *skb, + unsigned int family) { - struct flowi fl = { - .fl4_dst = iph->saddr, - }; + struct flowi fl = {}; const struct nf_afinfo *ai; struct rtable *rt = NULL; u_int32_t mtu = ~0U; + if (family == PF_INET) + fl.fl4_dst = ip_hdr(skb)->saddr; + else + fl.fl6_dst = ipv6_hdr(skb)->saddr; + rcu_read_lock(); - ai = nf_get_afinfo(AF_INET); + ai = nf_get_afinfo(family); if (ai != NULL) ai->route((struct dst_entry **)&rt, &fl); rcu_read_unlock(); @@ -170,15 +174,14 @@ static u_int32_t tcpmss_reverse_mtu4(const struct iphdr *iph) } static unsigned int -tcpmss_tg4(struct sk_buff *skb, const struct net_device *in, - const struct net_device *out, unsigned int hooknum, - const struct xt_target *target, const void *targinfo) +tcpmss_tg4(struct sk_buff *skb, const struct xt_target_param *par) { struct iphdr *iph = ip_hdr(skb); __be16 newlen; int ret; - ret = tcpmss_mangle_packet(skb, targinfo, tcpmss_reverse_mtu4(iph), + ret = tcpmss_mangle_packet(skb, par->targinfo, + tcpmss_reverse_mtu(skb, PF_INET), iph->ihl * 4, sizeof(*iph) + sizeof(struct tcphdr)); if (ret < 0) @@ -193,32 +196,8 @@ tcpmss_tg4(struct sk_buff *skb, const struct net_device *in, } #if defined(CONFIG_IP6_NF_IPTABLES) || defined(CONFIG_IP6_NF_IPTABLES_MODULE) -static u_int32_t tcpmss_reverse_mtu6(const struct ipv6hdr *iph) -{ - struct flowi fl = { - .fl6_dst = iph->saddr, - }; - const struct nf_afinfo *ai; - struct rtable *rt = NULL; - u_int32_t mtu = ~0U; - - rcu_read_lock(); - ai = nf_get_afinfo(AF_INET6); - if (ai != NULL) - ai->route((struct dst_entry **)&rt, &fl); - rcu_read_unlock(); - - if (rt != NULL) { - mtu = dst_mtu(&rt->u.dst); - dst_release(&rt->u.dst); - } - return mtu; -} - static unsigned int -tcpmss_tg6(struct sk_buff *skb, const struct net_device *in, - const struct net_device *out, unsigned int hooknum, - const struct xt_target *target, const void *targinfo) +tcpmss_tg6(struct sk_buff *skb, const struct xt_target_param *par) { struct ipv6hdr *ipv6h = ipv6_hdr(skb); u8 nexthdr; @@ -229,7 +208,8 @@ tcpmss_tg6(struct sk_buff *skb, const struct net_device *in, tcphoff = ipv6_skip_exthdr(skb, sizeof(*ipv6h), &nexthdr); if (tcphoff < 0) return NF_DROP; - ret = tcpmss_mangle_packet(skb, targinfo, tcpmss_reverse_mtu6(ipv6h), + ret = tcpmss_mangle_packet(skb, par->targinfo, + tcpmss_reverse_mtu(skb, PF_INET6), tcphoff, sizeof(*ipv6h) + sizeof(struct tcphdr)); if (ret < 0) @@ -257,16 +237,13 @@ static inline bool find_syn_match(const struct xt_entry_match *m) return false; } -static bool -tcpmss_tg4_check(const char *tablename, const void *entry, - const struct xt_target *target, void *targinfo, - unsigned int hook_mask) +static bool tcpmss_tg4_check(const struct xt_tgchk_param *par) { - const struct xt_tcpmss_info *info = targinfo; - const struct ipt_entry *e = entry; + const struct xt_tcpmss_info *info = par->targinfo; + const struct ipt_entry *e = par->entryinfo; if (info->mss == XT_TCPMSS_CLAMP_PMTU && - (hook_mask & ~((1 << NF_INET_FORWARD) | + (par->hook_mask & ~((1 << NF_INET_FORWARD) | (1 << NF_INET_LOCAL_OUT) | (1 << NF_INET_POST_ROUTING))) != 0) { printk("xt_TCPMSS: path-MTU clamping only supported in " @@ -280,16 +257,13 @@ tcpmss_tg4_check(const char *tablename, const void *entry, } #if defined(CONFIG_IP6_NF_IPTABLES) || defined(CONFIG_IP6_NF_IPTABLES_MODULE) -static bool -tcpmss_tg6_check(const char *tablename, const void *entry, - const struct xt_target *target, void *targinfo, - unsigned int hook_mask) +static bool tcpmss_tg6_check(const struct xt_tgchk_param *par) { - const struct xt_tcpmss_info *info = targinfo; - const struct ip6t_entry *e = entry; + const struct xt_tcpmss_info *info = par->targinfo; + const struct ip6t_entry *e = par->entryinfo; if (info->mss == XT_TCPMSS_CLAMP_PMTU && - (hook_mask & ~((1 << NF_INET_FORWARD) | + (par->hook_mask & ~((1 << NF_INET_FORWARD) | (1 << NF_INET_LOCAL_OUT) | (1 << NF_INET_POST_ROUTING))) != 0) { printk("xt_TCPMSS: path-MTU clamping only supported in " @@ -305,7 +279,7 @@ tcpmss_tg6_check(const char *tablename, const void *entry, static struct xt_target tcpmss_tg_reg[] __read_mostly = { { - .family = AF_INET, + .family = NFPROTO_IPV4, .name = "TCPMSS", .checkentry = tcpmss_tg4_check, .target = tcpmss_tg4, @@ -315,7 +289,7 @@ static struct xt_target tcpmss_tg_reg[] __read_mostly = { }, #if defined(CONFIG_IP6_NF_IPTABLES) || defined(CONFIG_IP6_NF_IPTABLES_MODULE) { - .family = AF_INET6, + .family = NFPROTO_IPV6, .name = "TCPMSS", .checkentry = tcpmss_tg6_check, .target = tcpmss_tg6, diff --git a/net/netfilter/xt_TCPOPTSTRIP.c b/net/netfilter/xt_TCPOPTSTRIP.c index 9685b6fcbc81..9dd8c8ef63eb 100644 --- a/net/netfilter/xt_TCPOPTSTRIP.c +++ b/net/netfilter/xt_TCPOPTSTRIP.c @@ -75,19 +75,15 @@ tcpoptstrip_mangle_packet(struct sk_buff *skb, } static unsigned int -tcpoptstrip_tg4(struct sk_buff *skb, const struct net_device *in, - const struct net_device *out, unsigned int hooknum, - const struct xt_target *target, const void *targinfo) +tcpoptstrip_tg4(struct sk_buff *skb, const struct xt_target_param *par) { - return tcpoptstrip_mangle_packet(skb, targinfo, ip_hdrlen(skb), + return tcpoptstrip_mangle_packet(skb, par->targinfo, ip_hdrlen(skb), sizeof(struct iphdr) + sizeof(struct tcphdr)); } #if defined(CONFIG_IP6_NF_MANGLE) || defined(CONFIG_IP6_NF_MANGLE_MODULE) static unsigned int -tcpoptstrip_tg6(struct sk_buff *skb, const struct net_device *in, - const struct net_device *out, unsigned int hooknum, - const struct xt_target *target, const void *targinfo) +tcpoptstrip_tg6(struct sk_buff *skb, const struct xt_target_param *par) { struct ipv6hdr *ipv6h = ipv6_hdr(skb); int tcphoff; @@ -98,7 +94,7 @@ tcpoptstrip_tg6(struct sk_buff *skb, const struct net_device *in, if (tcphoff < 0) return NF_DROP; - return tcpoptstrip_mangle_packet(skb, targinfo, tcphoff, + return tcpoptstrip_mangle_packet(skb, par->targinfo, tcphoff, sizeof(*ipv6h) + sizeof(struct tcphdr)); } #endif @@ -106,7 +102,7 @@ tcpoptstrip_tg6(struct sk_buff *skb, const struct net_device *in, static struct xt_target tcpoptstrip_tg_reg[] __read_mostly = { { .name = "TCPOPTSTRIP", - .family = AF_INET, + .family = NFPROTO_IPV4, .table = "mangle", .proto = IPPROTO_TCP, .target = tcpoptstrip_tg4, @@ -116,7 +112,7 @@ static struct xt_target tcpoptstrip_tg_reg[] __read_mostly = { #if defined(CONFIG_IP6_NF_MANGLE) || defined(CONFIG_IP6_NF_MANGLE_MODULE) { .name = "TCPOPTSTRIP", - .family = AF_INET6, + .family = NFPROTO_IPV6, .table = "mangle", .proto = IPPROTO_TCP, .target = tcpoptstrip_tg6, diff --git a/net/netfilter/xt_TPROXY.c b/net/netfilter/xt_TPROXY.c new file mode 100644 index 000000000000..1340c2fa3621 --- /dev/null +++ b/net/netfilter/xt_TPROXY.c @@ -0,0 +1,102 @@ +/* + * Transparent proxy support for Linux/iptables + * + * Copyright (c) 2006-2007 BalaBit IT Ltd. + * Author: Balazs Scheidler, Krisztian Kovacs + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + */ + +#include <linux/module.h> +#include <linux/skbuff.h> +#include <linux/ip.h> +#include <net/checksum.h> +#include <net/udp.h> +#include <net/inet_sock.h> + +#include <linux/netfilter/x_tables.h> +#include <linux/netfilter_ipv4/ip_tables.h> +#include <linux/netfilter/xt_TPROXY.h> + +#include <net/netfilter/ipv4/nf_defrag_ipv4.h> +#include <net/netfilter/nf_tproxy_core.h> + +static unsigned int +tproxy_tg(struct sk_buff *skb, const struct xt_target_param *par) +{ + const struct iphdr *iph = ip_hdr(skb); + const struct xt_tproxy_target_info *tgi = par->targinfo; + struct udphdr _hdr, *hp; + struct sock *sk; + + hp = skb_header_pointer(skb, ip_hdrlen(skb), sizeof(_hdr), &_hdr); + if (hp == NULL) + return NF_DROP; + + sk = nf_tproxy_get_sock_v4(dev_net(skb->dev), iph->protocol, + iph->saddr, tgi->laddr ? tgi->laddr : iph->daddr, + hp->source, tgi->lport ? tgi->lport : hp->dest, + par->in, true); + + /* NOTE: assign_sock consumes our sk reference */ + if (sk && nf_tproxy_assign_sock(skb, sk)) { + /* This should be in a separate target, but we don't do multiple + targets on the same rule yet */ + skb->mark = (skb->mark & ~tgi->mark_mask) ^ tgi->mark_value; + + pr_debug("redirecting: proto %u %08x:%u -> %08x:%u, mark: %x\n", + iph->protocol, ntohl(iph->daddr), ntohs(hp->dest), + ntohl(tgi->laddr), ntohs(tgi->lport), skb->mark); + return NF_ACCEPT; + } + + pr_debug("no socket, dropping: proto %u %08x:%u -> %08x:%u, mark: %x\n", + iph->protocol, ntohl(iph->daddr), ntohs(hp->dest), + ntohl(tgi->laddr), ntohs(tgi->lport), skb->mark); + return NF_DROP; +} + +static bool tproxy_tg_check(const struct xt_tgchk_param *par) +{ + const struct ipt_ip *i = par->entryinfo; + + if ((i->proto == IPPROTO_TCP || i->proto == IPPROTO_UDP) + && !(i->invflags & IPT_INV_PROTO)) + return true; + + pr_info("xt_TPROXY: Can be used only in combination with " + "either -p tcp or -p udp\n"); + return false; +} + +static struct xt_target tproxy_tg_reg __read_mostly = { + .name = "TPROXY", + .family = AF_INET, + .table = "mangle", + .target = tproxy_tg, + .targetsize = sizeof(struct xt_tproxy_target_info), + .checkentry = tproxy_tg_check, + .hooks = 1 << NF_INET_PRE_ROUTING, + .me = THIS_MODULE, +}; + +static int __init tproxy_tg_init(void) +{ + nf_defrag_ipv4_enable(); + return xt_register_target(&tproxy_tg_reg); +} + +static void __exit tproxy_tg_exit(void) +{ + xt_unregister_target(&tproxy_tg_reg); +} + +module_init(tproxy_tg_init); +module_exit(tproxy_tg_exit); +MODULE_LICENSE("GPL"); +MODULE_AUTHOR("Krisztian Kovacs"); +MODULE_DESCRIPTION("Netfilter transparent proxy (TPROXY) target module."); +MODULE_ALIAS("ipt_TPROXY"); diff --git a/net/netfilter/xt_TRACE.c b/net/netfilter/xt_TRACE.c index 30dab79a3438..fbb04b86c46b 100644 --- a/net/netfilter/xt_TRACE.c +++ b/net/netfilter/xt_TRACE.c @@ -11,39 +11,29 @@ MODULE_ALIAS("ipt_TRACE"); MODULE_ALIAS("ip6t_TRACE"); static unsigned int -trace_tg(struct sk_buff *skb, const struct net_device *in, - const struct net_device *out, unsigned int hooknum, - const struct xt_target *target, const void *targinfo) +trace_tg(struct sk_buff *skb, const struct xt_target_param *par) { skb->nf_trace = 1; return XT_CONTINUE; } -static struct xt_target trace_tg_reg[] __read_mostly = { - { - .name = "TRACE", - .family = AF_INET, - .target = trace_tg, - .table = "raw", - .me = THIS_MODULE, - }, - { - .name = "TRACE", - .family = AF_INET6, - .target = trace_tg, - .table = "raw", - .me = THIS_MODULE, - }, +static struct xt_target trace_tg_reg __read_mostly = { + .name = "TRACE", + .revision = 0, + .family = NFPROTO_UNSPEC, + .table = "raw", + .target = trace_tg, + .me = THIS_MODULE, }; static int __init trace_tg_init(void) { - return xt_register_targets(trace_tg_reg, ARRAY_SIZE(trace_tg_reg)); + return xt_register_target(&trace_tg_reg); } static void __exit trace_tg_exit(void) { - xt_unregister_targets(trace_tg_reg, ARRAY_SIZE(trace_tg_reg)); + xt_unregister_target(&trace_tg_reg); } module_init(trace_tg_init); diff --git a/net/netfilter/xt_comment.c b/net/netfilter/xt_comment.c index 89f47364e848..e82179832acd 100644 --- a/net/netfilter/xt_comment.c +++ b/net/netfilter/xt_comment.c @@ -16,40 +16,29 @@ MODULE_ALIAS("ipt_comment"); MODULE_ALIAS("ip6t_comment"); static bool -comment_mt(const struct sk_buff *skb, const struct net_device *in, - const struct net_device *out, const struct xt_match *match, - const void *matchinfo, int offset, unsigned int protooff, - bool *hotdrop) +comment_mt(const struct sk_buff *skb, const struct xt_match_param *par) { /* We always match */ return true; } -static struct xt_match comment_mt_reg[] __read_mostly = { - { - .name = "comment", - .family = AF_INET, - .match = comment_mt, - .matchsize = sizeof(struct xt_comment_info), - .me = THIS_MODULE - }, - { - .name = "comment", - .family = AF_INET6, - .match = comment_mt, - .matchsize = sizeof(struct xt_comment_info), - .me = THIS_MODULE - }, +static struct xt_match comment_mt_reg __read_mostly = { + .name = "comment", + .revision = 0, + .family = NFPROTO_UNSPEC, + .match = comment_mt, + .matchsize = sizeof(struct xt_comment_info), + .me = THIS_MODULE, }; static int __init comment_mt_init(void) { - return xt_register_matches(comment_mt_reg, ARRAY_SIZE(comment_mt_reg)); + return xt_register_match(&comment_mt_reg); } static void __exit comment_mt_exit(void) { - xt_unregister_matches(comment_mt_reg, ARRAY_SIZE(comment_mt_reg)); + xt_unregister_match(&comment_mt_reg); } module_init(comment_mt_init); diff --git a/net/netfilter/xt_connbytes.c b/net/netfilter/xt_connbytes.c index d7e8983cd37f..955e6598a7f0 100644 --- a/net/netfilter/xt_connbytes.c +++ b/net/netfilter/xt_connbytes.c @@ -8,6 +8,7 @@ #include <linux/netfilter/x_tables.h> #include <linux/netfilter/xt_connbytes.h> #include <net/netfilter/nf_conntrack.h> +#include <net/netfilter/nf_conntrack_acct.h> MODULE_LICENSE("GPL"); MODULE_AUTHOR("Harald Welte <laforge@netfilter.org>"); @@ -16,23 +17,23 @@ MODULE_ALIAS("ipt_connbytes"); MODULE_ALIAS("ip6t_connbytes"); static bool -connbytes_mt(const struct sk_buff *skb, const struct net_device *in, - const struct net_device *out, const struct xt_match *match, - const void *matchinfo, int offset, unsigned int protoff, - bool *hotdrop) +connbytes_mt(const struct sk_buff *skb, const struct xt_match_param *par) { - const struct xt_connbytes_info *sinfo = matchinfo; + const struct xt_connbytes_info *sinfo = par->matchinfo; const struct nf_conn *ct; enum ip_conntrack_info ctinfo; u_int64_t what = 0; /* initialize to make gcc happy */ u_int64_t bytes = 0; u_int64_t pkts = 0; - const struct ip_conntrack_counter *counters; + const struct nf_conn_counter *counters; ct = nf_ct_get(skb, &ctinfo); if (!ct) return false; - counters = ct->counters; + + counters = nf_conn_acct_find(ct); + if (!counters) + return false; switch (sinfo->what) { case XT_CONNBYTES_PKTS: @@ -91,12 +92,9 @@ connbytes_mt(const struct sk_buff *skb, const struct net_device *in, return what >= sinfo->count.from; } -static bool -connbytes_mt_check(const char *tablename, const void *ip, - const struct xt_match *match, void *matchinfo, - unsigned int hook_mask) +static bool connbytes_mt_check(const struct xt_mtchk_param *par) { - const struct xt_connbytes_info *sinfo = matchinfo; + const struct xt_connbytes_info *sinfo = par->matchinfo; if (sinfo->what != XT_CONNBYTES_PKTS && sinfo->what != XT_CONNBYTES_BYTES && @@ -108,51 +106,39 @@ connbytes_mt_check(const char *tablename, const void *ip, sinfo->direction != XT_CONNBYTES_DIR_BOTH) return false; - if (nf_ct_l3proto_try_module_get(match->family) < 0) { + if (nf_ct_l3proto_try_module_get(par->family) < 0) { printk(KERN_WARNING "can't load conntrack support for " - "proto=%u\n", match->family); + "proto=%u\n", par->family); return false; } return true; } -static void -connbytes_mt_destroy(const struct xt_match *match, void *matchinfo) +static void connbytes_mt_destroy(const struct xt_mtdtor_param *par) { - nf_ct_l3proto_module_put(match->family); + nf_ct_l3proto_module_put(par->family); } -static struct xt_match connbytes_mt_reg[] __read_mostly = { - { - .name = "connbytes", - .family = AF_INET, - .checkentry = connbytes_mt_check, - .match = connbytes_mt, - .destroy = connbytes_mt_destroy, - .matchsize = sizeof(struct xt_connbytes_info), - .me = THIS_MODULE - }, - { - .name = "connbytes", - .family = AF_INET6, - .checkentry = connbytes_mt_check, - .match = connbytes_mt, - .destroy = connbytes_mt_destroy, - .matchsize = sizeof(struct xt_connbytes_info), - .me = THIS_MODULE - }, +static struct xt_match connbytes_mt_reg __read_mostly = { + .name = "connbytes", + .revision = 0, + .family = NFPROTO_UNSPEC, + .checkentry = connbytes_mt_check, + .match = connbytes_mt, + .destroy = connbytes_mt_destroy, + .matchsize = sizeof(struct xt_connbytes_info), + .me = THIS_MODULE, }; static int __init connbytes_mt_init(void) { - return xt_register_matches(connbytes_mt_reg, - ARRAY_SIZE(connbytes_mt_reg)); + return xt_register_match(&connbytes_mt_reg); } static void __exit connbytes_mt_exit(void) { - xt_unregister_matches(connbytes_mt_reg, ARRAY_SIZE(connbytes_mt_reg)); + xt_unregister_match(&connbytes_mt_reg); } module_init(connbytes_mt_init); diff --git a/net/netfilter/xt_connlimit.c b/net/netfilter/xt_connlimit.c index 70907f6baac3..7f404cc64c83 100644 --- a/net/netfilter/xt_connlimit.c +++ b/net/netfilter/xt_connlimit.c @@ -82,9 +82,9 @@ static inline bool already_closed(const struct nf_conn *conn) static inline unsigned int same_source_net(const union nf_inet_addr *addr, const union nf_inet_addr *mask, - const union nf_inet_addr *u3, unsigned int family) + const union nf_inet_addr *u3, u_int8_t family) { - if (family == AF_INET) { + if (family == NFPROTO_IPV4) { return (addr->ip & mask->ip) == (u3->ip & mask->ip); } else { union nf_inet_addr lh, rh; @@ -114,7 +114,7 @@ static int count_them(struct xt_connlimit_data *data, int matches = 0; - if (match->family == AF_INET6) + if (match->family == NFPROTO_IPV6) hash = &data->iphash[connlimit_iphash6(addr, mask)]; else hash = &data->iphash[connlimit_iphash(addr->ip & mask->ip)]; @@ -123,7 +123,7 @@ static int count_them(struct xt_connlimit_data *data, /* check the saved connections */ list_for_each_entry_safe(conn, tmp, hash, list) { - found = __nf_conntrack_find(&conn->tuple); + found = __nf_conntrack_find(&init_net, &conn->tuple); found_ct = NULL; if (found != NULL) @@ -178,12 +178,9 @@ static int count_them(struct xt_connlimit_data *data, } static bool -connlimit_mt(const struct sk_buff *skb, const struct net_device *in, - const struct net_device *out, const struct xt_match *match, - const void *matchinfo, int offset, unsigned int protoff, - bool *hotdrop) +connlimit_mt(const struct sk_buff *skb, const struct xt_match_param *par) { - const struct xt_connlimit_info *info = matchinfo; + const struct xt_connlimit_info *info = par->matchinfo; union nf_inet_addr addr; struct nf_conntrack_tuple tuple; const struct nf_conntrack_tuple *tuple_ptr = &tuple; @@ -195,10 +192,10 @@ connlimit_mt(const struct sk_buff *skb, const struct net_device *in, if (ct != NULL) tuple_ptr = &ct->tuplehash[0].tuple; else if (!nf_ct_get_tuplepr(skb, skb_network_offset(skb), - match->family, &tuple)) + par->family, &tuple)) goto hotdrop; - if (match->family == AF_INET6) { + if (par->family == NFPROTO_IPV6) { const struct ipv6hdr *iph = ipv6_hdr(skb); memcpy(&addr.ip6, &iph->saddr, sizeof(iph->saddr)); } else { @@ -208,40 +205,37 @@ connlimit_mt(const struct sk_buff *skb, const struct net_device *in, spin_lock_bh(&info->data->lock); connections = count_them(info->data, tuple_ptr, &addr, - &info->mask, match); + &info->mask, par->match); spin_unlock_bh(&info->data->lock); if (connections < 0) { /* kmalloc failed, drop it entirely */ - *hotdrop = true; + *par->hotdrop = true; return false; } return (connections > info->limit) ^ info->inverse; hotdrop: - *hotdrop = true; + *par->hotdrop = true; return false; } -static bool -connlimit_mt_check(const char *tablename, const void *ip, - const struct xt_match *match, void *matchinfo, - unsigned int hook_mask) +static bool connlimit_mt_check(const struct xt_mtchk_param *par) { - struct xt_connlimit_info *info = matchinfo; + struct xt_connlimit_info *info = par->matchinfo; unsigned int i; - if (nf_ct_l3proto_try_module_get(match->family) < 0) { + if (nf_ct_l3proto_try_module_get(par->family) < 0) { printk(KERN_WARNING "cannot load conntrack support for " - "address family %u\n", match->family); + "address family %u\n", par->family); return false; } /* init private data */ info->data = kmalloc(sizeof(struct xt_connlimit_data), GFP_KERNEL); if (info->data == NULL) { - nf_ct_l3proto_module_put(match->family); + nf_ct_l3proto_module_put(par->family); return false; } @@ -252,16 +246,15 @@ connlimit_mt_check(const char *tablename, const void *ip, return true; } -static void -connlimit_mt_destroy(const struct xt_match *match, void *matchinfo) +static void connlimit_mt_destroy(const struct xt_mtdtor_param *par) { - const struct xt_connlimit_info *info = matchinfo; + const struct xt_connlimit_info *info = par->matchinfo; struct xt_connlimit_conn *conn; struct xt_connlimit_conn *tmp; struct list_head *hash = info->data->iphash; unsigned int i; - nf_ct_l3proto_module_put(match->family); + nf_ct_l3proto_module_put(par->family); for (i = 0; i < ARRAY_SIZE(info->data->iphash); ++i) { list_for_each_entry_safe(conn, tmp, &hash[i], list) { @@ -273,41 +266,30 @@ connlimit_mt_destroy(const struct xt_match *match, void *matchinfo) kfree(info->data); } -static struct xt_match connlimit_mt_reg[] __read_mostly = { - { - .name = "connlimit", - .family = AF_INET, - .checkentry = connlimit_mt_check, - .match = connlimit_mt, - .matchsize = sizeof(struct xt_connlimit_info), - .destroy = connlimit_mt_destroy, - .me = THIS_MODULE, - }, - { - .name = "connlimit", - .family = AF_INET6, - .checkentry = connlimit_mt_check, - .match = connlimit_mt, - .matchsize = sizeof(struct xt_connlimit_info), - .destroy = connlimit_mt_destroy, - .me = THIS_MODULE, - }, +static struct xt_match connlimit_mt_reg __read_mostly = { + .name = "connlimit", + .revision = 0, + .family = NFPROTO_UNSPEC, + .checkentry = connlimit_mt_check, + .match = connlimit_mt, + .matchsize = sizeof(struct xt_connlimit_info), + .destroy = connlimit_mt_destroy, + .me = THIS_MODULE, }; static int __init connlimit_mt_init(void) { - return xt_register_matches(connlimit_mt_reg, - ARRAY_SIZE(connlimit_mt_reg)); + return xt_register_match(&connlimit_mt_reg); } static void __exit connlimit_mt_exit(void) { - xt_unregister_matches(connlimit_mt_reg, ARRAY_SIZE(connlimit_mt_reg)); + xt_unregister_match(&connlimit_mt_reg); } module_init(connlimit_mt_init); module_exit(connlimit_mt_exit); -MODULE_AUTHOR("Jan Engelhardt <jengelh@computergmbh.de>"); +MODULE_AUTHOR("Jan Engelhardt <jengelh@medozas.de>"); MODULE_DESCRIPTION("Xtables: Number of connections matching"); MODULE_LICENSE("GPL"); MODULE_ALIAS("ipt_connlimit"); diff --git a/net/netfilter/xt_connmark.c b/net/netfilter/xt_connmark.c index aaa1b96691f9..86cacab7a4a3 100644 --- a/net/netfilter/xt_connmark.c +++ b/net/netfilter/xt_connmark.c @@ -34,12 +34,9 @@ MODULE_ALIAS("ipt_connmark"); MODULE_ALIAS("ip6t_connmark"); static bool -connmark_mt(const struct sk_buff *skb, const struct net_device *in, - const struct net_device *out, const struct xt_match *match, - const void *matchinfo, int offset, unsigned int protoff, - bool *hotdrop) +connmark_mt(const struct sk_buff *skb, const struct xt_match_param *par) { - const struct xt_connmark_mtinfo1 *info = matchinfo; + const struct xt_connmark_mtinfo1 *info = par->matchinfo; enum ip_conntrack_info ctinfo; const struct nf_conn *ct; @@ -51,12 +48,9 @@ connmark_mt(const struct sk_buff *skb, const struct net_device *in, } static bool -connmark_mt_v0(const struct sk_buff *skb, const struct net_device *in, - const struct net_device *out, const struct xt_match *match, - const void *matchinfo, int offset, unsigned int protoff, - bool *hotdrop) +connmark_mt_v0(const struct sk_buff *skb, const struct xt_match_param *par) { - const struct xt_connmark_info *info = matchinfo; + const struct xt_connmark_info *info = par->matchinfo; const struct nf_conn *ct; enum ip_conntrack_info ctinfo; @@ -67,42 +61,35 @@ connmark_mt_v0(const struct sk_buff *skb, const struct net_device *in, return ((ct->mark & info->mask) == info->mark) ^ info->invert; } -static bool -connmark_mt_check_v0(const char *tablename, const void *ip, - const struct xt_match *match, void *matchinfo, - unsigned int hook_mask) +static bool connmark_mt_check_v0(const struct xt_mtchk_param *par) { - const struct xt_connmark_info *cm = matchinfo; + const struct xt_connmark_info *cm = par->matchinfo; if (cm->mark > 0xffffffff || cm->mask > 0xffffffff) { printk(KERN_WARNING "connmark: only support 32bit mark\n"); return false; } - if (nf_ct_l3proto_try_module_get(match->family) < 0) { + if (nf_ct_l3proto_try_module_get(par->family) < 0) { printk(KERN_WARNING "can't load conntrack support for " - "proto=%u\n", match->family); + "proto=%u\n", par->family); return false; } return true; } -static bool -connmark_mt_check(const char *tablename, const void *ip, - const struct xt_match *match, void *matchinfo, - unsigned int hook_mask) +static bool connmark_mt_check(const struct xt_mtchk_param *par) { - if (nf_ct_l3proto_try_module_get(match->family) < 0) { + if (nf_ct_l3proto_try_module_get(par->family) < 0) { printk(KERN_WARNING "cannot load conntrack support for " - "proto=%u\n", match->family); + "proto=%u\n", par->family); return false; } return true; } -static void -connmark_mt_destroy(const struct xt_match *match, void *matchinfo) +static void connmark_mt_destroy(const struct xt_mtdtor_param *par) { - nf_ct_l3proto_module_put(match->family); + nf_ct_l3proto_module_put(par->family); } #ifdef CONFIG_COMPAT @@ -140,7 +127,7 @@ static struct xt_match connmark_mt_reg[] __read_mostly = { { .name = "connmark", .revision = 0, - .family = AF_INET, + .family = NFPROTO_UNSPEC, .checkentry = connmark_mt_check_v0, .match = connmark_mt_v0, .destroy = connmark_mt_destroy, @@ -153,34 +140,9 @@ static struct xt_match connmark_mt_reg[] __read_mostly = { .me = THIS_MODULE }, { - .name = "connmark", - .revision = 0, - .family = AF_INET6, - .checkentry = connmark_mt_check_v0, - .match = connmark_mt_v0, - .destroy = connmark_mt_destroy, - .matchsize = sizeof(struct xt_connmark_info), -#ifdef CONFIG_COMPAT - .compatsize = sizeof(struct compat_xt_connmark_info), - .compat_from_user = connmark_mt_compat_from_user_v0, - .compat_to_user = connmark_mt_compat_to_user_v0, -#endif - .me = THIS_MODULE - }, - { - .name = "connmark", - .revision = 1, - .family = AF_INET, - .checkentry = connmark_mt_check, - .match = connmark_mt, - .matchsize = sizeof(struct xt_connmark_mtinfo1), - .destroy = connmark_mt_destroy, - .me = THIS_MODULE, - }, - { .name = "connmark", .revision = 1, - .family = AF_INET6, + .family = NFPROTO_UNSPEC, .checkentry = connmark_mt_check, .match = connmark_mt, .matchsize = sizeof(struct xt_connmark_mtinfo1), diff --git a/net/netfilter/xt_conntrack.c b/net/netfilter/xt_conntrack.c index d61412f58ef7..0b7139f3dd78 100644 --- a/net/netfilter/xt_conntrack.c +++ b/net/netfilter/xt_conntrack.c @@ -25,12 +25,9 @@ MODULE_ALIAS("ipt_conntrack"); MODULE_ALIAS("ip6t_conntrack"); static bool -conntrack_mt_v0(const struct sk_buff *skb, const struct net_device *in, - const struct net_device *out, const struct xt_match *match, - const void *matchinfo, int offset, unsigned int protoff, - bool *hotdrop) +conntrack_mt_v0(const struct sk_buff *skb, const struct xt_match_param *par) { - const struct xt_conntrack_info *sinfo = matchinfo; + const struct xt_conntrack_info *sinfo = par->matchinfo; const struct nf_conn *ct; enum ip_conntrack_info ctinfo; unsigned int statebit; @@ -121,9 +118,9 @@ conntrack_addrcmp(const union nf_inet_addr *kaddr, const union nf_inet_addr *uaddr, const union nf_inet_addr *umask, unsigned int l3proto) { - if (l3proto == AF_INET) + if (l3proto == NFPROTO_IPV4) return ((kaddr->ip ^ uaddr->ip) & umask->ip) == 0; - else if (l3proto == AF_INET6) + else if (l3proto == NFPROTO_IPV6) return ipv6_masked_addr_cmp(&kaddr->in6, &umask->in6, &uaddr->in6) == 0; else @@ -133,7 +130,7 @@ conntrack_addrcmp(const union nf_inet_addr *kaddr, static inline bool conntrack_mt_origsrc(const struct nf_conn *ct, const struct xt_conntrack_mtinfo1 *info, - unsigned int family) + u_int8_t family) { return conntrack_addrcmp(&ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.src.u3, &info->origsrc_addr, &info->origsrc_mask, family); @@ -142,7 +139,7 @@ conntrack_mt_origsrc(const struct nf_conn *ct, static inline bool conntrack_mt_origdst(const struct nf_conn *ct, const struct xt_conntrack_mtinfo1 *info, - unsigned int family) + u_int8_t family) { return conntrack_addrcmp(&ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.dst.u3, &info->origdst_addr, &info->origdst_mask, family); @@ -151,7 +148,7 @@ conntrack_mt_origdst(const struct nf_conn *ct, static inline bool conntrack_mt_replsrc(const struct nf_conn *ct, const struct xt_conntrack_mtinfo1 *info, - unsigned int family) + u_int8_t family) { return conntrack_addrcmp(&ct->tuplehash[IP_CT_DIR_REPLY].tuple.src.u3, &info->replsrc_addr, &info->replsrc_mask, family); @@ -160,7 +157,7 @@ conntrack_mt_replsrc(const struct nf_conn *ct, static inline bool conntrack_mt_repldst(const struct nf_conn *ct, const struct xt_conntrack_mtinfo1 *info, - unsigned int family) + u_int8_t family) { return conntrack_addrcmp(&ct->tuplehash[IP_CT_DIR_REPLY].tuple.dst.u3, &info->repldst_addr, &info->repldst_mask, family); @@ -205,12 +202,9 @@ ct_proto_port_check(const struct xt_conntrack_mtinfo1 *info, } static bool -conntrack_mt(const struct sk_buff *skb, const struct net_device *in, - const struct net_device *out, const struct xt_match *match, - const void *matchinfo, int offset, unsigned int protoff, - bool *hotdrop) +conntrack_mt(const struct sk_buff *skb, const struct xt_match_param *par) { - const struct xt_conntrack_mtinfo1 *info = matchinfo; + const struct xt_conntrack_mtinfo1 *info = par->matchinfo; enum ip_conntrack_info ctinfo; const struct nf_conn *ct; unsigned int statebit; @@ -244,22 +238,22 @@ conntrack_mt(const struct sk_buff *skb, const struct net_device *in, return false; if (info->match_flags & XT_CONNTRACK_ORIGSRC) - if (conntrack_mt_origsrc(ct, info, match->family) ^ + if (conntrack_mt_origsrc(ct, info, par->family) ^ !(info->invert_flags & XT_CONNTRACK_ORIGSRC)) return false; if (info->match_flags & XT_CONNTRACK_ORIGDST) - if (conntrack_mt_origdst(ct, info, match->family) ^ + if (conntrack_mt_origdst(ct, info, par->family) ^ !(info->invert_flags & XT_CONNTRACK_ORIGDST)) return false; if (info->match_flags & XT_CONNTRACK_REPLSRC) - if (conntrack_mt_replsrc(ct, info, match->family) ^ + if (conntrack_mt_replsrc(ct, info, par->family) ^ !(info->invert_flags & XT_CONNTRACK_REPLSRC)) return false; if (info->match_flags & XT_CONNTRACK_REPLDST) - if (conntrack_mt_repldst(ct, info, match->family) ^ + if (conntrack_mt_repldst(ct, info, par->family) ^ !(info->invert_flags & XT_CONNTRACK_REPLDST)) return false; @@ -284,23 +278,19 @@ conntrack_mt(const struct sk_buff *skb, const struct net_device *in, return true; } -static bool -conntrack_mt_check(const char *tablename, const void *ip, - const struct xt_match *match, void *matchinfo, - unsigned int hook_mask) +static bool conntrack_mt_check(const struct xt_mtchk_param *par) { - if (nf_ct_l3proto_try_module_get(match->family) < 0) { + if (nf_ct_l3proto_try_module_get(par->family) < 0) { printk(KERN_WARNING "can't load conntrack support for " - "proto=%u\n", match->family); + "proto=%u\n", par->family); return false; } return true; } -static void -conntrack_mt_destroy(const struct xt_match *match, void *matchinfo) +static void conntrack_mt_destroy(const struct xt_mtdtor_param *par) { - nf_ct_l3proto_module_put(match->family); + nf_ct_l3proto_module_put(par->family); } #ifdef CONFIG_COMPAT @@ -356,7 +346,7 @@ static struct xt_match conntrack_mt_reg[] __read_mostly = { { .name = "conntrack", .revision = 0, - .family = AF_INET, + .family = NFPROTO_IPV4, .match = conntrack_mt_v0, .checkentry = conntrack_mt_check, .destroy = conntrack_mt_destroy, @@ -371,17 +361,7 @@ static struct xt_match conntrack_mt_reg[] __read_mostly = { { .name = "conntrack", .revision = 1, - .family = AF_INET, - .matchsize = sizeof(struct xt_conntrack_mtinfo1), - .match = conntrack_mt, - .checkentry = conntrack_mt_check, - .destroy = conntrack_mt_destroy, - .me = THIS_MODULE, - }, - { - .name = "conntrack", - .revision = 1, - .family = AF_INET6, + .family = NFPROTO_UNSPEC, .matchsize = sizeof(struct xt_conntrack_mtinfo1), .match = conntrack_mt, .checkentry = conntrack_mt_check, diff --git a/net/netfilter/xt_dccp.c b/net/netfilter/xt_dccp.c index 8b6522186d9f..e5d3e8673287 100644 --- a/net/netfilter/xt_dccp.c +++ b/net/netfilter/xt_dccp.c @@ -93,20 +93,18 @@ match_option(u_int8_t option, const struct sk_buff *skb, unsigned int protoff, } static bool -dccp_mt(const struct sk_buff *skb, const struct net_device *in, - const struct net_device *out, const struct xt_match *match, - const void *matchinfo, int offset, unsigned int protoff, bool *hotdrop) +dccp_mt(const struct sk_buff *skb, const struct xt_match_param *par) { - const struct xt_dccp_info *info = matchinfo; + const struct xt_dccp_info *info = par->matchinfo; const struct dccp_hdr *dh; struct dccp_hdr _dh; - if (offset) + if (par->fragoff != 0) return false; - dh = skb_header_pointer(skb, protoff, sizeof(_dh), &_dh); + dh = skb_header_pointer(skb, par->thoff, sizeof(_dh), &_dh); if (dh == NULL) { - *hotdrop = true; + *par->hotdrop = true; return false; } @@ -118,17 +116,14 @@ dccp_mt(const struct sk_buff *skb, const struct net_device *in, XT_DCCP_DEST_PORTS, info->flags, info->invflags) && DCCHECK(match_types(dh, info->typemask), XT_DCCP_TYPE, info->flags, info->invflags) - && DCCHECK(match_option(info->option, skb, protoff, dh, - hotdrop), + && DCCHECK(match_option(info->option, skb, par->thoff, dh, + par->hotdrop), XT_DCCP_OPTION, info->flags, info->invflags); } -static bool -dccp_mt_check(const char *tablename, const void *inf, - const struct xt_match *match, void *matchinfo, - unsigned int hook_mask) +static bool dccp_mt_check(const struct xt_mtchk_param *par) { - const struct xt_dccp_info *info = matchinfo; + const struct xt_dccp_info *info = par->matchinfo; return !(info->flags & ~XT_DCCP_VALID_FLAGS) && !(info->invflags & ~XT_DCCP_VALID_FLAGS) @@ -138,7 +133,7 @@ dccp_mt_check(const char *tablename, const void *inf, static struct xt_match dccp_mt_reg[] __read_mostly = { { .name = "dccp", - .family = AF_INET, + .family = NFPROTO_IPV4, .checkentry = dccp_mt_check, .match = dccp_mt, .matchsize = sizeof(struct xt_dccp_info), @@ -147,7 +142,7 @@ static struct xt_match dccp_mt_reg[] __read_mostly = { }, { .name = "dccp", - .family = AF_INET6, + .family = NFPROTO_IPV6, .checkentry = dccp_mt_check, .match = dccp_mt, .matchsize = sizeof(struct xt_dccp_info), diff --git a/net/netfilter/xt_dscp.c b/net/netfilter/xt_dscp.c index 26f4aab9c429..c3f8085460d7 100644 --- a/net/netfilter/xt_dscp.c +++ b/net/netfilter/xt_dscp.c @@ -26,61 +26,48 @@ MODULE_ALIAS("ipt_tos"); MODULE_ALIAS("ip6t_tos"); static bool -dscp_mt(const struct sk_buff *skb, const struct net_device *in, - const struct net_device *out, const struct xt_match *match, - const void *matchinfo, int offset, unsigned int protoff, bool *hotdrop) +dscp_mt(const struct sk_buff *skb, const struct xt_match_param *par) { - const struct xt_dscp_info *info = matchinfo; + const struct xt_dscp_info *info = par->matchinfo; u_int8_t dscp = ipv4_get_dsfield(ip_hdr(skb)) >> XT_DSCP_SHIFT; return (dscp == info->dscp) ^ !!info->invert; } static bool -dscp_mt6(const struct sk_buff *skb, const struct net_device *in, - const struct net_device *out, const struct xt_match *match, - const void *matchinfo, int offset, unsigned int protoff, - bool *hotdrop) +dscp_mt6(const struct sk_buff *skb, const struct xt_match_param *par) { - const struct xt_dscp_info *info = matchinfo; + const struct xt_dscp_info *info = par->matchinfo; u_int8_t dscp = ipv6_get_dsfield(ipv6_hdr(skb)) >> XT_DSCP_SHIFT; return (dscp == info->dscp) ^ !!info->invert; } -static bool -dscp_mt_check(const char *tablename, const void *info, - const struct xt_match *match, void *matchinfo, - unsigned int hook_mask) +static bool dscp_mt_check(const struct xt_mtchk_param *par) { - const u_int8_t dscp = ((struct xt_dscp_info *)matchinfo)->dscp; + const struct xt_dscp_info *info = par->matchinfo; - if (dscp > XT_DSCP_MAX) { - printk(KERN_ERR "xt_dscp: dscp %x out of range\n", dscp); + if (info->dscp > XT_DSCP_MAX) { + printk(KERN_ERR "xt_dscp: dscp %x out of range\n", info->dscp); return false; } return true; } -static bool tos_mt_v0(const struct sk_buff *skb, const struct net_device *in, - const struct net_device *out, - const struct xt_match *match, const void *matchinfo, - int offset, unsigned int protoff, bool *hotdrop) +static bool +tos_mt_v0(const struct sk_buff *skb, const struct xt_match_param *par) { - const struct ipt_tos_info *info = matchinfo; + const struct ipt_tos_info *info = par->matchinfo; return (ip_hdr(skb)->tos == info->tos) ^ info->invert; } -static bool tos_mt(const struct sk_buff *skb, const struct net_device *in, - const struct net_device *out, const struct xt_match *match, - const void *matchinfo, int offset, unsigned int protoff, - bool *hotdrop) +static bool tos_mt(const struct sk_buff *skb, const struct xt_match_param *par) { - const struct xt_tos_match_info *info = matchinfo; + const struct xt_tos_match_info *info = par->matchinfo; - if (match->family == AF_INET) + if (par->match->family == NFPROTO_IPV4) return ((ip_hdr(skb)->tos & info->tos_mask) == info->tos_value) ^ !!info->invert; else @@ -91,7 +78,7 @@ static bool tos_mt(const struct sk_buff *skb, const struct net_device *in, static struct xt_match dscp_mt_reg[] __read_mostly = { { .name = "dscp", - .family = AF_INET, + .family = NFPROTO_IPV4, .checkentry = dscp_mt_check, .match = dscp_mt, .matchsize = sizeof(struct xt_dscp_info), @@ -99,7 +86,7 @@ static struct xt_match dscp_mt_reg[] __read_mostly = { }, { .name = "dscp", - .family = AF_INET6, + .family = NFPROTO_IPV6, .checkentry = dscp_mt_check, .match = dscp_mt6, .matchsize = sizeof(struct xt_dscp_info), @@ -108,7 +95,7 @@ static struct xt_match dscp_mt_reg[] __read_mostly = { { .name = "tos", .revision = 0, - .family = AF_INET, + .family = NFPROTO_IPV4, .match = tos_mt_v0, .matchsize = sizeof(struct ipt_tos_info), .me = THIS_MODULE, @@ -116,7 +103,7 @@ static struct xt_match dscp_mt_reg[] __read_mostly = { { .name = "tos", .revision = 1, - .family = AF_INET, + .family = NFPROTO_IPV4, .match = tos_mt, .matchsize = sizeof(struct xt_tos_match_info), .me = THIS_MODULE, @@ -124,7 +111,7 @@ static struct xt_match dscp_mt_reg[] __read_mostly = { { .name = "tos", .revision = 1, - .family = AF_INET6, + .family = NFPROTO_IPV6, .match = tos_mt, .matchsize = sizeof(struct xt_tos_match_info), .me = THIS_MODULE, diff --git a/net/netfilter/xt_esp.c b/net/netfilter/xt_esp.c index a133eb9b23e1..609439967c2c 100644 --- a/net/netfilter/xt_esp.c +++ b/net/netfilter/xt_esp.c @@ -42,26 +42,23 @@ spi_match(u_int32_t min, u_int32_t max, u_int32_t spi, bool invert) return r; } -static bool -esp_mt(const struct sk_buff *skb, const struct net_device *in, - const struct net_device *out, const struct xt_match *match, - const void *matchinfo, int offset, unsigned int protoff, bool *hotdrop) +static bool esp_mt(const struct sk_buff *skb, const struct xt_match_param *par) { const struct ip_esp_hdr *eh; struct ip_esp_hdr _esp; - const struct xt_esp *espinfo = matchinfo; + const struct xt_esp *espinfo = par->matchinfo; /* Must not be a fragment. */ - if (offset) + if (par->fragoff != 0) return false; - eh = skb_header_pointer(skb, protoff, sizeof(_esp), &_esp); + eh = skb_header_pointer(skb, par->thoff, sizeof(_esp), &_esp); if (eh == NULL) { /* We've been asked to examine this packet, and we * can't. Hence, no choice but to drop. */ duprintf("Dropping evil ESP tinygram.\n"); - *hotdrop = true; + *par->hotdrop = true; return false; } @@ -69,13 +66,9 @@ esp_mt(const struct sk_buff *skb, const struct net_device *in, !!(espinfo->invflags & XT_ESP_INV_SPI)); } -/* Called when user tries to insert an entry of this type. */ -static bool -esp_mt_check(const char *tablename, const void *ip_void, - const struct xt_match *match, void *matchinfo, - unsigned int hook_mask) +static bool esp_mt_check(const struct xt_mtchk_param *par) { - const struct xt_esp *espinfo = matchinfo; + const struct xt_esp *espinfo = par->matchinfo; if (espinfo->invflags & ~XT_ESP_INV_MASK) { duprintf("xt_esp: unknown flags %X\n", espinfo->invflags); @@ -88,7 +81,7 @@ esp_mt_check(const char *tablename, const void *ip_void, static struct xt_match esp_mt_reg[] __read_mostly = { { .name = "esp", - .family = AF_INET, + .family = NFPROTO_IPV4, .checkentry = esp_mt_check, .match = esp_mt, .matchsize = sizeof(struct xt_esp), @@ -97,7 +90,7 @@ static struct xt_match esp_mt_reg[] __read_mostly = { }, { .name = "esp", - .family = AF_INET6, + .family = NFPROTO_IPV6, .checkentry = esp_mt_check, .match = esp_mt, .matchsize = sizeof(struct xt_esp), diff --git a/net/netfilter/xt_hashlimit.c b/net/netfilter/xt_hashlimit.c index 6809af542a2c..6fc4292d46e6 100644 --- a/net/netfilter/xt_hashlimit.c +++ b/net/netfilter/xt_hashlimit.c @@ -80,7 +80,7 @@ struct dsthash_ent { struct xt_hashlimit_htable { struct hlist_node node; /* global list of all htables */ atomic_t use; - int family; + u_int8_t family; struct hashlimit_cfg1 cfg; /* config */ @@ -185,7 +185,7 @@ dsthash_free(struct xt_hashlimit_htable *ht, struct dsthash_ent *ent) } static void htable_gc(unsigned long htlong); -static int htable_create_v0(struct xt_hashlimit_info *minfo, int family) +static int htable_create_v0(struct xt_hashlimit_info *minfo, u_int8_t family) { struct xt_hashlimit_htable *hinfo; unsigned int size; @@ -218,7 +218,7 @@ static int htable_create_v0(struct xt_hashlimit_info *minfo, int family) hinfo->cfg.gc_interval = minfo->cfg.gc_interval; hinfo->cfg.expire = minfo->cfg.expire; - if (family == AF_INET) + if (family == NFPROTO_IPV4) hinfo->cfg.srcmask = hinfo->cfg.dstmask = 32; else hinfo->cfg.srcmask = hinfo->cfg.dstmask = 128; @@ -237,11 +237,10 @@ static int htable_create_v0(struct xt_hashlimit_info *minfo, int family) hinfo->family = family; hinfo->rnd_initialized = 0; spin_lock_init(&hinfo->lock); - hinfo->pde = - proc_create_data(minfo->name, 0, - family == AF_INET ? hashlimit_procdir4 : - hashlimit_procdir6, - &dl_file_ops, hinfo); + hinfo->pde = proc_create_data(minfo->name, 0, + (family == NFPROTO_IPV4) ? + hashlimit_procdir4 : hashlimit_procdir6, + &dl_file_ops, hinfo); if (!hinfo->pde) { vfree(hinfo); return -1; @@ -258,8 +257,7 @@ static int htable_create_v0(struct xt_hashlimit_info *minfo, int family) return 0; } -static int htable_create(struct xt_hashlimit_mtinfo1 *minfo, - unsigned int family) +static int htable_create(struct xt_hashlimit_mtinfo1 *minfo, u_int8_t family) { struct xt_hashlimit_htable *hinfo; unsigned int size; @@ -301,11 +299,10 @@ static int htable_create(struct xt_hashlimit_mtinfo1 *minfo, hinfo->rnd_initialized = 0; spin_lock_init(&hinfo->lock); - hinfo->pde = - proc_create_data(minfo->name, 0, - family == AF_INET ? hashlimit_procdir4 : - hashlimit_procdir6, - &dl_file_ops, hinfo); + hinfo->pde = proc_create_data(minfo->name, 0, + (family == NFPROTO_IPV4) ? + hashlimit_procdir4 : hashlimit_procdir6, + &dl_file_ops, hinfo); if (hinfo->pde == NULL) { vfree(hinfo); return -1; @@ -367,20 +364,18 @@ static void htable_gc(unsigned long htlong) static void htable_destroy(struct xt_hashlimit_htable *hinfo) { - /* remove timer, if it is pending */ - if (timer_pending(&hinfo->timer)) - del_timer(&hinfo->timer); + del_timer_sync(&hinfo->timer); /* remove proc entry */ remove_proc_entry(hinfo->pde->name, - hinfo->family == AF_INET ? hashlimit_procdir4 : + hinfo->family == NFPROTO_IPV4 ? hashlimit_procdir4 : hashlimit_procdir6); htable_selective_cleanup(hinfo, select_all); vfree(hinfo); } static struct xt_hashlimit_htable *htable_find_get(const char *name, - int family) + u_int8_t family) { struct xt_hashlimit_htable *hinfo; struct hlist_node *pos; @@ -504,7 +499,7 @@ hashlimit_init_dst(const struct xt_hashlimit_htable *hinfo, memset(dst, 0, sizeof(*dst)); switch (hinfo->family) { - case AF_INET: + case NFPROTO_IPV4: if (hinfo->cfg.mode & XT_HASHLIMIT_HASH_DIP) dst->ip.dst = maskl(ip_hdr(skb)->daddr, hinfo->cfg.dstmask); @@ -518,7 +513,7 @@ hashlimit_init_dst(const struct xt_hashlimit_htable *hinfo, nexthdr = ip_hdr(skb)->protocol; break; #if defined(CONFIG_IP6_NF_IPTABLES) || defined(CONFIG_IP6_NF_IPTABLES_MODULE) - case AF_INET6: + case NFPROTO_IPV6: if (hinfo->cfg.mode & XT_HASHLIMIT_HASH_DIP) { memcpy(&dst->ip6.dst, &ipv6_hdr(skb)->daddr, sizeof(dst->ip6.dst)); @@ -568,19 +563,16 @@ hashlimit_init_dst(const struct xt_hashlimit_htable *hinfo, } static bool -hashlimit_mt_v0(const struct sk_buff *skb, const struct net_device *in, - const struct net_device *out, const struct xt_match *match, - const void *matchinfo, int offset, unsigned int protoff, - bool *hotdrop) +hashlimit_mt_v0(const struct sk_buff *skb, const struct xt_match_param *par) { const struct xt_hashlimit_info *r = - ((const struct xt_hashlimit_info *)matchinfo)->u.master; + ((const struct xt_hashlimit_info *)par->matchinfo)->u.master; struct xt_hashlimit_htable *hinfo = r->hinfo; unsigned long now = jiffies; struct dsthash_ent *dh; struct dsthash_dst dst; - if (hashlimit_init_dst(hinfo, &dst, skb, protoff) < 0) + if (hashlimit_init_dst(hinfo, &dst, skb, par->thoff) < 0) goto hotdrop; spin_lock_bh(&hinfo->lock); @@ -618,23 +610,20 @@ hashlimit_mt_v0(const struct sk_buff *skb, const struct net_device *in, return false; hotdrop: - *hotdrop = true; + *par->hotdrop = true; return false; } static bool -hashlimit_mt(const struct sk_buff *skb, const struct net_device *in, - const struct net_device *out, const struct xt_match *match, - const void *matchinfo, int offset, unsigned int protoff, - bool *hotdrop) +hashlimit_mt(const struct sk_buff *skb, const struct xt_match_param *par) { - const struct xt_hashlimit_mtinfo1 *info = matchinfo; + const struct xt_hashlimit_mtinfo1 *info = par->matchinfo; struct xt_hashlimit_htable *hinfo = info->hinfo; unsigned long now = jiffies; struct dsthash_ent *dh; struct dsthash_dst dst; - if (hashlimit_init_dst(hinfo, &dst, skb, protoff) < 0) + if (hashlimit_init_dst(hinfo, &dst, skb, par->thoff) < 0) goto hotdrop; spin_lock_bh(&hinfo->lock); @@ -671,16 +660,13 @@ hashlimit_mt(const struct sk_buff *skb, const struct net_device *in, return info->cfg.mode & XT_HASHLIMIT_INVERT; hotdrop: - *hotdrop = true; + *par->hotdrop = true; return false; } -static bool -hashlimit_mt_check_v0(const char *tablename, const void *inf, - const struct xt_match *match, void *matchinfo, - unsigned int hook_mask) +static bool hashlimit_mt_check_v0(const struct xt_mtchk_param *par) { - struct xt_hashlimit_info *r = matchinfo; + struct xt_hashlimit_info *r = par->matchinfo; /* Check for overflow. */ if (r->cfg.burst == 0 || @@ -709,8 +695,8 @@ hashlimit_mt_check_v0(const char *tablename, const void *inf, * the list of htable's in htable_create(), since then we would * create duplicate proc files. -HW */ mutex_lock(&hlimit_mutex); - r->hinfo = htable_find_get(r->name, match->family); - if (!r->hinfo && htable_create_v0(r, match->family) != 0) { + r->hinfo = htable_find_get(r->name, par->match->family); + if (!r->hinfo && htable_create_v0(r, par->match->family) != 0) { mutex_unlock(&hlimit_mutex); return false; } @@ -721,12 +707,9 @@ hashlimit_mt_check_v0(const char *tablename, const void *inf, return true; } -static bool -hashlimit_mt_check(const char *tablename, const void *inf, - const struct xt_match *match, void *matchinfo, - unsigned int hook_mask) +static bool hashlimit_mt_check(const struct xt_mtchk_param *par) { - struct xt_hashlimit_mtinfo1 *info = matchinfo; + struct xt_hashlimit_mtinfo1 *info = par->matchinfo; /* Check for overflow. */ if (info->cfg.burst == 0 || @@ -740,7 +723,7 @@ hashlimit_mt_check(const char *tablename, const void *inf, return false; if (info->name[sizeof(info->name)-1] != '\0') return false; - if (match->family == AF_INET) { + if (par->match->family == NFPROTO_IPV4) { if (info->cfg.srcmask > 32 || info->cfg.dstmask > 32) return false; } else { @@ -755,8 +738,8 @@ hashlimit_mt_check(const char *tablename, const void *inf, * the list of htable's in htable_create(), since then we would * create duplicate proc files. -HW */ mutex_lock(&hlimit_mutex); - info->hinfo = htable_find_get(info->name, match->family); - if (!info->hinfo && htable_create(info, match->family) != 0) { + info->hinfo = htable_find_get(info->name, par->match->family); + if (!info->hinfo && htable_create(info, par->match->family) != 0) { mutex_unlock(&hlimit_mutex); return false; } @@ -765,17 +748,16 @@ hashlimit_mt_check(const char *tablename, const void *inf, } static void -hashlimit_mt_destroy_v0(const struct xt_match *match, void *matchinfo) +hashlimit_mt_destroy_v0(const struct xt_mtdtor_param *par) { - const struct xt_hashlimit_info *r = matchinfo; + const struct xt_hashlimit_info *r = par->matchinfo; htable_put(r->hinfo); } -static void -hashlimit_mt_destroy(const struct xt_match *match, void *matchinfo) +static void hashlimit_mt_destroy(const struct xt_mtdtor_param *par) { - const struct xt_hashlimit_mtinfo1 *info = matchinfo; + const struct xt_hashlimit_mtinfo1 *info = par->matchinfo; htable_put(info->hinfo); } @@ -808,7 +790,7 @@ static struct xt_match hashlimit_mt_reg[] __read_mostly = { { .name = "hashlimit", .revision = 0, - .family = AF_INET, + .family = NFPROTO_IPV4, .match = hashlimit_mt_v0, .matchsize = sizeof(struct xt_hashlimit_info), #ifdef CONFIG_COMPAT @@ -823,7 +805,7 @@ static struct xt_match hashlimit_mt_reg[] __read_mostly = { { .name = "hashlimit", .revision = 1, - .family = AF_INET, + .family = NFPROTO_IPV4, .match = hashlimit_mt, .matchsize = sizeof(struct xt_hashlimit_mtinfo1), .checkentry = hashlimit_mt_check, @@ -833,7 +815,7 @@ static struct xt_match hashlimit_mt_reg[] __read_mostly = { #if defined(CONFIG_IP6_NF_IPTABLES) || defined(CONFIG_IP6_NF_IPTABLES_MODULE) { .name = "hashlimit", - .family = AF_INET6, + .family = NFPROTO_IPV6, .match = hashlimit_mt_v0, .matchsize = sizeof(struct xt_hashlimit_info), #ifdef CONFIG_COMPAT @@ -848,7 +830,7 @@ static struct xt_match hashlimit_mt_reg[] __read_mostly = { { .name = "hashlimit", .revision = 1, - .family = AF_INET6, + .family = NFPROTO_IPV6, .match = hashlimit_mt, .matchsize = sizeof(struct xt_hashlimit_mtinfo1), .checkentry = hashlimit_mt_check, @@ -903,14 +885,14 @@ static void dl_seq_stop(struct seq_file *s, void *v) spin_unlock_bh(&htable->lock); } -static int dl_seq_real_show(struct dsthash_ent *ent, int family, +static int dl_seq_real_show(struct dsthash_ent *ent, u_int8_t family, struct seq_file *s) { /* recalculate to show accurate numbers */ rateinfo_recalc(ent, jiffies); switch (family) { - case AF_INET: + case NFPROTO_IPV4: return seq_printf(s, "%ld %u.%u.%u.%u:%u->" "%u.%u.%u.%u:%u %u %u %u\n", (long)(ent->expires - jiffies)/HZ, @@ -921,7 +903,7 @@ static int dl_seq_real_show(struct dsthash_ent *ent, int family, ent->rateinfo.credit, ent->rateinfo.credit_cap, ent->rateinfo.cost); #if defined(CONFIG_IP6_NF_IPTABLES) || defined(CONFIG_IP6_NF_IPTABLES_MODULE) - case AF_INET6: + case NFPROTO_IPV6: return seq_printf(s, "%ld " NIP6_FMT ":%u->" NIP6_FMT ":%u %u %u %u\n", (long)(ent->expires - jiffies)/HZ, diff --git a/net/netfilter/xt_helper.c b/net/netfilter/xt_helper.c index dada2905d66e..64fc7f277221 100644 --- a/net/netfilter/xt_helper.c +++ b/net/netfilter/xt_helper.c @@ -24,12 +24,9 @@ MODULE_ALIAS("ip6t_helper"); static bool -helper_mt(const struct sk_buff *skb, const struct net_device *in, - const struct net_device *out, const struct xt_match *match, - const void *matchinfo, int offset, unsigned int protoff, - bool *hotdrop) +helper_mt(const struct sk_buff *skb, const struct xt_match_param *par) { - const struct xt_helper_info *info = matchinfo; + const struct xt_helper_info *info = par->matchinfo; const struct nf_conn *ct; const struct nf_conn_help *master_help; const struct nf_conntrack_helper *helper; @@ -57,56 +54,43 @@ helper_mt(const struct sk_buff *skb, const struct net_device *in, return ret; } -static bool -helper_mt_check(const char *tablename, const void *inf, - const struct xt_match *match, void *matchinfo, - unsigned int hook_mask) +static bool helper_mt_check(const struct xt_mtchk_param *par) { - struct xt_helper_info *info = matchinfo; + struct xt_helper_info *info = par->matchinfo; - if (nf_ct_l3proto_try_module_get(match->family) < 0) { + if (nf_ct_l3proto_try_module_get(par->family) < 0) { printk(KERN_WARNING "can't load conntrack support for " - "proto=%u\n", match->family); + "proto=%u\n", par->family); return false; } info->name[29] = '\0'; return true; } -static void helper_mt_destroy(const struct xt_match *match, void *matchinfo) +static void helper_mt_destroy(const struct xt_mtdtor_param *par) { - nf_ct_l3proto_module_put(match->family); + nf_ct_l3proto_module_put(par->family); } -static struct xt_match helper_mt_reg[] __read_mostly = { - { - .name = "helper", - .family = AF_INET, - .checkentry = helper_mt_check, - .match = helper_mt, - .destroy = helper_mt_destroy, - .matchsize = sizeof(struct xt_helper_info), - .me = THIS_MODULE, - }, - { - .name = "helper", - .family = AF_INET6, - .checkentry = helper_mt_check, - .match = helper_mt, - .destroy = helper_mt_destroy, - .matchsize = sizeof(struct xt_helper_info), - .me = THIS_MODULE, - }, +static struct xt_match helper_mt_reg __read_mostly = { + .name = "helper", + .revision = 0, + .family = NFPROTO_UNSPEC, + .checkentry = helper_mt_check, + .match = helper_mt, + .destroy = helper_mt_destroy, + .matchsize = sizeof(struct xt_helper_info), + .me = THIS_MODULE, }; static int __init helper_mt_init(void) { - return xt_register_matches(helper_mt_reg, ARRAY_SIZE(helper_mt_reg)); + return xt_register_match(&helper_mt_reg); } static void __exit helper_mt_exit(void) { - xt_unregister_matches(helper_mt_reg, ARRAY_SIZE(helper_mt_reg)); + xt_unregister_match(&helper_mt_reg); } module_init(helper_mt_init); diff --git a/net/netfilter/xt_iprange.c b/net/netfilter/xt_iprange.c index c63e9333c755..6f62c36948d9 100644 --- a/net/netfilter/xt_iprange.c +++ b/net/netfilter/xt_iprange.c @@ -17,12 +17,9 @@ #include <linux/netfilter_ipv4/ipt_iprange.h> static bool -iprange_mt_v0(const struct sk_buff *skb, const struct net_device *in, - const struct net_device *out, const struct xt_match *match, - const void *matchinfo, int offset, unsigned int protoff, - bool *hotdrop) +iprange_mt_v0(const struct sk_buff *skb, const struct xt_match_param *par) { - const struct ipt_iprange_info *info = matchinfo; + const struct ipt_iprange_info *info = par->matchinfo; const struct iphdr *iph = ip_hdr(skb); if (info->flags & IPRANGE_SRC) { @@ -55,12 +52,9 @@ iprange_mt_v0(const struct sk_buff *skb, const struct net_device *in, } static bool -iprange_mt4(const struct sk_buff *skb, const struct net_device *in, - const struct net_device *out, const struct xt_match *match, - const void *matchinfo, int offset, unsigned int protoff, - bool *hotdrop) +iprange_mt4(const struct sk_buff *skb, const struct xt_match_param *par) { - const struct xt_iprange_mtinfo *info = matchinfo; + const struct xt_iprange_mtinfo *info = par->matchinfo; const struct iphdr *iph = ip_hdr(skb); bool m; @@ -111,12 +105,9 @@ iprange_ipv6_sub(const struct in6_addr *a, const struct in6_addr *b) } static bool -iprange_mt6(const struct sk_buff *skb, const struct net_device *in, - const struct net_device *out, const struct xt_match *match, - const void *matchinfo, int offset, unsigned int protoff, - bool *hotdrop) +iprange_mt6(const struct sk_buff *skb, const struct xt_match_param *par) { - const struct xt_iprange_mtinfo *info = matchinfo; + const struct xt_iprange_mtinfo *info = par->matchinfo; const struct ipv6hdr *iph = ipv6_hdr(skb); bool m; @@ -141,7 +132,7 @@ static struct xt_match iprange_mt_reg[] __read_mostly = { { .name = "iprange", .revision = 0, - .family = AF_INET, + .family = NFPROTO_IPV4, .match = iprange_mt_v0, .matchsize = sizeof(struct ipt_iprange_info), .me = THIS_MODULE, @@ -149,7 +140,7 @@ static struct xt_match iprange_mt_reg[] __read_mostly = { { .name = "iprange", .revision = 1, - .family = AF_INET, + .family = NFPROTO_IPV4, .match = iprange_mt4, .matchsize = sizeof(struct xt_iprange_mtinfo), .me = THIS_MODULE, @@ -157,7 +148,7 @@ static struct xt_match iprange_mt_reg[] __read_mostly = { { .name = "iprange", .revision = 1, - .family = AF_INET6, + .family = NFPROTO_IPV6, .match = iprange_mt6, .matchsize = sizeof(struct xt_iprange_mtinfo), .me = THIS_MODULE, diff --git a/net/netfilter/xt_length.c b/net/netfilter/xt_length.c index b8640f972950..c4871ca6c86d 100644 --- a/net/netfilter/xt_length.c +++ b/net/netfilter/xt_length.c @@ -21,24 +21,18 @@ MODULE_ALIAS("ipt_length"); MODULE_ALIAS("ip6t_length"); static bool -length_mt(const struct sk_buff *skb, const struct net_device *in, - const struct net_device *out, const struct xt_match *match, - const void *matchinfo, int offset, unsigned int protoff, - bool *hotdrop) +length_mt(const struct sk_buff *skb, const struct xt_match_param *par) { - const struct xt_length_info *info = matchinfo; + const struct xt_length_info *info = par->matchinfo; u_int16_t pktlen = ntohs(ip_hdr(skb)->tot_len); return (pktlen >= info->min && pktlen <= info->max) ^ info->invert; } static bool -length_mt6(const struct sk_buff *skb, const struct net_device *in, - const struct net_device *out, const struct xt_match *match, - const void *matchinfo, int offset, unsigned int protoff, - bool *hotdrop) +length_mt6(const struct sk_buff *skb, const struct xt_match_param *par) { - const struct xt_length_info *info = matchinfo; + const struct xt_length_info *info = par->matchinfo; const u_int16_t pktlen = ntohs(ipv6_hdr(skb)->payload_len) + sizeof(struct ipv6hdr); @@ -48,14 +42,14 @@ length_mt6(const struct sk_buff *skb, const struct net_device *in, static struct xt_match length_mt_reg[] __read_mostly = { { .name = "length", - .family = AF_INET, + .family = NFPROTO_IPV4, .match = length_mt, .matchsize = sizeof(struct xt_length_info), .me = THIS_MODULE, }, { .name = "length", - .family = AF_INET6, + .family = NFPROTO_IPV6, .match = length_mt6, .matchsize = sizeof(struct xt_length_info), .me = THIS_MODULE, diff --git a/net/netfilter/xt_limit.c b/net/netfilter/xt_limit.c index aad9ab8d2046..c908d69a5595 100644 --- a/net/netfilter/xt_limit.c +++ b/net/netfilter/xt_limit.c @@ -58,13 +58,10 @@ static DEFINE_SPINLOCK(limit_lock); #define CREDITS_PER_JIFFY POW2_BELOW32(MAX_CPJ) static bool -limit_mt(const struct sk_buff *skb, const struct net_device *in, - const struct net_device *out, const struct xt_match *match, - const void *matchinfo, int offset, unsigned int protoff, - bool *hotdrop) +limit_mt(const struct sk_buff *skb, const struct xt_match_param *par) { struct xt_rateinfo *r = - ((const struct xt_rateinfo *)matchinfo)->master; + ((const struct xt_rateinfo *)par->matchinfo)->master; unsigned long now = jiffies; spin_lock_bh(&limit_lock); @@ -95,12 +92,9 @@ user2credits(u_int32_t user) return (user * HZ * CREDITS_PER_JIFFY) / XT_LIMIT_SCALE; } -static bool -limit_mt_check(const char *tablename, const void *inf, - const struct xt_match *match, void *matchinfo, - unsigned int hook_mask) +static bool limit_mt_check(const struct xt_mtchk_param *par) { - struct xt_rateinfo *r = matchinfo; + struct xt_rateinfo *r = par->matchinfo; /* Check for overflow. */ if (r->burst == 0 @@ -167,43 +161,29 @@ static int limit_mt_compat_to_user(void __user *dst, void *src) } #endif /* CONFIG_COMPAT */ -static struct xt_match limit_mt_reg[] __read_mostly = { - { - .name = "limit", - .family = AF_INET, - .checkentry = limit_mt_check, - .match = limit_mt, - .matchsize = sizeof(struct xt_rateinfo), -#ifdef CONFIG_COMPAT - .compatsize = sizeof(struct compat_xt_rateinfo), - .compat_from_user = limit_mt_compat_from_user, - .compat_to_user = limit_mt_compat_to_user, -#endif - .me = THIS_MODULE, - }, - { - .name = "limit", - .family = AF_INET6, - .checkentry = limit_mt_check, - .match = limit_mt, - .matchsize = sizeof(struct xt_rateinfo), +static struct xt_match limit_mt_reg __read_mostly = { + .name = "limit", + .revision = 0, + .family = NFPROTO_UNSPEC, + .match = limit_mt, + .checkentry = limit_mt_check, + .matchsize = sizeof(struct xt_rateinfo), #ifdef CONFIG_COMPAT - .compatsize = sizeof(struct compat_xt_rateinfo), - .compat_from_user = limit_mt_compat_from_user, - .compat_to_user = limit_mt_compat_to_user, + .compatsize = sizeof(struct compat_xt_rateinfo), + .compat_from_user = limit_mt_compat_from_user, + .compat_to_user = limit_mt_compat_to_user, #endif - .me = THIS_MODULE, - }, + .me = THIS_MODULE, }; static int __init limit_mt_init(void) { - return xt_register_matches(limit_mt_reg, ARRAY_SIZE(limit_mt_reg)); + return xt_register_match(&limit_mt_reg); } static void __exit limit_mt_exit(void) { - xt_unregister_matches(limit_mt_reg, ARRAY_SIZE(limit_mt_reg)); + xt_unregister_match(&limit_mt_reg); } module_init(limit_mt_init); diff --git a/net/netfilter/xt_mac.c b/net/netfilter/xt_mac.c index b3e96a0ec176..c2007116ce5b 100644 --- a/net/netfilter/xt_mac.c +++ b/net/netfilter/xt_mac.c @@ -24,12 +24,9 @@ MODULE_DESCRIPTION("Xtables: MAC address match"); MODULE_ALIAS("ipt_mac"); MODULE_ALIAS("ip6t_mac"); -static bool -mac_mt(const struct sk_buff *skb, const struct net_device *in, - const struct net_device *out, const struct xt_match *match, - const void *matchinfo, int offset, unsigned int protoff, bool *hotdrop) +static bool mac_mt(const struct sk_buff *skb, const struct xt_match_param *par) { - const struct xt_mac_info *info = matchinfo; + const struct xt_mac_info *info = par->matchinfo; /* Is mac pointer valid? */ return skb_mac_header(skb) >= skb->head && @@ -39,37 +36,25 @@ mac_mt(const struct sk_buff *skb, const struct net_device *in, ^ info->invert); } -static struct xt_match mac_mt_reg[] __read_mostly = { - { - .name = "mac", - .family = AF_INET, - .match = mac_mt, - .matchsize = sizeof(struct xt_mac_info), - .hooks = (1 << NF_INET_PRE_ROUTING) | - (1 << NF_INET_LOCAL_IN) | - (1 << NF_INET_FORWARD), - .me = THIS_MODULE, - }, - { - .name = "mac", - .family = AF_INET6, - .match = mac_mt, - .matchsize = sizeof(struct xt_mac_info), - .hooks = (1 << NF_INET_PRE_ROUTING) | - (1 << NF_INET_LOCAL_IN) | - (1 << NF_INET_FORWARD), - .me = THIS_MODULE, - }, +static struct xt_match mac_mt_reg __read_mostly = { + .name = "mac", + .revision = 0, + .family = NFPROTO_UNSPEC, + .match = mac_mt, + .matchsize = sizeof(struct xt_mac_info), + .hooks = (1 << NF_INET_PRE_ROUTING) | (1 << NF_INET_LOCAL_IN) | + (1 << NF_INET_FORWARD), + .me = THIS_MODULE, }; static int __init mac_mt_init(void) { - return xt_register_matches(mac_mt_reg, ARRAY_SIZE(mac_mt_reg)); + return xt_register_match(&mac_mt_reg); } static void __exit mac_mt_exit(void) { - xt_unregister_matches(mac_mt_reg, ARRAY_SIZE(mac_mt_reg)); + xt_unregister_match(&mac_mt_reg); } module_init(mac_mt_init); diff --git a/net/netfilter/xt_mark.c b/net/netfilter/xt_mark.c index 9f78f6120fbd..10b9e34bbc5b 100644 --- a/net/netfilter/xt_mark.c +++ b/net/netfilter/xt_mark.c @@ -23,32 +23,24 @@ MODULE_ALIAS("ipt_mark"); MODULE_ALIAS("ip6t_mark"); static bool -mark_mt_v0(const struct sk_buff *skb, const struct net_device *in, - const struct net_device *out, const struct xt_match *match, - const void *matchinfo, int offset, unsigned int protoff, - bool *hotdrop) +mark_mt_v0(const struct sk_buff *skb, const struct xt_match_param *par) { - const struct xt_mark_info *info = matchinfo; + const struct xt_mark_info *info = par->matchinfo; return ((skb->mark & info->mask) == info->mark) ^ info->invert; } static bool -mark_mt(const struct sk_buff *skb, const struct net_device *in, - const struct net_device *out, const struct xt_match *match, - const void *matchinfo, int offset, unsigned int protoff, bool *hotdrop) +mark_mt(const struct sk_buff *skb, const struct xt_match_param *par) { - const struct xt_mark_mtinfo1 *info = matchinfo; + const struct xt_mark_mtinfo1 *info = par->matchinfo; return ((skb->mark & info->mask) == info->mark) ^ info->invert; } -static bool -mark_mt_check_v0(const char *tablename, const void *entry, - const struct xt_match *match, void *matchinfo, - unsigned int hook_mask) +static bool mark_mt_check_v0(const struct xt_mtchk_param *par) { - const struct xt_mark_info *minfo = matchinfo; + const struct xt_mark_info *minfo = par->matchinfo; if (minfo->mark > 0xffffffff || minfo->mask > 0xffffffff) { printk(KERN_WARNING "mark: only supports 32bit mark\n"); @@ -92,7 +84,7 @@ static struct xt_match mark_mt_reg[] __read_mostly = { { .name = "mark", .revision = 0, - .family = AF_INET, + .family = NFPROTO_UNSPEC, .checkentry = mark_mt_check_v0, .match = mark_mt_v0, .matchsize = sizeof(struct xt_mark_info), @@ -104,31 +96,9 @@ static struct xt_match mark_mt_reg[] __read_mostly = { .me = THIS_MODULE, }, { - .name = "mark", - .revision = 0, - .family = AF_INET6, - .checkentry = mark_mt_check_v0, - .match = mark_mt_v0, - .matchsize = sizeof(struct xt_mark_info), -#ifdef CONFIG_COMPAT - .compatsize = sizeof(struct compat_xt_mark_info), - .compat_from_user = mark_mt_compat_from_user_v0, - .compat_to_user = mark_mt_compat_to_user_v0, -#endif - .me = THIS_MODULE, - }, - { - .name = "mark", - .revision = 1, - .family = AF_INET, - .match = mark_mt, - .matchsize = sizeof(struct xt_mark_mtinfo1), - .me = THIS_MODULE, - }, - { .name = "mark", .revision = 1, - .family = AF_INET6, + .family = NFPROTO_UNSPEC, .match = mark_mt, .matchsize = sizeof(struct xt_mark_mtinfo1), .me = THIS_MODULE, diff --git a/net/netfilter/xt_multiport.c b/net/netfilter/xt_multiport.c index fd88c489b70e..d06bb2dd3900 100644 --- a/net/netfilter/xt_multiport.c +++ b/net/netfilter/xt_multiport.c @@ -95,25 +95,22 @@ ports_match_v1(const struct xt_multiport_v1 *minfo, } static bool -multiport_mt_v0(const struct sk_buff *skb, const struct net_device *in, - const struct net_device *out, const struct xt_match *match, - const void *matchinfo, int offset, unsigned int protoff, - bool *hotdrop) +multiport_mt_v0(const struct sk_buff *skb, const struct xt_match_param *par) { const __be16 *pptr; __be16 _ports[2]; - const struct xt_multiport *multiinfo = matchinfo; + const struct xt_multiport *multiinfo = par->matchinfo; - if (offset) + if (par->fragoff != 0) return false; - pptr = skb_header_pointer(skb, protoff, sizeof(_ports), _ports); + pptr = skb_header_pointer(skb, par->thoff, sizeof(_ports), _ports); if (pptr == NULL) { /* We've been asked to examine this packet, and we * can't. Hence, no choice but to drop. */ duprintf("xt_multiport: Dropping evil offset=0 tinygram.\n"); - *hotdrop = true; + *par->hotdrop = true; return false; } @@ -122,25 +119,22 @@ multiport_mt_v0(const struct sk_buff *skb, const struct net_device *in, } static bool -multiport_mt(const struct sk_buff *skb, const struct net_device *in, - const struct net_device *out, const struct xt_match *match, - const void *matchinfo, int offset, unsigned int protoff, - bool *hotdrop) +multiport_mt(const struct sk_buff *skb, const struct xt_match_param *par) { const __be16 *pptr; __be16 _ports[2]; - const struct xt_multiport_v1 *multiinfo = matchinfo; + const struct xt_multiport_v1 *multiinfo = par->matchinfo; - if (offset) + if (par->fragoff != 0) return false; - pptr = skb_header_pointer(skb, protoff, sizeof(_ports), _ports); + pptr = skb_header_pointer(skb, par->thoff, sizeof(_ports), _ports); if (pptr == NULL) { /* We've been asked to examine this packet, and we * can't. Hence, no choice but to drop. */ duprintf("xt_multiport: Dropping evil offset=0 tinygram.\n"); - *hotdrop = true; + *par->hotdrop = true; return false; } @@ -164,50 +158,37 @@ check(u_int16_t proto, && count <= XT_MULTI_PORTS; } -/* Called when user tries to insert an entry of this type. */ -static bool -multiport_mt_check_v0(const char *tablename, const void *info, - const struct xt_match *match, void *matchinfo, - unsigned int hook_mask) +static bool multiport_mt_check_v0(const struct xt_mtchk_param *par) { - const struct ipt_ip *ip = info; - const struct xt_multiport *multiinfo = matchinfo; + const struct ipt_ip *ip = par->entryinfo; + const struct xt_multiport *multiinfo = par->matchinfo; return check(ip->proto, ip->invflags, multiinfo->flags, multiinfo->count); } -static bool -multiport_mt_check(const char *tablename, const void *info, - const struct xt_match *match, void *matchinfo, - unsigned int hook_mask) +static bool multiport_mt_check(const struct xt_mtchk_param *par) { - const struct ipt_ip *ip = info; - const struct xt_multiport_v1 *multiinfo = matchinfo; + const struct ipt_ip *ip = par->entryinfo; + const struct xt_multiport_v1 *multiinfo = par->matchinfo; return check(ip->proto, ip->invflags, multiinfo->flags, multiinfo->count); } -static bool -multiport_mt6_check_v0(const char *tablename, const void *info, - const struct xt_match *match, void *matchinfo, - unsigned int hook_mask) +static bool multiport_mt6_check_v0(const struct xt_mtchk_param *par) { - const struct ip6t_ip6 *ip = info; - const struct xt_multiport *multiinfo = matchinfo; + const struct ip6t_ip6 *ip = par->entryinfo; + const struct xt_multiport *multiinfo = par->matchinfo; return check(ip->proto, ip->invflags, multiinfo->flags, multiinfo->count); } -static bool -multiport_mt6_check(const char *tablename, const void *info, - const struct xt_match *match, void *matchinfo, - unsigned int hook_mask) +static bool multiport_mt6_check(const struct xt_mtchk_param *par) { - const struct ip6t_ip6 *ip = info; - const struct xt_multiport_v1 *multiinfo = matchinfo; + const struct ip6t_ip6 *ip = par->entryinfo; + const struct xt_multiport_v1 *multiinfo = par->matchinfo; return check(ip->proto, ip->invflags, multiinfo->flags, multiinfo->count); @@ -216,7 +197,7 @@ multiport_mt6_check(const char *tablename, const void *info, static struct xt_match multiport_mt_reg[] __read_mostly = { { .name = "multiport", - .family = AF_INET, + .family = NFPROTO_IPV4, .revision = 0, .checkentry = multiport_mt_check_v0, .match = multiport_mt_v0, @@ -225,7 +206,7 @@ static struct xt_match multiport_mt_reg[] __read_mostly = { }, { .name = "multiport", - .family = AF_INET, + .family = NFPROTO_IPV4, .revision = 1, .checkentry = multiport_mt_check, .match = multiport_mt, @@ -234,7 +215,7 @@ static struct xt_match multiport_mt_reg[] __read_mostly = { }, { .name = "multiport", - .family = AF_INET6, + .family = NFPROTO_IPV6, .revision = 0, .checkentry = multiport_mt6_check_v0, .match = multiport_mt_v0, @@ -243,7 +224,7 @@ static struct xt_match multiport_mt_reg[] __read_mostly = { }, { .name = "multiport", - .family = AF_INET6, + .family = NFPROTO_IPV6, .revision = 1, .checkentry = multiport_mt6_check, .match = multiport_mt, diff --git a/net/netfilter/xt_owner.c b/net/netfilter/xt_owner.c index 9059c16144c3..f19ebd9b78f5 100644 --- a/net/netfilter/xt_owner.c +++ b/net/netfilter/xt_owner.c @@ -21,12 +21,9 @@ #include <linux/netfilter_ipv6/ip6t_owner.h> static bool -owner_mt_v0(const struct sk_buff *skb, const struct net_device *in, - const struct net_device *out, const struct xt_match *match, - const void *matchinfo, int offset, unsigned int protoff, - bool *hotdrop) +owner_mt_v0(const struct sk_buff *skb, const struct xt_match_param *par) { - const struct ipt_owner_info *info = matchinfo; + const struct ipt_owner_info *info = par->matchinfo; const struct file *filp; if (skb->sk == NULL || skb->sk->sk_socket == NULL) @@ -50,12 +47,9 @@ owner_mt_v0(const struct sk_buff *skb, const struct net_device *in, } static bool -owner_mt6_v0(const struct sk_buff *skb, const struct net_device *in, - const struct net_device *out, const struct xt_match *match, - const void *matchinfo, int offset, unsigned int protoff, - bool *hotdrop) +owner_mt6_v0(const struct sk_buff *skb, const struct xt_match_param *par) { - const struct ip6t_owner_info *info = matchinfo; + const struct ip6t_owner_info *info = par->matchinfo; const struct file *filp; if (skb->sk == NULL || skb->sk->sk_socket == NULL) @@ -79,12 +73,9 @@ owner_mt6_v0(const struct sk_buff *skb, const struct net_device *in, } static bool -owner_mt(const struct sk_buff *skb, const struct net_device *in, - const struct net_device *out, const struct xt_match *match, - const void *matchinfo, int offset, unsigned int protoff, - bool *hotdrop) +owner_mt(const struct sk_buff *skb, const struct xt_match_param *par) { - const struct xt_owner_match_info *info = matchinfo; + const struct xt_owner_match_info *info = par->matchinfo; const struct file *filp; if (skb->sk == NULL || skb->sk->sk_socket == NULL) @@ -116,12 +107,9 @@ owner_mt(const struct sk_buff *skb, const struct net_device *in, return true; } -static bool -owner_mt_check_v0(const char *tablename, const void *ip, - const struct xt_match *match, void *matchinfo, - unsigned int hook_mask) +static bool owner_mt_check_v0(const struct xt_mtchk_param *par) { - const struct ipt_owner_info *info = matchinfo; + const struct ipt_owner_info *info = par->matchinfo; if (info->match & (IPT_OWNER_PID | IPT_OWNER_SID | IPT_OWNER_COMM)) { printk(KERN_WARNING KBUILD_MODNAME @@ -133,12 +121,9 @@ owner_mt_check_v0(const char *tablename, const void *ip, return true; } -static bool -owner_mt6_check_v0(const char *tablename, const void *ip, - const struct xt_match *match, void *matchinfo, - unsigned int hook_mask) +static bool owner_mt6_check_v0(const struct xt_mtchk_param *par) { - const struct ip6t_owner_info *info = matchinfo; + const struct ip6t_owner_info *info = par->matchinfo; if (info->match & (IP6T_OWNER_PID | IP6T_OWNER_SID)) { printk(KERN_WARNING KBUILD_MODNAME @@ -153,7 +138,7 @@ static struct xt_match owner_mt_reg[] __read_mostly = { { .name = "owner", .revision = 0, - .family = AF_INET, + .family = NFPROTO_IPV4, .match = owner_mt_v0, .matchsize = sizeof(struct ipt_owner_info), .checkentry = owner_mt_check_v0, @@ -164,7 +149,7 @@ static struct xt_match owner_mt_reg[] __read_mostly = { { .name = "owner", .revision = 0, - .family = AF_INET6, + .family = NFPROTO_IPV6, .match = owner_mt6_v0, .matchsize = sizeof(struct ip6t_owner_info), .checkentry = owner_mt6_check_v0, @@ -175,17 +160,7 @@ static struct xt_match owner_mt_reg[] __read_mostly = { { .name = "owner", .revision = 1, - .family = AF_INET, - .match = owner_mt, - .matchsize = sizeof(struct xt_owner_match_info), - .hooks = (1 << NF_INET_LOCAL_OUT) | - (1 << NF_INET_POST_ROUTING), - .me = THIS_MODULE, - }, - { - .name = "owner", - .revision = 1, - .family = AF_INET6, + .family = NFPROTO_UNSPEC, .match = owner_mt, .matchsize = sizeof(struct xt_owner_match_info), .hooks = (1 << NF_INET_LOCAL_OUT) | diff --git a/net/netfilter/xt_physdev.c b/net/netfilter/xt_physdev.c index 4ec1094bda92..1bcdfc12cf59 100644 --- a/net/netfilter/xt_physdev.c +++ b/net/netfilter/xt_physdev.c @@ -21,14 +21,11 @@ MODULE_ALIAS("ipt_physdev"); MODULE_ALIAS("ip6t_physdev"); static bool -physdev_mt(const struct sk_buff *skb, const struct net_device *in, - const struct net_device *out, const struct xt_match *match, - const void *matchinfo, int offset, unsigned int protoff, - bool *hotdrop) +physdev_mt(const struct sk_buff *skb, const struct xt_match_param *par) { int i; static const char nulldevname[IFNAMSIZ]; - const struct xt_physdev_info *info = matchinfo; + const struct xt_physdev_info *info = par->matchinfo; bool ret; const char *indev, *outdev; const struct nf_bridge_info *nf_bridge; @@ -94,12 +91,9 @@ match_outdev: return ret ^ !(info->invert & XT_PHYSDEV_OP_OUT); } -static bool -physdev_mt_check(const char *tablename, const void *ip, - const struct xt_match *match, void *matchinfo, - unsigned int hook_mask) +static bool physdev_mt_check(const struct xt_mtchk_param *par) { - const struct xt_physdev_info *info = matchinfo; + const struct xt_physdev_info *info = par->matchinfo; if (!(info->bitmask & XT_PHYSDEV_OP_MASK) || info->bitmask & ~XT_PHYSDEV_OP_MASK) @@ -107,44 +101,35 @@ physdev_mt_check(const char *tablename, const void *ip, if (info->bitmask & XT_PHYSDEV_OP_OUT && (!(info->bitmask & XT_PHYSDEV_OP_BRIDGED) || info->invert & XT_PHYSDEV_OP_BRIDGED) && - hook_mask & ((1 << NF_INET_LOCAL_OUT) | (1 << NF_INET_FORWARD) | - (1 << NF_INET_POST_ROUTING))) { + par->hook_mask & ((1 << NF_INET_LOCAL_OUT) | + (1 << NF_INET_FORWARD) | (1 << NF_INET_POST_ROUTING))) { printk(KERN_WARNING "physdev match: using --physdev-out in the " "OUTPUT, FORWARD and POSTROUTING chains for non-bridged " "traffic is not supported anymore.\n"); - if (hook_mask & (1 << NF_INET_LOCAL_OUT)) + if (par->hook_mask & (1 << NF_INET_LOCAL_OUT)) return false; } return true; } -static struct xt_match physdev_mt_reg[] __read_mostly = { - { - .name = "physdev", - .family = AF_INET, - .checkentry = physdev_mt_check, - .match = physdev_mt, - .matchsize = sizeof(struct xt_physdev_info), - .me = THIS_MODULE, - }, - { - .name = "physdev", - .family = AF_INET6, - .checkentry = physdev_mt_check, - .match = physdev_mt, - .matchsize = sizeof(struct xt_physdev_info), - .me = THIS_MODULE, - }, +static struct xt_match physdev_mt_reg __read_mostly = { + .name = "physdev", + .revision = 0, + .family = NFPROTO_UNSPEC, + .checkentry = physdev_mt_check, + .match = physdev_mt, + .matchsize = sizeof(struct xt_physdev_info), + .me = THIS_MODULE, }; static int __init physdev_mt_init(void) { - return xt_register_matches(physdev_mt_reg, ARRAY_SIZE(physdev_mt_reg)); + return xt_register_match(&physdev_mt_reg); } static void __exit physdev_mt_exit(void) { - xt_unregister_matches(physdev_mt_reg, ARRAY_SIZE(physdev_mt_reg)); + xt_unregister_match(&physdev_mt_reg); } module_init(physdev_mt_init); diff --git a/net/netfilter/xt_pkttype.c b/net/netfilter/xt_pkttype.c index 7936f7e23254..69da1d3a1d85 100644 --- a/net/netfilter/xt_pkttype.c +++ b/net/netfilter/xt_pkttype.c @@ -23,20 +23,17 @@ MODULE_ALIAS("ipt_pkttype"); MODULE_ALIAS("ip6t_pkttype"); static bool -pkttype_mt(const struct sk_buff *skb, const struct net_device *in, - const struct net_device *out, const struct xt_match *match, - const void *matchinfo, int offset, unsigned int protoff, - bool *hotdrop) +pkttype_mt(const struct sk_buff *skb, const struct xt_match_param *par) { - const struct xt_pkttype_info *info = matchinfo; + const struct xt_pkttype_info *info = par->matchinfo; u_int8_t type; if (skb->pkt_type != PACKET_LOOPBACK) type = skb->pkt_type; - else if (match->family == AF_INET && + else if (par->family == NFPROTO_IPV4 && ipv4_is_multicast(ip_hdr(skb)->daddr)) type = PACKET_MULTICAST; - else if (match->family == AF_INET6 && + else if (par->family == NFPROTO_IPV6 && ipv6_hdr(skb)->daddr.s6_addr[0] == 0xFF) type = PACKET_MULTICAST; else @@ -45,31 +42,23 @@ pkttype_mt(const struct sk_buff *skb, const struct net_device *in, return (type == info->pkttype) ^ info->invert; } -static struct xt_match pkttype_mt_reg[] __read_mostly = { - { - .name = "pkttype", - .family = AF_INET, - .match = pkttype_mt, - .matchsize = sizeof(struct xt_pkttype_info), - .me = THIS_MODULE, - }, - { - .name = "pkttype", - .family = AF_INET6, - .match = pkttype_mt, - .matchsize = sizeof(struct xt_pkttype_info), - .me = THIS_MODULE, - }, +static struct xt_match pkttype_mt_reg __read_mostly = { + .name = "pkttype", + .revision = 0, + .family = NFPROTO_UNSPEC, + .match = pkttype_mt, + .matchsize = sizeof(struct xt_pkttype_info), + .me = THIS_MODULE, }; static int __init pkttype_mt_init(void) { - return xt_register_matches(pkttype_mt_reg, ARRAY_SIZE(pkttype_mt_reg)); + return xt_register_match(&pkttype_mt_reg); } static void __exit pkttype_mt_exit(void) { - xt_unregister_matches(pkttype_mt_reg, ARRAY_SIZE(pkttype_mt_reg)); + xt_unregister_match(&pkttype_mt_reg); } module_init(pkttype_mt_init); diff --git a/net/netfilter/xt_policy.c b/net/netfilter/xt_policy.c index d351582b2a3d..328bd20ddd25 100644 --- a/net/netfilter/xt_policy.c +++ b/net/netfilter/xt_policy.c @@ -26,9 +26,9 @@ xt_addr_cmp(const union nf_inet_addr *a1, const union nf_inet_addr *m, const union nf_inet_addr *a2, unsigned short family) { switch (family) { - case AF_INET: + case NFPROTO_IPV4: return ((a1->ip ^ a2->ip) & m->ip) == 0; - case AF_INET6: + case NFPROTO_IPV6: return ipv6_masked_addr_cmp(&a1->in6, &m->in6, &a2->in6) == 0; } return false; @@ -110,18 +110,15 @@ match_policy_out(const struct sk_buff *skb, const struct xt_policy_info *info, } static bool -policy_mt(const struct sk_buff *skb, const struct net_device *in, - const struct net_device *out, const struct xt_match *match, - const void *matchinfo, int offset, unsigned int protoff, - bool *hotdrop) +policy_mt(const struct sk_buff *skb, const struct xt_match_param *par) { - const struct xt_policy_info *info = matchinfo; + const struct xt_policy_info *info = par->matchinfo; int ret; if (info->flags & XT_POLICY_MATCH_IN) - ret = match_policy_in(skb, info, match->family); + ret = match_policy_in(skb, info, par->match->family); else - ret = match_policy_out(skb, info, match->family); + ret = match_policy_out(skb, info, par->match->family); if (ret < 0) ret = info->flags & XT_POLICY_MATCH_NONE ? true : false; @@ -131,26 +128,23 @@ policy_mt(const struct sk_buff *skb, const struct net_device *in, return ret; } -static bool -policy_mt_check(const char *tablename, const void *ip_void, - const struct xt_match *match, void *matchinfo, - unsigned int hook_mask) +static bool policy_mt_check(const struct xt_mtchk_param *par) { - const struct xt_policy_info *info = matchinfo; + const struct xt_policy_info *info = par->matchinfo; if (!(info->flags & (XT_POLICY_MATCH_IN|XT_POLICY_MATCH_OUT))) { printk(KERN_ERR "xt_policy: neither incoming nor " "outgoing policy selected\n"); return false; } - if (hook_mask & (1 << NF_INET_PRE_ROUTING | 1 << NF_INET_LOCAL_IN) - && info->flags & XT_POLICY_MATCH_OUT) { + if (par->hook_mask & ((1 << NF_INET_PRE_ROUTING) | + (1 << NF_INET_LOCAL_IN)) && info->flags & XT_POLICY_MATCH_OUT) { printk(KERN_ERR "xt_policy: output policy not valid in " "PRE_ROUTING and INPUT\n"); return false; } - if (hook_mask & (1 << NF_INET_POST_ROUTING | 1 << NF_INET_LOCAL_OUT) - && info->flags & XT_POLICY_MATCH_IN) { + if (par->hook_mask & ((1 << NF_INET_POST_ROUTING) | + (1 << NF_INET_LOCAL_OUT)) && info->flags & XT_POLICY_MATCH_IN) { printk(KERN_ERR "xt_policy: input policy not valid in " "POST_ROUTING and OUTPUT\n"); return false; @@ -165,7 +159,7 @@ policy_mt_check(const char *tablename, const void *ip_void, static struct xt_match policy_mt_reg[] __read_mostly = { { .name = "policy", - .family = AF_INET, + .family = NFPROTO_IPV4, .checkentry = policy_mt_check, .match = policy_mt, .matchsize = sizeof(struct xt_policy_info), @@ -173,7 +167,7 @@ static struct xt_match policy_mt_reg[] __read_mostly = { }, { .name = "policy", - .family = AF_INET6, + .family = NFPROTO_IPV6, .checkentry = policy_mt_check, .match = policy_mt, .matchsize = sizeof(struct xt_policy_info), diff --git a/net/netfilter/xt_quota.c b/net/netfilter/xt_quota.c index 3b021d0c522a..c84fce5e0f3e 100644 --- a/net/netfilter/xt_quota.c +++ b/net/netfilter/xt_quota.c @@ -18,13 +18,10 @@ MODULE_ALIAS("ip6t_quota"); static DEFINE_SPINLOCK(quota_lock); static bool -quota_mt(const struct sk_buff *skb, const struct net_device *in, - const struct net_device *out, const struct xt_match *match, - const void *matchinfo, int offset, unsigned int protoff, - bool *hotdrop) +quota_mt(const struct sk_buff *skb, const struct xt_match_param *par) { struct xt_quota_info *q = - ((const struct xt_quota_info *)matchinfo)->master; + ((const struct xt_quota_info *)par->matchinfo)->master; bool ret = q->flags & XT_QUOTA_INVERT; spin_lock_bh("a_lock); @@ -40,12 +37,9 @@ quota_mt(const struct sk_buff *skb, const struct net_device *in, return ret; } -static bool -quota_mt_check(const char *tablename, const void *entry, - const struct xt_match *match, void *matchinfo, - unsigned int hook_mask) +static bool quota_mt_check(const struct xt_mtchk_param *par) { - struct xt_quota_info *q = matchinfo; + struct xt_quota_info *q = par->matchinfo; if (q->flags & ~XT_QUOTA_MASK) return false; @@ -54,33 +48,24 @@ quota_mt_check(const char *tablename, const void *entry, return true; } -static struct xt_match quota_mt_reg[] __read_mostly = { - { - .name = "quota", - .family = AF_INET, - .checkentry = quota_mt_check, - .match = quota_mt, - .matchsize = sizeof(struct xt_quota_info), - .me = THIS_MODULE - }, - { - .name = "quota", - .family = AF_INET6, - .checkentry = quota_mt_check, - .match = quota_mt, - .matchsize = sizeof(struct xt_quota_info), - .me = THIS_MODULE - }, +static struct xt_match quota_mt_reg __read_mostly = { + .name = "quota", + .revision = 0, + .family = NFPROTO_UNSPEC, + .match = quota_mt, + .checkentry = quota_mt_check, + .matchsize = sizeof(struct xt_quota_info), + .me = THIS_MODULE, }; static int __init quota_mt_init(void) { - return xt_register_matches(quota_mt_reg, ARRAY_SIZE(quota_mt_reg)); + return xt_register_match("a_mt_reg); } static void __exit quota_mt_exit(void) { - xt_unregister_matches(quota_mt_reg, ARRAY_SIZE(quota_mt_reg)); + xt_unregister_match("a_mt_reg); } module_init(quota_mt_init); diff --git a/net/netfilter/xt_rateest.c b/net/netfilter/xt_rateest.c index ebd84f1b4f62..220a1d588ee0 100644 --- a/net/netfilter/xt_rateest.c +++ b/net/netfilter/xt_rateest.c @@ -14,16 +14,10 @@ #include <net/netfilter/xt_rateest.h> -static bool xt_rateest_mt(const struct sk_buff *skb, - const struct net_device *in, - const struct net_device *out, - const struct xt_match *match, - const void *matchinfo, - int offset, - unsigned int protoff, - bool *hotdrop) +static bool +xt_rateest_mt(const struct sk_buff *skb, const struct xt_match_param *par) { - const struct xt_rateest_match_info *info = matchinfo; + const struct xt_rateest_match_info *info = par->matchinfo; struct gnet_stats_rate_est *r; u_int32_t bps1, bps2, pps1, pps2; bool ret = true; @@ -80,13 +74,9 @@ static bool xt_rateest_mt(const struct sk_buff *skb, return ret; } -static bool xt_rateest_mt_checkentry(const char *tablename, - const void *ip, - const struct xt_match *match, - void *matchinfo, - unsigned int hook_mask) +static bool xt_rateest_mt_checkentry(const struct xt_mtchk_param *par) { - struct xt_rateest_match_info *info = matchinfo; + struct xt_rateest_match_info *info = par->matchinfo; struct xt_rateest *est1, *est2; if (hweight32(info->flags & (XT_RATEEST_MATCH_ABS | @@ -127,46 +117,34 @@ err1: return false; } -static void xt_rateest_mt_destroy(const struct xt_match *match, - void *matchinfo) +static void xt_rateest_mt_destroy(const struct xt_mtdtor_param *par) { - struct xt_rateest_match_info *info = matchinfo; + struct xt_rateest_match_info *info = par->matchinfo; xt_rateest_put(info->est1); if (info->est2) xt_rateest_put(info->est2); } -static struct xt_match xt_rateest_match[] __read_mostly = { - { - .family = AF_INET, - .name = "rateest", - .match = xt_rateest_mt, - .checkentry = xt_rateest_mt_checkentry, - .destroy = xt_rateest_mt_destroy, - .matchsize = sizeof(struct xt_rateest_match_info), - .me = THIS_MODULE, - }, - { - .family = AF_INET6, - .name = "rateest", - .match = xt_rateest_mt, - .checkentry = xt_rateest_mt_checkentry, - .destroy = xt_rateest_mt_destroy, - .matchsize = sizeof(struct xt_rateest_match_info), - .me = THIS_MODULE, - }, +static struct xt_match xt_rateest_mt_reg __read_mostly = { + .name = "rateest", + .revision = 0, + .family = NFPROTO_UNSPEC, + .match = xt_rateest_mt, + .checkentry = xt_rateest_mt_checkentry, + .destroy = xt_rateest_mt_destroy, + .matchsize = sizeof(struct xt_rateest_match_info), + .me = THIS_MODULE, }; static int __init xt_rateest_mt_init(void) { - return xt_register_matches(xt_rateest_match, - ARRAY_SIZE(xt_rateest_match)); + return xt_register_match(&xt_rateest_mt_reg); } static void __exit xt_rateest_mt_fini(void) { - xt_unregister_matches(xt_rateest_match, ARRAY_SIZE(xt_rateest_match)); + xt_unregister_match(&xt_rateest_mt_reg); } MODULE_AUTHOR("Patrick McHardy <kaber@trash.net>"); diff --git a/net/netfilter/xt_realm.c b/net/netfilter/xt_realm.c index 7df1627c536f..67419287bc7e 100644 --- a/net/netfilter/xt_realm.c +++ b/net/netfilter/xt_realm.c @@ -22,12 +22,9 @@ MODULE_DESCRIPTION("Xtables: Routing realm match"); MODULE_ALIAS("ipt_realm"); static bool -realm_mt(const struct sk_buff *skb, const struct net_device *in, - const struct net_device *out, const struct xt_match *match, - const void *matchinfo, int offset, unsigned int protoff, - bool *hotdrop) +realm_mt(const struct sk_buff *skb, const struct xt_match_param *par) { - const struct xt_realm_info *info = matchinfo; + const struct xt_realm_info *info = par->matchinfo; const struct dst_entry *dst = skb->dst; return (info->id == (dst->tclassid & info->mask)) ^ info->invert; @@ -39,7 +36,7 @@ static struct xt_match realm_mt_reg __read_mostly = { .matchsize = sizeof(struct xt_realm_info), .hooks = (1 << NF_INET_POST_ROUTING) | (1 << NF_INET_FORWARD) | (1 << NF_INET_LOCAL_OUT) | (1 << NF_INET_LOCAL_IN), - .family = AF_INET, + .family = NFPROTO_UNSPEC, .me = THIS_MODULE }; diff --git a/net/ipv4/netfilter/ipt_recent.c b/net/netfilter/xt_recent.c index 21cb053f5d7d..4ebd4ca9a991 100644 --- a/net/ipv4/netfilter/ipt_recent.c +++ b/net/netfilter/xt_recent.c @@ -1,5 +1,6 @@ /* * Copyright (c) 2006 Patrick McHardy <kaber@trash.net> + * Copyright © CC Computer Consultants GmbH, 2007 - 2008 * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License version 2 as @@ -13,6 +14,8 @@ */ #include <linux/init.h> #include <linux/ip.h> +#include <linux/ipv6.h> +#include <linux/module.h> #include <linux/moduleparam.h> #include <linux/proc_fs.h> #include <linux/seq_file.h> @@ -27,11 +30,14 @@ #include <net/net_namespace.h> #include <linux/netfilter/x_tables.h> -#include <linux/netfilter_ipv4/ipt_recent.h> +#include <linux/netfilter/xt_recent.h> MODULE_AUTHOR("Patrick McHardy <kaber@trash.net>"); +MODULE_AUTHOR("Jan Engelhardt <jengelh@computergmbh.de>"); MODULE_DESCRIPTION("Xtables: \"recently-seen\" host matching for IPv4"); MODULE_LICENSE("GPL"); +MODULE_ALIAS("ipt_recent"); +MODULE_ALIAS("ip6t_recent"); static unsigned int ip_list_tot = 100; static unsigned int ip_pkt_list_tot = 20; @@ -48,14 +54,15 @@ module_param(ip_list_gid, uint, 0400); MODULE_PARM_DESC(ip_list_tot, "number of IPs to remember per list"); MODULE_PARM_DESC(ip_pkt_list_tot, "number of packets per IP to remember (max. 255)"); MODULE_PARM_DESC(ip_list_hash_size, "size of hash table used to look up IPs"); -MODULE_PARM_DESC(ip_list_perms, "permissions on /proc/net/ipt_recent/* files"); -MODULE_PARM_DESC(ip_list_uid,"owner of /proc/net/ipt_recent/* files"); -MODULE_PARM_DESC(ip_list_gid,"owning group of /proc/net/ipt_recent/* files"); +MODULE_PARM_DESC(ip_list_perms, "permissions on /proc/net/xt_recent/* files"); +MODULE_PARM_DESC(ip_list_uid,"owner of /proc/net/xt_recent/* files"); +MODULE_PARM_DESC(ip_list_gid,"owning group of /proc/net/xt_recent/* files"); struct recent_entry { struct list_head list; struct list_head lru_list; - __be32 addr; + union nf_inet_addr addr; + u_int16_t family; u_int8_t ttl; u_int8_t index; u_int16_t nstamps; @@ -64,9 +71,9 @@ struct recent_entry { struct recent_table { struct list_head list; - char name[IPT_RECENT_NAME_LEN]; + char name[XT_RECENT_NAME_LEN]; #ifdef CONFIG_PROC_FS - struct proc_dir_entry *proc; + struct proc_dir_entry *proc_old, *proc; #endif unsigned int refcnt; unsigned int entries; @@ -79,31 +86,53 @@ static DEFINE_SPINLOCK(recent_lock); static DEFINE_MUTEX(recent_mutex); #ifdef CONFIG_PROC_FS -static struct proc_dir_entry *proc_dir; -static const struct file_operations recent_fops; +#ifdef CONFIG_NETFILTER_XT_MATCH_RECENT_PROC_COMPAT +static struct proc_dir_entry *proc_old_dir; +#endif +static struct proc_dir_entry *recent_proc_dir; +static const struct file_operations recent_old_fops, recent_mt_fops; #endif static u_int32_t hash_rnd; -static int hash_rnd_initted; +static bool hash_rnd_initted; + +static unsigned int recent_entry_hash4(const union nf_inet_addr *addr) +{ + if (!hash_rnd_initted) { + get_random_bytes(&hash_rnd, sizeof(hash_rnd)); + hash_rnd_initted = true; + } + return jhash_1word((__force u32)addr->ip, hash_rnd) & + (ip_list_hash_size - 1); +} -static unsigned int recent_entry_hash(__be32 addr) +static unsigned int recent_entry_hash6(const union nf_inet_addr *addr) { if (!hash_rnd_initted) { - get_random_bytes(&hash_rnd, 4); - hash_rnd_initted = 1; + get_random_bytes(&hash_rnd, sizeof(hash_rnd)); + hash_rnd_initted = true; } - return jhash_1word((__force u32)addr, hash_rnd) & (ip_list_hash_size - 1); + return jhash2((u32 *)addr->ip6, ARRAY_SIZE(addr->ip6), hash_rnd) & + (ip_list_hash_size - 1); } static struct recent_entry * -recent_entry_lookup(const struct recent_table *table, __be32 addr, u_int8_t ttl) +recent_entry_lookup(const struct recent_table *table, + const union nf_inet_addr *addrp, u_int16_t family, + u_int8_t ttl) { struct recent_entry *e; unsigned int h; - h = recent_entry_hash(addr); + if (family == NFPROTO_IPV4) + h = recent_entry_hash4(addrp); + else + h = recent_entry_hash6(addrp); + list_for_each_entry(e, &table->iphash[h], list) - if (e->addr == addr && (ttl == e->ttl || !ttl || !e->ttl)) + if (e->family == family && + memcmp(&e->addr, addrp, sizeof(e->addr)) == 0 && + (ttl == e->ttl || ttl == 0 || e->ttl == 0)) return e; return NULL; } @@ -117,7 +146,8 @@ static void recent_entry_remove(struct recent_table *t, struct recent_entry *e) } static struct recent_entry * -recent_entry_init(struct recent_table *t, __be32 addr, u_int8_t ttl) +recent_entry_init(struct recent_table *t, const union nf_inet_addr *addr, + u_int16_t family, u_int8_t ttl) { struct recent_entry *e; @@ -129,12 +159,16 @@ recent_entry_init(struct recent_table *t, __be32 addr, u_int8_t ttl) GFP_ATOMIC); if (e == NULL) return NULL; - e->addr = addr; + memcpy(&e->addr, addr, sizeof(e->addr)); e->ttl = ttl; e->stamps[0] = jiffies; e->nstamps = 1; e->index = 1; - list_add_tail(&e->list, &t->iphash[recent_entry_hash(addr)]); + e->family = family; + if (family == NFPROTO_IPV4) + list_add_tail(&e->list, &t->iphash[recent_entry_hash4(addr)]); + else + list_add_tail(&e->list, &t->iphash[recent_entry_hash6(addr)]); list_add_tail(&e->lru_list, &t->lru_list); t->entries++; return e; @@ -170,48 +204,59 @@ static void recent_table_flush(struct recent_table *t) } static bool -recent_mt(const struct sk_buff *skb, const struct net_device *in, - const struct net_device *out, const struct xt_match *match, - const void *matchinfo, int offset, unsigned int protoff, - bool *hotdrop) +recent_mt(const struct sk_buff *skb, const struct xt_match_param *par) { - const struct ipt_recent_info *info = matchinfo; + const struct xt_recent_mtinfo *info = par->matchinfo; struct recent_table *t; struct recent_entry *e; - __be32 addr; + union nf_inet_addr addr = {}; u_int8_t ttl; bool ret = info->invert; - if (info->side == IPT_RECENT_DEST) - addr = ip_hdr(skb)->daddr; - else - addr = ip_hdr(skb)->saddr; + if (par->match->family == NFPROTO_IPV4) { + const struct iphdr *iph = ip_hdr(skb); + + if (info->side == XT_RECENT_DEST) + addr.ip = iph->daddr; + else + addr.ip = iph->saddr; + + ttl = iph->ttl; + } else { + const struct ipv6hdr *iph = ipv6_hdr(skb); + + if (info->side == XT_RECENT_DEST) + memcpy(&addr.in6, &iph->daddr, sizeof(addr.in6)); + else + memcpy(&addr.in6, &iph->saddr, sizeof(addr.in6)); + + ttl = iph->hop_limit; + } - ttl = ip_hdr(skb)->ttl; /* use TTL as seen before forwarding */ - if (out && !skb->sk) + if (par->out != NULL && skb->sk == NULL) ttl++; spin_lock_bh(&recent_lock); t = recent_table_lookup(info->name); - e = recent_entry_lookup(t, addr, - info->check_set & IPT_RECENT_TTL ? ttl : 0); + e = recent_entry_lookup(t, &addr, par->match->family, + (info->check_set & XT_RECENT_TTL) ? ttl : 0); if (e == NULL) { - if (!(info->check_set & IPT_RECENT_SET)) + if (!(info->check_set & XT_RECENT_SET)) goto out; - e = recent_entry_init(t, addr, ttl); + e = recent_entry_init(t, &addr, par->match->family, ttl); if (e == NULL) - *hotdrop = true; + *par->hotdrop = true; ret = !ret; goto out; } - if (info->check_set & IPT_RECENT_SET) + if (info->check_set & XT_RECENT_SET) ret = !ret; - else if (info->check_set & IPT_RECENT_REMOVE) { + else if (info->check_set & XT_RECENT_REMOVE) { recent_entry_remove(t, e); ret = !ret; - } else if (info->check_set & (IPT_RECENT_CHECK | IPT_RECENT_UPDATE)) { + } else if (info->check_set & (XT_RECENT_CHECK | XT_RECENT_UPDATE)) { unsigned long time = jiffies - info->seconds * HZ; unsigned int i, hits = 0; @@ -225,8 +270,8 @@ recent_mt(const struct sk_buff *skb, const struct net_device *in, } } - if (info->check_set & IPT_RECENT_SET || - (info->check_set & IPT_RECENT_UPDATE && ret)) { + if (info->check_set & XT_RECENT_SET || + (info->check_set & XT_RECENT_UPDATE && ret)) { recent_entry_update(t, e); e->ttl = ttl; } @@ -235,27 +280,24 @@ out: return ret; } -static bool -recent_mt_check(const char *tablename, const void *ip, - const struct xt_match *match, void *matchinfo, - unsigned int hook_mask) +static bool recent_mt_check(const struct xt_mtchk_param *par) { - const struct ipt_recent_info *info = matchinfo; + const struct xt_recent_mtinfo *info = par->matchinfo; struct recent_table *t; unsigned i; bool ret = false; if (hweight8(info->check_set & - (IPT_RECENT_SET | IPT_RECENT_REMOVE | - IPT_RECENT_CHECK | IPT_RECENT_UPDATE)) != 1) + (XT_RECENT_SET | XT_RECENT_REMOVE | + XT_RECENT_CHECK | XT_RECENT_UPDATE)) != 1) return false; - if ((info->check_set & (IPT_RECENT_SET | IPT_RECENT_REMOVE)) && + if ((info->check_set & (XT_RECENT_SET | XT_RECENT_REMOVE)) && (info->seconds || info->hit_count)) return false; if (info->hit_count > ip_pkt_list_tot) return false; if (info->name[0] == '\0' || - strnlen(info->name, IPT_RECENT_NAME_LEN) == IPT_RECENT_NAME_LEN) + strnlen(info->name, XT_RECENT_NAME_LEN) == XT_RECENT_NAME_LEN) return false; mutex_lock(&recent_mutex); @@ -276,11 +318,24 @@ recent_mt_check(const char *tablename, const void *ip, for (i = 0; i < ip_list_hash_size; i++) INIT_LIST_HEAD(&t->iphash[i]); #ifdef CONFIG_PROC_FS - t->proc = proc_create(t->name, ip_list_perms, proc_dir, &recent_fops); + t->proc = proc_create(t->name, ip_list_perms, recent_proc_dir, + &recent_mt_fops); if (t->proc == NULL) { kfree(t); goto out; } +#ifdef CONFIG_NETFILTER_XT_MATCH_RECENT_PROC_COMPAT + t->proc_old = proc_create(t->name, ip_list_perms, proc_old_dir, + &recent_old_fops); + if (t->proc_old == NULL) { + remove_proc_entry(t->name, proc_old_dir); + kfree(t); + goto out; + } + t->proc_old->uid = ip_list_uid; + t->proc_old->gid = ip_list_gid; + t->proc_old->data = t; +#endif t->proc->uid = ip_list_uid; t->proc->gid = ip_list_gid; t->proc->data = t; @@ -294,9 +349,9 @@ out: return ret; } -static void recent_mt_destroy(const struct xt_match *match, void *matchinfo) +static void recent_mt_destroy(const struct xt_mtdtor_param *par) { - const struct ipt_recent_info *info = matchinfo; + const struct xt_recent_mtinfo *info = par->matchinfo; struct recent_table *t; mutex_lock(&recent_mutex); @@ -305,10 +360,13 @@ static void recent_mt_destroy(const struct xt_match *match, void *matchinfo) spin_lock_bh(&recent_lock); list_del(&t->list); spin_unlock_bh(&recent_lock); - recent_table_flush(t); #ifdef CONFIG_PROC_FS - remove_proc_entry(t->name, proc_dir); +#ifdef CONFIG_NETFILTER_XT_MATCH_RECENT_PROC_COMPAT + remove_proc_entry(t->name, proc_old_dir); #endif + remove_proc_entry(t->name, recent_proc_dir); +#endif + recent_table_flush(t); kfree(t); } mutex_unlock(&recent_mutex); @@ -316,7 +374,7 @@ static void recent_mt_destroy(const struct xt_match *match, void *matchinfo) #ifdef CONFIG_PROC_FS struct recent_iter_state { - struct recent_table *table; + const struct recent_table *table; unsigned int bucket; }; @@ -341,8 +399,8 @@ static void *recent_seq_next(struct seq_file *seq, void *v, loff_t *pos) { struct recent_iter_state *st = seq->private; const struct recent_table *t = st->table; - struct recent_entry *e = v; - struct list_head *head = e->list.next; + const struct recent_entry *e = v; + const struct list_head *head = e->list.next; while (head == &t->iphash[st->bucket]) { if (++st->bucket >= ip_list_hash_size) @@ -365,8 +423,14 @@ static int recent_seq_show(struct seq_file *seq, void *v) unsigned int i; i = (e->index - 1) % ip_pkt_list_tot; - seq_printf(seq, "src=%u.%u.%u.%u ttl: %u last_seen: %lu oldest_pkt: %u", - NIPQUAD(e->addr), e->ttl, e->stamps[i], e->index); + if (e->family == NFPROTO_IPV4) + seq_printf(seq, "src=" NIPQUAD_FMT " ttl: %u last_seen: %lu " + "oldest_pkt: %u", NIPQUAD(e->addr.ip), e->ttl, + e->stamps[i], e->index); + else + seq_printf(seq, "src=" NIP6_FMT " ttl: %u last_seen: %lu " + "oldest_pkt: %u", NIP6(e->addr.in6), e->ttl, + e->stamps[i], e->index); for (i = 0; i < e->nstamps; i++) seq_printf(seq, "%s %lu", i ? "," : "", e->stamps[i]); seq_printf(seq, "\n"); @@ -393,8 +457,22 @@ static int recent_seq_open(struct inode *inode, struct file *file) return 0; } -static ssize_t recent_proc_write(struct file *file, const char __user *input, - size_t size, loff_t *loff) +#ifdef CONFIG_NETFILTER_XT_MATCH_RECENT_PROC_COMPAT +static int recent_old_seq_open(struct inode *inode, struct file *filp) +{ + static bool warned_of_old; + + if (unlikely(!warned_of_old)) { + printk(KERN_INFO KBUILD_MODNAME ": Use of /proc/net/ipt_recent" + " is deprecated; use /proc/net/xt_recent.\n"); + warned_of_old = true; + } + return recent_seq_open(inode, filp); +} + +static ssize_t recent_old_proc_write(struct file *file, + const char __user *input, + size_t size, loff_t *loff) { const struct proc_dir_entry *pde = PDE(file->f_path.dentry->d_inode); struct recent_table *t = pde->data; @@ -407,6 +485,7 @@ static ssize_t recent_proc_write(struct file *file, const char __user *input, size = sizeof(buf); if (copy_from_user(buf, input, size)) return -EFAULT; + while (isspace(*c)) c++; @@ -434,10 +513,11 @@ static ssize_t recent_proc_write(struct file *file, const char __user *input, addr = in_aton(c); spin_lock_bh(&recent_lock); - e = recent_entry_lookup(t, addr, 0); + e = recent_entry_lookup(t, (const void *)&addr, NFPROTO_IPV4, 0); if (e == NULL) { if (add) - recent_entry_init(t, addr, 0); + recent_entry_init(t, (const void *)&addr, + NFPROTO_IPV4, 0); } else { if (add) recent_entry_update(t, e); @@ -448,23 +528,118 @@ static ssize_t recent_proc_write(struct file *file, const char __user *input, return size; } -static const struct file_operations recent_fops = { - .open = recent_seq_open, +static const struct file_operations recent_old_fops = { + .open = recent_old_seq_open, .read = seq_read, - .write = recent_proc_write, + .write = recent_old_proc_write, .release = seq_release_private, .owner = THIS_MODULE, }; +#endif + +static ssize_t +recent_mt_proc_write(struct file *file, const char __user *input, + size_t size, loff_t *loff) +{ + const struct proc_dir_entry *pde = PDE(file->f_path.dentry->d_inode); + struct recent_table *t = pde->data; + struct recent_entry *e; + char buf[sizeof("+b335:1d35:1e55:dead:c0de:1715:5afe:c0de")]; + const char *c = buf; + union nf_inet_addr addr; + u_int16_t family; + bool add, succ; + + if (size == 0) + return 0; + if (size > sizeof(buf)) + size = sizeof(buf); + if (copy_from_user(buf, input, size) != 0) + return -EFAULT; + + /* Strict protocol! */ + if (*loff != 0) + return -ESPIPE; + switch (*c) { + case '/': /* flush table */ + spin_lock_bh(&recent_lock); + recent_table_flush(t); + spin_unlock_bh(&recent_lock); + return size; + case '-': /* remove address */ + add = false; + break; + case '+': /* add address */ + add = true; + break; + default: + printk(KERN_INFO KBUILD_MODNAME ": Need +ip, -ip or /\n"); + return -EINVAL; + } + + ++c; + --size; + if (strnchr(c, size, ':') != NULL) { + family = NFPROTO_IPV6; + succ = in6_pton(c, size, (void *)&addr, '\n', NULL); + } else { + family = NFPROTO_IPV4; + succ = in4_pton(c, size, (void *)&addr, '\n', NULL); + } + + if (!succ) { + printk(KERN_INFO KBUILD_MODNAME ": illegal address written " + "to procfs\n"); + return -EINVAL; + } + + spin_lock_bh(&recent_lock); + e = recent_entry_lookup(t, &addr, family, 0); + if (e == NULL) { + if (add) + recent_entry_init(t, &addr, family, 0); + } else { + if (add) + recent_entry_update(t, e); + else + recent_entry_remove(t, e); + } + spin_unlock_bh(&recent_lock); + /* Note we removed one above */ + *loff += size + 1; + return size + 1; +} + +static const struct file_operations recent_mt_fops = { + .open = recent_seq_open, + .read = seq_read, + .write = recent_mt_proc_write, + .release = seq_release_private, + .owner = THIS_MODULE, +}; #endif /* CONFIG_PROC_FS */ -static struct xt_match recent_mt_reg __read_mostly = { - .name = "recent", - .family = AF_INET, - .match = recent_mt, - .matchsize = sizeof(struct ipt_recent_info), - .checkentry = recent_mt_check, - .destroy = recent_mt_destroy, - .me = THIS_MODULE, +static struct xt_match recent_mt_reg[] __read_mostly = { + { + .name = "recent", + .revision = 0, + .family = NFPROTO_IPV4, + .match = recent_mt, + .matchsize = sizeof(struct xt_recent_mtinfo), + .checkentry = recent_mt_check, + .destroy = recent_mt_destroy, + .me = THIS_MODULE, + }, + { + .name = "recent", + .revision = 0, + .family = NFPROTO_IPV6, + .match = recent_mt, + .matchsize = sizeof(struct xt_recent_mtinfo), + .checkentry = recent_mt_check, + .destroy = recent_mt_destroy, + .me = THIS_MODULE, + }, }; static int __init recent_mt_init(void) @@ -475,26 +650,39 @@ static int __init recent_mt_init(void) return -EINVAL; ip_list_hash_size = 1 << fls(ip_list_tot); - err = xt_register_match(&recent_mt_reg); + err = xt_register_matches(recent_mt_reg, ARRAY_SIZE(recent_mt_reg)); #ifdef CONFIG_PROC_FS if (err) return err; - proc_dir = proc_mkdir("ipt_recent", init_net.proc_net); - if (proc_dir == NULL) { - xt_unregister_match(&recent_mt_reg); + recent_proc_dir = proc_mkdir("xt_recent", init_net.proc_net); + if (recent_proc_dir == NULL) { + xt_unregister_matches(recent_mt_reg, ARRAY_SIZE(recent_mt_reg)); + err = -ENOMEM; + } +#ifdef CONFIG_NETFILTER_XT_MATCH_RECENT_PROC_COMPAT + if (err < 0) + return err; + proc_old_dir = proc_mkdir("ipt_recent", init_net.proc_net); + if (proc_old_dir == NULL) { + remove_proc_entry("xt_recent", init_net.proc_net); + xt_unregister_matches(recent_mt_reg, ARRAY_SIZE(recent_mt_reg)); err = -ENOMEM; } #endif +#endif return err; } static void __exit recent_mt_exit(void) { BUG_ON(!list_empty(&tables)); - xt_unregister_match(&recent_mt_reg); + xt_unregister_matches(recent_mt_reg, ARRAY_SIZE(recent_mt_reg)); #ifdef CONFIG_PROC_FS +#ifdef CONFIG_NETFILTER_XT_MATCH_RECENT_PROC_COMPAT remove_proc_entry("ipt_recent", init_net.proc_net); #endif + remove_proc_entry("xt_recent", init_net.proc_net); +#endif } module_init(recent_mt_init); diff --git a/net/netfilter/xt_sctp.c b/net/netfilter/xt_sctp.c index e6e4681fa047..e223cb43ae8e 100644 --- a/net/netfilter/xt_sctp.c +++ b/net/netfilter/xt_sctp.c @@ -117,23 +117,21 @@ match_packet(const struct sk_buff *skb, } static bool -sctp_mt(const struct sk_buff *skb, const struct net_device *in, - const struct net_device *out, const struct xt_match *match, - const void *matchinfo, int offset, unsigned int protoff, bool *hotdrop) +sctp_mt(const struct sk_buff *skb, const struct xt_match_param *par) { - const struct xt_sctp_info *info = matchinfo; + const struct xt_sctp_info *info = par->matchinfo; const sctp_sctphdr_t *sh; sctp_sctphdr_t _sh; - if (offset) { + if (par->fragoff != 0) { duprintf("Dropping non-first fragment.. FIXME\n"); return false; } - sh = skb_header_pointer(skb, protoff, sizeof(_sh), &_sh); + sh = skb_header_pointer(skb, par->thoff, sizeof(_sh), &_sh); if (sh == NULL) { duprintf("Dropping evil TCP offset=0 tinygram.\n"); - *hotdrop = true; + *par->hotdrop = true; return false; } duprintf("spt: %d\tdpt: %d\n", ntohs(sh->source), ntohs(sh->dest)); @@ -144,17 +142,14 @@ sctp_mt(const struct sk_buff *skb, const struct net_device *in, && SCCHECK(ntohs(sh->dest) >= info->dpts[0] && ntohs(sh->dest) <= info->dpts[1], XT_SCTP_DEST_PORTS, info->flags, info->invflags) - && SCCHECK(match_packet(skb, protoff + sizeof (sctp_sctphdr_t), - info, hotdrop), + && SCCHECK(match_packet(skb, par->thoff + sizeof(sctp_sctphdr_t), + info, par->hotdrop), XT_SCTP_CHUNK_TYPES, info->flags, info->invflags); } -static bool -sctp_mt_check(const char *tablename, const void *inf, - const struct xt_match *match, void *matchinfo, - unsigned int hook_mask) +static bool sctp_mt_check(const struct xt_mtchk_param *par) { - const struct xt_sctp_info *info = matchinfo; + const struct xt_sctp_info *info = par->matchinfo; return !(info->flags & ~XT_SCTP_VALID_FLAGS) && !(info->invflags & ~XT_SCTP_VALID_FLAGS) @@ -169,7 +164,7 @@ sctp_mt_check(const char *tablename, const void *inf, static struct xt_match sctp_mt_reg[] __read_mostly = { { .name = "sctp", - .family = AF_INET, + .family = NFPROTO_IPV4, .checkentry = sctp_mt_check, .match = sctp_mt, .matchsize = sizeof(struct xt_sctp_info), @@ -178,7 +173,7 @@ static struct xt_match sctp_mt_reg[] __read_mostly = { }, { .name = "sctp", - .family = AF_INET6, + .family = NFPROTO_IPV6, .checkentry = sctp_mt_check, .match = sctp_mt, .matchsize = sizeof(struct xt_sctp_info), diff --git a/net/netfilter/xt_socket.c b/net/netfilter/xt_socket.c new file mode 100644 index 000000000000..02a8fed21082 --- /dev/null +++ b/net/netfilter/xt_socket.c @@ -0,0 +1,185 @@ +/* + * Transparent proxy support for Linux/iptables + * + * Copyright (C) 2007-2008 BalaBit IT Ltd. + * Author: Krisztian Kovacs + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + */ + +#include <linux/module.h> +#include <linux/skbuff.h> +#include <linux/netfilter/x_tables.h> +#include <linux/netfilter_ipv4/ip_tables.h> +#include <net/tcp.h> +#include <net/udp.h> +#include <net/icmp.h> +#include <net/sock.h> +#include <net/inet_sock.h> +#include <net/netfilter/nf_tproxy_core.h> +#include <net/netfilter/ipv4/nf_defrag_ipv4.h> + +#if defined(CONFIG_NF_CONNTRACK) || defined(CONFIG_NF_CONNTRACK_MODULE) +#define XT_SOCKET_HAVE_CONNTRACK 1 +#include <net/netfilter/nf_conntrack.h> +#endif + +static int +extract_icmp_fields(const struct sk_buff *skb, + u8 *protocol, + __be32 *raddr, + __be32 *laddr, + __be16 *rport, + __be16 *lport) +{ + unsigned int outside_hdrlen = ip_hdrlen(skb); + struct iphdr *inside_iph, _inside_iph; + struct icmphdr *icmph, _icmph; + __be16 *ports, _ports[2]; + + icmph = skb_header_pointer(skb, outside_hdrlen, + sizeof(_icmph), &_icmph); + if (icmph == NULL) + return 1; + + switch (icmph->type) { + case ICMP_DEST_UNREACH: + case ICMP_SOURCE_QUENCH: + case ICMP_REDIRECT: + case ICMP_TIME_EXCEEDED: + case ICMP_PARAMETERPROB: + break; + default: + return 1; + } + + inside_iph = skb_header_pointer(skb, outside_hdrlen + + sizeof(struct icmphdr), + sizeof(_inside_iph), &_inside_iph); + if (inside_iph == NULL) + return 1; + + if (inside_iph->protocol != IPPROTO_TCP && + inside_iph->protocol != IPPROTO_UDP) + return 1; + + ports = skb_header_pointer(skb, outside_hdrlen + + sizeof(struct icmphdr) + + (inside_iph->ihl << 2), + sizeof(_ports), &_ports); + if (ports == NULL) + return 1; + + /* the inside IP packet is the one quoted from our side, thus + * its saddr is the local address */ + *protocol = inside_iph->protocol; + *laddr = inside_iph->saddr; + *lport = ports[0]; + *raddr = inside_iph->daddr; + *rport = ports[1]; + + return 0; +} + + +static bool +socket_mt(const struct sk_buff *skb, const struct xt_match_param *par) +{ + const struct iphdr *iph = ip_hdr(skb); + struct udphdr _hdr, *hp = NULL; + struct sock *sk; + __be32 daddr, saddr; + __be16 dport, sport; + u8 protocol; +#ifdef XT_SOCKET_HAVE_CONNTRACK + struct nf_conn const *ct; + enum ip_conntrack_info ctinfo; +#endif + + if (iph->protocol == IPPROTO_UDP || iph->protocol == IPPROTO_TCP) { + hp = skb_header_pointer(skb, ip_hdrlen(skb), + sizeof(_hdr), &_hdr); + if (hp == NULL) + return false; + + protocol = iph->protocol; + saddr = iph->saddr; + sport = hp->source; + daddr = iph->daddr; + dport = hp->dest; + + } else if (iph->protocol == IPPROTO_ICMP) { + if (extract_icmp_fields(skb, &protocol, &saddr, &daddr, + &sport, &dport)) + return false; + } else { + return false; + } + +#ifdef XT_SOCKET_HAVE_CONNTRACK + /* Do the lookup with the original socket address in case this is a + * reply packet of an established SNAT-ted connection. */ + + ct = nf_ct_get(skb, &ctinfo); + if (ct && (ct != &nf_conntrack_untracked) && + ((iph->protocol != IPPROTO_ICMP && + ctinfo == IP_CT_IS_REPLY + IP_CT_ESTABLISHED) || + (iph->protocol == IPPROTO_ICMP && + ctinfo == IP_CT_IS_REPLY + IP_CT_RELATED)) && + (ct->status & IPS_SRC_NAT_DONE)) { + + daddr = ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.src.u3.ip; + dport = (iph->protocol == IPPROTO_TCP) ? + ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.src.u.tcp.port : + ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.src.u.udp.port; + } +#endif + + sk = nf_tproxy_get_sock_v4(dev_net(skb->dev), protocol, + saddr, daddr, sport, dport, par->in, false); + if (sk != NULL) { + bool wildcard = (inet_sk(sk)->rcv_saddr == 0); + + nf_tproxy_put_sock(sk); + if (wildcard) + sk = NULL; + } + + pr_debug("socket match: proto %u %08x:%u -> %08x:%u " + "(orig %08x:%u) sock %p\n", + protocol, ntohl(saddr), ntohs(sport), + ntohl(daddr), ntohs(dport), + ntohl(iph->daddr), hp ? ntohs(hp->dest) : 0, sk); + + return (sk != NULL); +} + +static struct xt_match socket_mt_reg __read_mostly = { + .name = "socket", + .family = AF_INET, + .match = socket_mt, + .hooks = 1 << NF_INET_PRE_ROUTING, + .me = THIS_MODULE, +}; + +static int __init socket_mt_init(void) +{ + nf_defrag_ipv4_enable(); + return xt_register_match(&socket_mt_reg); +} + +static void __exit socket_mt_exit(void) +{ + xt_unregister_match(&socket_mt_reg); +} + +module_init(socket_mt_init); +module_exit(socket_mt_exit); + +MODULE_LICENSE("GPL"); +MODULE_AUTHOR("Krisztian Kovacs, Balazs Scheidler"); +MODULE_DESCRIPTION("x_tables socket match module"); +MODULE_ALIAS("ipt_socket"); diff --git a/net/netfilter/xt_state.c b/net/netfilter/xt_state.c index a776dc36a193..4c946cbd731f 100644 --- a/net/netfilter/xt_state.c +++ b/net/netfilter/xt_state.c @@ -21,12 +21,9 @@ MODULE_ALIAS("ipt_state"); MODULE_ALIAS("ip6t_state"); static bool -state_mt(const struct sk_buff *skb, const struct net_device *in, - const struct net_device *out, const struct xt_match *match, - const void *matchinfo, int offset, unsigned int protoff, - bool *hotdrop) +state_mt(const struct sk_buff *skb, const struct xt_match_param *par) { - const struct xt_state_info *sinfo = matchinfo; + const struct xt_state_info *sinfo = par->matchinfo; enum ip_conntrack_info ctinfo; unsigned int statebit; @@ -40,28 +37,25 @@ state_mt(const struct sk_buff *skb, const struct net_device *in, return (sinfo->statemask & statebit); } -static bool -state_mt_check(const char *tablename, const void *inf, - const struct xt_match *match, void *matchinfo, - unsigned int hook_mask) +static bool state_mt_check(const struct xt_mtchk_param *par) { - if (nf_ct_l3proto_try_module_get(match->family) < 0) { + if (nf_ct_l3proto_try_module_get(par->match->family) < 0) { printk(KERN_WARNING "can't load conntrack support for " - "proto=%u\n", match->family); + "proto=%u\n", par->match->family); return false; } return true; } -static void state_mt_destroy(const struct xt_match *match, void *matchinfo) +static void state_mt_destroy(const struct xt_mtdtor_param *par) { - nf_ct_l3proto_module_put(match->family); + nf_ct_l3proto_module_put(par->match->family); } static struct xt_match state_mt_reg[] __read_mostly = { { .name = "state", - .family = AF_INET, + .family = NFPROTO_IPV4, .checkentry = state_mt_check, .match = state_mt, .destroy = state_mt_destroy, @@ -70,7 +64,7 @@ static struct xt_match state_mt_reg[] __read_mostly = { }, { .name = "state", - .family = AF_INET6, + .family = NFPROTO_IPV6, .checkentry = state_mt_check, .match = state_mt, .destroy = state_mt_destroy, diff --git a/net/netfilter/xt_statistic.c b/net/netfilter/xt_statistic.c index 43133080da7d..0d75141139d5 100644 --- a/net/netfilter/xt_statistic.c +++ b/net/netfilter/xt_statistic.c @@ -25,12 +25,9 @@ MODULE_ALIAS("ip6t_statistic"); static DEFINE_SPINLOCK(nth_lock); static bool -statistic_mt(const struct sk_buff *skb, const struct net_device *in, - const struct net_device *out, const struct xt_match *match, - const void *matchinfo, int offset, unsigned int protoff, - bool *hotdrop) +statistic_mt(const struct sk_buff *skb, const struct xt_match_param *par) { - struct xt_statistic_info *info = (struct xt_statistic_info *)matchinfo; + struct xt_statistic_info *info = (void *)par->matchinfo; bool ret = info->flags & XT_STATISTIC_INVERT; switch (info->mode) { @@ -52,12 +49,9 @@ statistic_mt(const struct sk_buff *skb, const struct net_device *in, return ret; } -static bool -statistic_mt_check(const char *tablename, const void *entry, - const struct xt_match *match, void *matchinfo, - unsigned int hook_mask) +static bool statistic_mt_check(const struct xt_mtchk_param *par) { - struct xt_statistic_info *info = matchinfo; + struct xt_statistic_info *info = par->matchinfo; if (info->mode > XT_STATISTIC_MODE_MAX || info->flags & ~XT_STATISTIC_MASK) @@ -66,35 +60,24 @@ statistic_mt_check(const char *tablename, const void *entry, return true; } -static struct xt_match statistic_mt_reg[] __read_mostly = { - { - .name = "statistic", - .family = AF_INET, - .checkentry = statistic_mt_check, - .match = statistic_mt, - .matchsize = sizeof(struct xt_statistic_info), - .me = THIS_MODULE, - }, - { - .name = "statistic", - .family = AF_INET6, - .checkentry = statistic_mt_check, - .match = statistic_mt, - .matchsize = sizeof(struct xt_statistic_info), - .me = THIS_MODULE, - }, +static struct xt_match xt_statistic_mt_reg __read_mostly = { + .name = "statistic", + .revision = 0, + .family = NFPROTO_UNSPEC, + .match = statistic_mt, + .checkentry = statistic_mt_check, + .matchsize = sizeof(struct xt_statistic_info), + .me = THIS_MODULE, }; static int __init statistic_mt_init(void) { - return xt_register_matches(statistic_mt_reg, - ARRAY_SIZE(statistic_mt_reg)); + return xt_register_match(&xt_statistic_mt_reg); } static void __exit statistic_mt_exit(void) { - xt_unregister_matches(statistic_mt_reg, - ARRAY_SIZE(statistic_mt_reg)); + xt_unregister_match(&xt_statistic_mt_reg); } module_init(statistic_mt_init); diff --git a/net/netfilter/xt_string.c b/net/netfilter/xt_string.c index 72f694d947f4..b4d774111311 100644 --- a/net/netfilter/xt_string.c +++ b/net/netfilter/xt_string.c @@ -22,30 +22,29 @@ MODULE_ALIAS("ipt_string"); MODULE_ALIAS("ip6t_string"); static bool -string_mt(const struct sk_buff *skb, const struct net_device *in, - const struct net_device *out, const struct xt_match *match, - const void *matchinfo, int offset, unsigned int protoff, - bool *hotdrop) +string_mt(const struct sk_buff *skb, const struct xt_match_param *par) { - const struct xt_string_info *conf = matchinfo; + const struct xt_string_info *conf = par->matchinfo; struct ts_state state; + int invert; memset(&state, 0, sizeof(struct ts_state)); + invert = (par->match->revision == 0 ? conf->u.v0.invert : + conf->u.v1.flags & XT_STRING_FLAG_INVERT); + return (skb_find_text((struct sk_buff *)skb, conf->from_offset, conf->to_offset, conf->config, &state) - != UINT_MAX) ^ conf->invert; + != UINT_MAX) ^ invert; } #define STRING_TEXT_PRIV(m) ((struct xt_string_info *)(m)) -static bool -string_mt_check(const char *tablename, const void *ip, - const struct xt_match *match, void *matchinfo, - unsigned int hook_mask) +static bool string_mt_check(const struct xt_mtchk_param *par) { - struct xt_string_info *conf = matchinfo; + struct xt_string_info *conf = par->matchinfo; struct ts_config *ts_conf; + int flags = TS_AUTOLOAD; /* Damn, can't handle this case properly with iptables... */ if (conf->from_offset > conf->to_offset) @@ -54,8 +53,15 @@ string_mt_check(const char *tablename, const void *ip, return false; if (conf->patlen > XT_STRING_MAX_PATTERN_SIZE) return false; + if (par->match->revision == 1) { + if (conf->u.v1.flags & + ~(XT_STRING_FLAG_IGNORECASE | XT_STRING_FLAG_INVERT)) + return false; + if (conf->u.v1.flags & XT_STRING_FLAG_IGNORECASE) + flags |= TS_IGNORECASE; + } ts_conf = textsearch_prepare(conf->algo, conf->pattern, conf->patlen, - GFP_KERNEL, TS_AUTOLOAD); + GFP_KERNEL, flags); if (IS_ERR(ts_conf)) return false; @@ -64,15 +70,16 @@ string_mt_check(const char *tablename, const void *ip, return true; } -static void string_mt_destroy(const struct xt_match *match, void *matchinfo) +static void string_mt_destroy(const struct xt_mtdtor_param *par) { - textsearch_destroy(STRING_TEXT_PRIV(matchinfo)->config); + textsearch_destroy(STRING_TEXT_PRIV(par->matchinfo)->config); } -static struct xt_match string_mt_reg[] __read_mostly = { +static struct xt_match xt_string_mt_reg[] __read_mostly = { { .name = "string", - .family = AF_INET, + .revision = 0, + .family = NFPROTO_UNSPEC, .checkentry = string_mt_check, .match = string_mt, .destroy = string_mt_destroy, @@ -81,7 +88,8 @@ static struct xt_match string_mt_reg[] __read_mostly = { }, { .name = "string", - .family = AF_INET6, + .revision = 1, + .family = NFPROTO_UNSPEC, .checkentry = string_mt_check, .match = string_mt, .destroy = string_mt_destroy, @@ -92,12 +100,13 @@ static struct xt_match string_mt_reg[] __read_mostly = { static int __init string_mt_init(void) { - return xt_register_matches(string_mt_reg, ARRAY_SIZE(string_mt_reg)); + return xt_register_matches(xt_string_mt_reg, + ARRAY_SIZE(xt_string_mt_reg)); } static void __exit string_mt_exit(void) { - xt_unregister_matches(string_mt_reg, ARRAY_SIZE(string_mt_reg)); + xt_unregister_matches(xt_string_mt_reg, ARRAY_SIZE(xt_string_mt_reg)); } module_init(string_mt_init); diff --git a/net/netfilter/xt_tcpmss.c b/net/netfilter/xt_tcpmss.c index 6771bf01275b..4809b34b10f8 100644 --- a/net/netfilter/xt_tcpmss.c +++ b/net/netfilter/xt_tcpmss.c @@ -25,12 +25,9 @@ MODULE_ALIAS("ipt_tcpmss"); MODULE_ALIAS("ip6t_tcpmss"); static bool -tcpmss_mt(const struct sk_buff *skb, const struct net_device *in, - const struct net_device *out, const struct xt_match *match, - const void *matchinfo, int offset, unsigned int protoff, - bool *hotdrop) +tcpmss_mt(const struct sk_buff *skb, const struct xt_match_param *par) { - const struct xt_tcpmss_match_info *info = matchinfo; + const struct xt_tcpmss_match_info *info = par->matchinfo; const struct tcphdr *th; struct tcphdr _tcph; /* tcp.doff is only 4 bits, ie. max 15 * 4 bytes */ @@ -39,7 +36,7 @@ tcpmss_mt(const struct sk_buff *skb, const struct net_device *in, unsigned int i, optlen; /* If we don't have the whole header, drop packet. */ - th = skb_header_pointer(skb, protoff, sizeof(_tcph), &_tcph); + th = skb_header_pointer(skb, par->thoff, sizeof(_tcph), &_tcph); if (th == NULL) goto dropit; @@ -52,7 +49,7 @@ tcpmss_mt(const struct sk_buff *skb, const struct net_device *in, goto out; /* Truncated options. */ - op = skb_header_pointer(skb, protoff + sizeof(*th), optlen, _opt); + op = skb_header_pointer(skb, par->thoff + sizeof(*th), optlen, _opt); if (op == NULL) goto dropit; @@ -76,14 +73,14 @@ out: return info->invert; dropit: - *hotdrop = true; + *par->hotdrop = true; return false; } static struct xt_match tcpmss_mt_reg[] __read_mostly = { { .name = "tcpmss", - .family = AF_INET, + .family = NFPROTO_IPV4, .match = tcpmss_mt, .matchsize = sizeof(struct xt_tcpmss_match_info), .proto = IPPROTO_TCP, @@ -91,7 +88,7 @@ static struct xt_match tcpmss_mt_reg[] __read_mostly = { }, { .name = "tcpmss", - .family = AF_INET6, + .family = NFPROTO_IPV6, .match = tcpmss_mt, .matchsize = sizeof(struct xt_tcpmss_match_info), .proto = IPPROTO_TCP, diff --git a/net/netfilter/xt_tcpudp.c b/net/netfilter/xt_tcpudp.c index 951b06b8d701..1ebdc4934eed 100644 --- a/net/netfilter/xt_tcpudp.c +++ b/net/netfilter/xt_tcpudp.c @@ -68,25 +68,22 @@ tcp_find_option(u_int8_t option, return invert; } -static bool -tcp_mt(const struct sk_buff *skb, const struct net_device *in, - const struct net_device *out, const struct xt_match *match, - const void *matchinfo, int offset, unsigned int protoff, bool *hotdrop) +static bool tcp_mt(const struct sk_buff *skb, const struct xt_match_param *par) { const struct tcphdr *th; struct tcphdr _tcph; - const struct xt_tcp *tcpinfo = matchinfo; + const struct xt_tcp *tcpinfo = par->matchinfo; - if (offset) { + if (par->fragoff != 0) { /* To quote Alan: Don't allow a fragment of TCP 8 bytes in. Nobody normal causes this. Its a cracker trying to break in by doing a flag overwrite to pass the direction checks. */ - if (offset == 1) { + if (par->fragoff == 1) { duprintf("Dropping evil TCP offset=1 frag.\n"); - *hotdrop = true; + *par->hotdrop = true; } /* Must not be a fragment. */ return false; @@ -94,12 +91,12 @@ tcp_mt(const struct sk_buff *skb, const struct net_device *in, #define FWINVTCP(bool, invflg) ((bool) ^ !!(tcpinfo->invflags & (invflg))) - th = skb_header_pointer(skb, protoff, sizeof(_tcph), &_tcph); + th = skb_header_pointer(skb, par->thoff, sizeof(_tcph), &_tcph); if (th == NULL) { /* We've been asked to examine this packet, and we can't. Hence, no choice but to drop. */ duprintf("Dropping evil TCP offset=0 tinygram.\n"); - *hotdrop = true; + *par->hotdrop = true; return false; } @@ -117,49 +114,42 @@ tcp_mt(const struct sk_buff *skb, const struct net_device *in, return false; if (tcpinfo->option) { if (th->doff * 4 < sizeof(_tcph)) { - *hotdrop = true; + *par->hotdrop = true; return false; } - if (!tcp_find_option(tcpinfo->option, skb, protoff, + if (!tcp_find_option(tcpinfo->option, skb, par->thoff, th->doff*4 - sizeof(_tcph), tcpinfo->invflags & XT_TCP_INV_OPTION, - hotdrop)) + par->hotdrop)) return false; } return true; } -/* Called when user tries to insert an entry of this type. */ -static bool -tcp_mt_check(const char *tablename, const void *info, - const struct xt_match *match, void *matchinfo, - unsigned int hook_mask) +static bool tcp_mt_check(const struct xt_mtchk_param *par) { - const struct xt_tcp *tcpinfo = matchinfo; + const struct xt_tcp *tcpinfo = par->matchinfo; /* Must specify no unknown invflags */ return !(tcpinfo->invflags & ~XT_TCP_INV_MASK); } -static bool -udp_mt(const struct sk_buff *skb, const struct net_device *in, - const struct net_device *out, const struct xt_match *match, - const void *matchinfo, int offset, unsigned int protoff, bool *hotdrop) +static bool udp_mt(const struct sk_buff *skb, const struct xt_match_param *par) { const struct udphdr *uh; struct udphdr _udph; - const struct xt_udp *udpinfo = matchinfo; + const struct xt_udp *udpinfo = par->matchinfo; /* Must not be a fragment. */ - if (offset) + if (par->fragoff != 0) return false; - uh = skb_header_pointer(skb, protoff, sizeof(_udph), &_udph); + uh = skb_header_pointer(skb, par->thoff, sizeof(_udph), &_udph); if (uh == NULL) { /* We've been asked to examine this packet, and we can't. Hence, no choice but to drop. */ duprintf("Dropping evil UDP tinygram.\n"); - *hotdrop = true; + *par->hotdrop = true; return false; } @@ -171,13 +161,9 @@ udp_mt(const struct sk_buff *skb, const struct net_device *in, !!(udpinfo->invflags & XT_UDP_INV_DSTPT)); } -/* Called when user tries to insert an entry of this type. */ -static bool -udp_mt_check(const char *tablename, const void *info, - const struct xt_match *match, void *matchinfo, - unsigned int hook_mask) +static bool udp_mt_check(const struct xt_mtchk_param *par) { - const struct xt_udp *udpinfo = matchinfo; + const struct xt_udp *udpinfo = par->matchinfo; /* Must specify no unknown invflags */ return !(udpinfo->invflags & ~XT_UDP_INV_MASK); @@ -186,7 +172,7 @@ udp_mt_check(const char *tablename, const void *info, static struct xt_match tcpudp_mt_reg[] __read_mostly = { { .name = "tcp", - .family = AF_INET, + .family = NFPROTO_IPV4, .checkentry = tcp_mt_check, .match = tcp_mt, .matchsize = sizeof(struct xt_tcp), @@ -195,7 +181,7 @@ static struct xt_match tcpudp_mt_reg[] __read_mostly = { }, { .name = "tcp", - .family = AF_INET6, + .family = NFPROTO_IPV6, .checkentry = tcp_mt_check, .match = tcp_mt, .matchsize = sizeof(struct xt_tcp), @@ -204,7 +190,7 @@ static struct xt_match tcpudp_mt_reg[] __read_mostly = { }, { .name = "udp", - .family = AF_INET, + .family = NFPROTO_IPV4, .checkentry = udp_mt_check, .match = udp_mt, .matchsize = sizeof(struct xt_udp), @@ -213,7 +199,7 @@ static struct xt_match tcpudp_mt_reg[] __read_mostly = { }, { .name = "udp", - .family = AF_INET6, + .family = NFPROTO_IPV6, .checkentry = udp_mt_check, .match = udp_mt, .matchsize = sizeof(struct xt_udp), @@ -222,7 +208,7 @@ static struct xt_match tcpudp_mt_reg[] __read_mostly = { }, { .name = "udplite", - .family = AF_INET, + .family = NFPROTO_IPV4, .checkentry = udp_mt_check, .match = udp_mt, .matchsize = sizeof(struct xt_udp), @@ -231,7 +217,7 @@ static struct xt_match tcpudp_mt_reg[] __read_mostly = { }, { .name = "udplite", - .family = AF_INET6, + .family = NFPROTO_IPV6, .checkentry = udp_mt_check, .match = udp_mt, .matchsize = sizeof(struct xt_udp), diff --git a/net/netfilter/xt_time.c b/net/netfilter/xt_time.c index ed76baab4734..29375ba8db73 100644 --- a/net/netfilter/xt_time.c +++ b/net/netfilter/xt_time.c @@ -136,26 +136,26 @@ static void localtime_3(struct xtm *r, time_t time) * from w repeatedly while counting.) */ if (is_leap(year)) { + /* use days_since_leapyear[] in a leap year */ for (i = ARRAY_SIZE(days_since_leapyear) - 1; - i > 0 && days_since_year[i] > w; --i) + i > 0 && days_since_leapyear[i] > w; --i) /* just loop */; + r->monthday = w - days_since_leapyear[i] + 1; } else { for (i = ARRAY_SIZE(days_since_year) - 1; i > 0 && days_since_year[i] > w; --i) /* just loop */; + r->monthday = w - days_since_year[i] + 1; } r->month = i + 1; - r->monthday = w - days_since_year[i] + 1; return; } static bool -time_mt(const struct sk_buff *skb, const struct net_device *in, - const struct net_device *out, const struct xt_match *match, - const void *matchinfo, int offset, unsigned int protoff, bool *hotdrop) +time_mt(const struct sk_buff *skb, const struct xt_match_param *par) { - const struct xt_time_info *info = matchinfo; + const struct xt_time_info *info = par->matchinfo; unsigned int packet_time; struct xtm current_time; s64 stamp; @@ -173,7 +173,7 @@ time_mt(const struct sk_buff *skb, const struct net_device *in, __net_timestamp((struct sk_buff *)skb); stamp = ktime_to_ns(skb->tstamp); - do_div(stamp, NSEC_PER_SEC); + stamp = div_s64(stamp, NSEC_PER_SEC); if (info->flags & XT_TIME_LOCAL_TZ) /* Adjust for local timezone */ @@ -218,12 +218,9 @@ time_mt(const struct sk_buff *skb, const struct net_device *in, return true; } -static bool -time_mt_check(const char *tablename, const void *ip, - const struct xt_match *match, void *matchinfo, - unsigned int hook_mask) +static bool time_mt_check(const struct xt_mtchk_param *par) { - const struct xt_time_info *info = matchinfo; + const struct xt_time_info *info = par->matchinfo; if (info->daytime_start > XT_TIME_MAX_DAYTIME || info->daytime_stop > XT_TIME_MAX_DAYTIME) { @@ -235,33 +232,23 @@ time_mt_check(const char *tablename, const void *ip, return true; } -static struct xt_match time_mt_reg[] __read_mostly = { - { - .name = "time", - .family = AF_INET, - .match = time_mt, - .matchsize = sizeof(struct xt_time_info), - .checkentry = time_mt_check, - .me = THIS_MODULE, - }, - { - .name = "time", - .family = AF_INET6, - .match = time_mt, - .matchsize = sizeof(struct xt_time_info), - .checkentry = time_mt_check, - .me = THIS_MODULE, - }, +static struct xt_match xt_time_mt_reg __read_mostly = { + .name = "time", + .family = NFPROTO_UNSPEC, + .match = time_mt, + .checkentry = time_mt_check, + .matchsize = sizeof(struct xt_time_info), + .me = THIS_MODULE, }; static int __init time_mt_init(void) { - return xt_register_matches(time_mt_reg, ARRAY_SIZE(time_mt_reg)); + return xt_register_match(&xt_time_mt_reg); } static void __exit time_mt_exit(void) { - xt_unregister_matches(time_mt_reg, ARRAY_SIZE(time_mt_reg)); + xt_unregister_match(&xt_time_mt_reg); } module_init(time_mt_init); diff --git a/net/netfilter/xt_u32.c b/net/netfilter/xt_u32.c index 627e0f336d54..24a527624500 100644 --- a/net/netfilter/xt_u32.c +++ b/net/netfilter/xt_u32.c @@ -87,43 +87,32 @@ static bool u32_match_it(const struct xt_u32 *data, return true; } -static bool -u32_mt(const struct sk_buff *skb, const struct net_device *in, - const struct net_device *out, const struct xt_match *match, - const void *matchinfo, int offset, unsigned int protoff, bool *hotdrop) +static bool u32_mt(const struct sk_buff *skb, const struct xt_match_param *par) { - const struct xt_u32 *data = matchinfo; + const struct xt_u32 *data = par->matchinfo; bool ret; ret = u32_match_it(data, skb); return ret ^ data->invert; } -static struct xt_match u32_mt_reg[] __read_mostly = { - { - .name = "u32", - .family = AF_INET, - .match = u32_mt, - .matchsize = sizeof(struct xt_u32), - .me = THIS_MODULE, - }, - { - .name = "u32", - .family = AF_INET6, - .match = u32_mt, - .matchsize = sizeof(struct xt_u32), - .me = THIS_MODULE, - }, +static struct xt_match xt_u32_mt_reg __read_mostly = { + .name = "u32", + .revision = 0, + .family = NFPROTO_UNSPEC, + .match = u32_mt, + .matchsize = sizeof(struct xt_u32), + .me = THIS_MODULE, }; static int __init u32_mt_init(void) { - return xt_register_matches(u32_mt_reg, ARRAY_SIZE(u32_mt_reg)); + return xt_register_match(&xt_u32_mt_reg); } static void __exit u32_mt_exit(void) { - xt_unregister_matches(u32_mt_reg, ARRAY_SIZE(u32_mt_reg)); + xt_unregister_match(&xt_u32_mt_reg); } module_init(u32_mt_init); diff --git a/net/netlabel/Makefile b/net/netlabel/Makefile index 8af18c0a47d9..ea750e9df65f 100644 --- a/net/netlabel/Makefile +++ b/net/netlabel/Makefile @@ -5,7 +5,8 @@ # # base objects -obj-y := netlabel_user.o netlabel_kapi.o netlabel_domainhash.o +obj-y := netlabel_user.o netlabel_kapi.o +obj-y += netlabel_domainhash.o netlabel_addrlist.o # management objects obj-y += netlabel_mgmt.o diff --git a/net/netlabel/netlabel_addrlist.c b/net/netlabel/netlabel_addrlist.c new file mode 100644 index 000000000000..b0925a303353 --- /dev/null +++ b/net/netlabel/netlabel_addrlist.c @@ -0,0 +1,388 @@ +/* + * NetLabel Network Address Lists + * + * This file contains network address list functions used to manage ordered + * lists of network addresses for use by the NetLabel subsystem. The NetLabel + * system manages static and dynamic label mappings for network protocols such + * as CIPSO and RIPSO. + * + * Author: Paul Moore <paul.moore@hp.com> + * + */ + +/* + * (c) Copyright Hewlett-Packard Development Company, L.P., 2008 + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See + * the GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + * + */ + +#include <linux/types.h> +#include <linux/rcupdate.h> +#include <linux/list.h> +#include <linux/spinlock.h> +#include <linux/in.h> +#include <linux/in6.h> +#include <linux/ip.h> +#include <linux/ipv6.h> +#include <net/ip.h> +#include <net/ipv6.h> +#include <linux/audit.h> + +#include "netlabel_addrlist.h" + +/* + * Address List Functions + */ + +/** + * netlbl_af4list_search - Search for a matching IPv4 address entry + * @addr: IPv4 address + * @head: the list head + * + * Description: + * Searches the IPv4 address list given by @head. If a matching address entry + * is found it is returned, otherwise NULL is returned. The caller is + * responsible for calling the rcu_read_[un]lock() functions. + * + */ +struct netlbl_af4list *netlbl_af4list_search(__be32 addr, + struct list_head *head) +{ + struct netlbl_af4list *iter; + + list_for_each_entry_rcu(iter, head, list) + if (iter->valid && (addr & iter->mask) == iter->addr) + return iter; + + return NULL; +} + +/** + * netlbl_af4list_search_exact - Search for an exact IPv4 address entry + * @addr: IPv4 address + * @mask: IPv4 address mask + * @head: the list head + * + * Description: + * Searches the IPv4 address list given by @head. If an exact match if found + * it is returned, otherwise NULL is returned. The caller is responsible for + * calling the rcu_read_[un]lock() functions. + * + */ +struct netlbl_af4list *netlbl_af4list_search_exact(__be32 addr, + __be32 mask, + struct list_head *head) +{ + struct netlbl_af4list *iter; + + list_for_each_entry_rcu(iter, head, list) + if (iter->valid && iter->addr == addr && iter->mask == mask) + return iter; + + return NULL; +} + + +#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) +/** + * netlbl_af6list_search - Search for a matching IPv6 address entry + * @addr: IPv6 address + * @head: the list head + * + * Description: + * Searches the IPv6 address list given by @head. If a matching address entry + * is found it is returned, otherwise NULL is returned. The caller is + * responsible for calling the rcu_read_[un]lock() functions. + * + */ +struct netlbl_af6list *netlbl_af6list_search(const struct in6_addr *addr, + struct list_head *head) +{ + struct netlbl_af6list *iter; + + list_for_each_entry_rcu(iter, head, list) + if (iter->valid && + ipv6_masked_addr_cmp(&iter->addr, &iter->mask, addr) == 0) + return iter; + + return NULL; +} + +/** + * netlbl_af6list_search_exact - Search for an exact IPv6 address entry + * @addr: IPv6 address + * @mask: IPv6 address mask + * @head: the list head + * + * Description: + * Searches the IPv6 address list given by @head. If an exact match if found + * it is returned, otherwise NULL is returned. The caller is responsible for + * calling the rcu_read_[un]lock() functions. + * + */ +struct netlbl_af6list *netlbl_af6list_search_exact(const struct in6_addr *addr, + const struct in6_addr *mask, + struct list_head *head) +{ + struct netlbl_af6list *iter; + + list_for_each_entry_rcu(iter, head, list) + if (iter->valid && + ipv6_addr_equal(&iter->addr, addr) && + ipv6_addr_equal(&iter->mask, mask)) + return iter; + + return NULL; +} +#endif /* IPv6 */ + +/** + * netlbl_af4list_add - Add a new IPv4 address entry to a list + * @entry: address entry + * @head: the list head + * + * Description: + * Add a new address entry to the list pointed to by @head. On success zero is + * returned, otherwise a negative value is returned. The caller is responsible + * for calling the necessary locking functions. + * + */ +int netlbl_af4list_add(struct netlbl_af4list *entry, struct list_head *head) +{ + struct netlbl_af4list *iter; + + iter = netlbl_af4list_search(entry->addr, head); + if (iter != NULL && + iter->addr == entry->addr && iter->mask == entry->mask) + return -EEXIST; + + /* in order to speed up address searches through the list (the common + * case) we need to keep the list in order based on the size of the + * address mask such that the entry with the widest mask (smallest + * numerical value) appears first in the list */ + list_for_each_entry_rcu(iter, head, list) + if (iter->valid && + ntohl(entry->mask) > ntohl(iter->mask)) { + __list_add_rcu(&entry->list, + iter->list.prev, + &iter->list); + return 0; + } + list_add_tail_rcu(&entry->list, head); + return 0; +} + +#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) +/** + * netlbl_af6list_add - Add a new IPv6 address entry to a list + * @entry: address entry + * @head: the list head + * + * Description: + * Add a new address entry to the list pointed to by @head. On success zero is + * returned, otherwise a negative value is returned. The caller is responsible + * for calling the necessary locking functions. + * + */ +int netlbl_af6list_add(struct netlbl_af6list *entry, struct list_head *head) +{ + struct netlbl_af6list *iter; + + iter = netlbl_af6list_search(&entry->addr, head); + if (iter != NULL && + ipv6_addr_equal(&iter->addr, &entry->addr) && + ipv6_addr_equal(&iter->mask, &entry->mask)) + return -EEXIST; + + /* in order to speed up address searches through the list (the common + * case) we need to keep the list in order based on the size of the + * address mask such that the entry with the widest mask (smallest + * numerical value) appears first in the list */ + list_for_each_entry_rcu(iter, head, list) + if (iter->valid && + ipv6_addr_cmp(&entry->mask, &iter->mask) > 0) { + __list_add_rcu(&entry->list, + iter->list.prev, + &iter->list); + return 0; + } + list_add_tail_rcu(&entry->list, head); + return 0; +} +#endif /* IPv6 */ + +/** + * netlbl_af4list_remove_entry - Remove an IPv4 address entry + * @entry: address entry + * + * Description: + * Remove the specified IP address entry. The caller is responsible for + * calling the necessary locking functions. + * + */ +void netlbl_af4list_remove_entry(struct netlbl_af4list *entry) +{ + entry->valid = 0; + list_del_rcu(&entry->list); +} + +/** + * netlbl_af4list_remove - Remove an IPv4 address entry + * @addr: IP address + * @mask: IP address mask + * @head: the list head + * + * Description: + * Remove an IP address entry from the list pointed to by @head. Returns the + * entry on success, NULL on failure. The caller is responsible for calling + * the necessary locking functions. + * + */ +struct netlbl_af4list *netlbl_af4list_remove(__be32 addr, __be32 mask, + struct list_head *head) +{ + struct netlbl_af4list *entry; + + entry = netlbl_af4list_search(addr, head); + if (entry != NULL && entry->addr == addr && entry->mask == mask) { + netlbl_af4list_remove_entry(entry); + return entry; + } + + return NULL; +} + +#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) +/** + * netlbl_af6list_remove_entry - Remove an IPv6 address entry + * @entry: address entry + * + * Description: + * Remove the specified IP address entry. The caller is responsible for + * calling the necessary locking functions. + * + */ +void netlbl_af6list_remove_entry(struct netlbl_af6list *entry) +{ + entry->valid = 0; + list_del_rcu(&entry->list); +} + +/** + * netlbl_af6list_remove - Remove an IPv6 address entry + * @addr: IP address + * @mask: IP address mask + * @head: the list head + * + * Description: + * Remove an IP address entry from the list pointed to by @head. Returns the + * entry on success, NULL on failure. The caller is responsible for calling + * the necessary locking functions. + * + */ +struct netlbl_af6list *netlbl_af6list_remove(const struct in6_addr *addr, + const struct in6_addr *mask, + struct list_head *head) +{ + struct netlbl_af6list *entry; + + entry = netlbl_af6list_search(addr, head); + if (entry != NULL && + ipv6_addr_equal(&entry->addr, addr) && + ipv6_addr_equal(&entry->mask, mask)) { + netlbl_af6list_remove_entry(entry); + return entry; + } + + return NULL; +} +#endif /* IPv6 */ + +/* + * Audit Helper Functions + */ + +/** + * netlbl_af4list_audit_addr - Audit an IPv4 address + * @audit_buf: audit buffer + * @src: true if source address, false if destination + * @dev: network interface + * @addr: IP address + * @mask: IP address mask + * + * Description: + * Write the IPv4 address and address mask, if necessary, to @audit_buf. + * + */ +void netlbl_af4list_audit_addr(struct audit_buffer *audit_buf, + int src, const char *dev, + __be32 addr, __be32 mask) +{ + u32 mask_val = ntohl(mask); + char *dir = (src ? "src" : "dst"); + + if (dev != NULL) + audit_log_format(audit_buf, " netif=%s", dev); + audit_log_format(audit_buf, " %s=" NIPQUAD_FMT, dir, NIPQUAD(addr)); + if (mask_val != 0xffffffff) { + u32 mask_len = 0; + while (mask_val > 0) { + mask_val <<= 1; + mask_len++; + } + audit_log_format(audit_buf, " %s_prefixlen=%d", dir, mask_len); + } +} + +#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) +/** + * netlbl_af6list_audit_addr - Audit an IPv6 address + * @audit_buf: audit buffer + * @src: true if source address, false if destination + * @dev: network interface + * @addr: IP address + * @mask: IP address mask + * + * Description: + * Write the IPv6 address and address mask, if necessary, to @audit_buf. + * + */ +void netlbl_af6list_audit_addr(struct audit_buffer *audit_buf, + int src, + const char *dev, + const struct in6_addr *addr, + const struct in6_addr *mask) +{ + char *dir = (src ? "src" : "dst"); + + if (dev != NULL) + audit_log_format(audit_buf, " netif=%s", dev); + audit_log_format(audit_buf, " %s=" NIP6_FMT, dir, NIP6(*addr)); + if (ntohl(mask->s6_addr32[3]) != 0xffffffff) { + u32 mask_len = 0; + u32 mask_val; + int iter = -1; + while (ntohl(mask->s6_addr32[++iter]) == 0xffffffff) + mask_len += 32; + mask_val = ntohl(mask->s6_addr32[iter]); + while (mask_val > 0) { + mask_val <<= 1; + mask_len++; + } + audit_log_format(audit_buf, " %s_prefixlen=%d", dir, mask_len); + } +} +#endif /* IPv6 */ diff --git a/net/netlabel/netlabel_addrlist.h b/net/netlabel/netlabel_addrlist.h new file mode 100644 index 000000000000..0242bead405f --- /dev/null +++ b/net/netlabel/netlabel_addrlist.h @@ -0,0 +1,189 @@ +/* + * NetLabel Network Address Lists + * + * This file contains network address list functions used to manage ordered + * lists of network addresses for use by the NetLabel subsystem. The NetLabel + * system manages static and dynamic label mappings for network protocols such + * as CIPSO and RIPSO. + * + * Author: Paul Moore <paul.moore@hp.com> + * + */ + +/* + * (c) Copyright Hewlett-Packard Development Company, L.P., 2008 + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See + * the GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + * + */ + +#ifndef _NETLABEL_ADDRLIST_H +#define _NETLABEL_ADDRLIST_H + +#include <linux/types.h> +#include <linux/rcupdate.h> +#include <linux/list.h> +#include <linux/in6.h> +#include <linux/audit.h> + +/** + * struct netlbl_af4list - NetLabel IPv4 address list + * @addr: IPv4 address + * @mask: IPv4 address mask + * @valid: valid flag + * @list: list structure, used internally + */ +struct netlbl_af4list { + __be32 addr; + __be32 mask; + + u32 valid; + struct list_head list; +}; + +/** + * struct netlbl_af6list - NetLabel IPv6 address list + * @addr: IPv6 address + * @mask: IPv6 address mask + * @valid: valid flag + * @list: list structure, used internally + */ +struct netlbl_af6list { + struct in6_addr addr; + struct in6_addr mask; + + u32 valid; + struct list_head list; +}; + +#define __af4list_entry(ptr) container_of(ptr, struct netlbl_af4list, list) + +static inline struct netlbl_af4list *__af4list_valid(struct list_head *s, + struct list_head *h) +{ + struct list_head *i = s; + struct netlbl_af4list *n = __af4list_entry(s); + while (i != h && !n->valid) { + i = i->next; + n = __af4list_entry(i); + } + return n; +} + +static inline struct netlbl_af4list *__af4list_valid_rcu(struct list_head *s, + struct list_head *h) +{ + struct list_head *i = s; + struct netlbl_af4list *n = __af4list_entry(s); + while (i != h && !n->valid) { + i = rcu_dereference(i->next); + n = __af4list_entry(i); + } + return n; +} + +#define netlbl_af4list_foreach(iter, head) \ + for (iter = __af4list_valid((head)->next, head); \ + prefetch(iter->list.next), &iter->list != (head); \ + iter = __af4list_valid(iter->list.next, head)) + +#define netlbl_af4list_foreach_rcu(iter, head) \ + for (iter = __af4list_valid_rcu((head)->next, head); \ + prefetch(iter->list.next), &iter->list != (head); \ + iter = __af4list_valid_rcu(iter->list.next, head)) + +#define netlbl_af4list_foreach_safe(iter, tmp, head) \ + for (iter = __af4list_valid((head)->next, head), \ + tmp = __af4list_valid(iter->list.next, head); \ + &iter->list != (head); \ + iter = tmp, tmp = __af4list_valid(iter->list.next, head)) + +int netlbl_af4list_add(struct netlbl_af4list *entry, + struct list_head *head); +struct netlbl_af4list *netlbl_af4list_remove(__be32 addr, __be32 mask, + struct list_head *head); +void netlbl_af4list_remove_entry(struct netlbl_af4list *entry); +struct netlbl_af4list *netlbl_af4list_search(__be32 addr, + struct list_head *head); +struct netlbl_af4list *netlbl_af4list_search_exact(__be32 addr, + __be32 mask, + struct list_head *head); +void netlbl_af4list_audit_addr(struct audit_buffer *audit_buf, + int src, const char *dev, + __be32 addr, __be32 mask); + +#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) + +#define __af6list_entry(ptr) container_of(ptr, struct netlbl_af6list, list) + +static inline struct netlbl_af6list *__af6list_valid(struct list_head *s, + struct list_head *h) +{ + struct list_head *i = s; + struct netlbl_af6list *n = __af6list_entry(s); + while (i != h && !n->valid) { + i = i->next; + n = __af6list_entry(i); + } + return n; +} + +static inline struct netlbl_af6list *__af6list_valid_rcu(struct list_head *s, + struct list_head *h) +{ + struct list_head *i = s; + struct netlbl_af6list *n = __af6list_entry(s); + while (i != h && !n->valid) { + i = rcu_dereference(i->next); + n = __af6list_entry(i); + } + return n; +} + +#define netlbl_af6list_foreach(iter, head) \ + for (iter = __af6list_valid((head)->next, head); \ + prefetch(iter->list.next), &iter->list != (head); \ + iter = __af6list_valid(iter->list.next, head)) + +#define netlbl_af6list_foreach_rcu(iter, head) \ + for (iter = __af6list_valid_rcu((head)->next, head); \ + prefetch(iter->list.next), &iter->list != (head); \ + iter = __af6list_valid_rcu(iter->list.next, head)) + +#define netlbl_af6list_foreach_safe(iter, tmp, head) \ + for (iter = __af6list_valid((head)->next, head), \ + tmp = __af6list_valid(iter->list.next, head); \ + &iter->list != (head); \ + iter = tmp, tmp = __af6list_valid(iter->list.next, head)) + +int netlbl_af6list_add(struct netlbl_af6list *entry, + struct list_head *head); +struct netlbl_af6list *netlbl_af6list_remove(const struct in6_addr *addr, + const struct in6_addr *mask, + struct list_head *head); +void netlbl_af6list_remove_entry(struct netlbl_af6list *entry); +struct netlbl_af6list *netlbl_af6list_search(const struct in6_addr *addr, + struct list_head *head); +struct netlbl_af6list *netlbl_af6list_search_exact(const struct in6_addr *addr, + const struct in6_addr *mask, + struct list_head *head); +void netlbl_af6list_audit_addr(struct audit_buffer *audit_buf, + int src, + const char *dev, + const struct in6_addr *addr, + const struct in6_addr *mask); +#endif /* IPV6 */ + +#endif diff --git a/net/netlabel/netlabel_cipso_v4.c b/net/netlabel/netlabel_cipso_v4.c index fdc14a0d21af..fff32b70efa9 100644 --- a/net/netlabel/netlabel_cipso_v4.c +++ b/net/netlabel/netlabel_cipso_v4.c @@ -43,6 +43,7 @@ #include "netlabel_user.h" #include "netlabel_cipso_v4.h" #include "netlabel_mgmt.h" +#include "netlabel_domainhash.h" /* Argument struct for cipso_v4_doi_walk() */ struct netlbl_cipsov4_doiwalk_arg { @@ -51,6 +52,12 @@ struct netlbl_cipsov4_doiwalk_arg { u32 seq; }; +/* Argument struct for netlbl_domhsh_walk() */ +struct netlbl_domhsh_walk_arg { + struct netlbl_audit *audit_info; + u32 doi; +}; + /* NetLabel Generic NETLINK CIPSOv4 family */ static struct genl_family netlbl_cipsov4_gnl_family = { .id = GENL_ID_GENERATE, @@ -81,32 +88,6 @@ static const struct nla_policy netlbl_cipsov4_genl_policy[NLBL_CIPSOV4_A_MAX + 1 */ /** - * netlbl_cipsov4_doi_free - Frees a CIPSO V4 DOI definition - * @entry: the entry's RCU field - * - * Description: - * This function is designed to be used as a callback to the call_rcu() - * function so that the memory allocated to the DOI definition can be released - * safely. - * - */ -void netlbl_cipsov4_doi_free(struct rcu_head *entry) -{ - struct cipso_v4_doi *ptr; - - ptr = container_of(entry, struct cipso_v4_doi, rcu); - switch (ptr->type) { - case CIPSO_V4_MAP_STD: - kfree(ptr->map.std->lvl.cipso); - kfree(ptr->map.std->lvl.local); - kfree(ptr->map.std->cat.cipso); - kfree(ptr->map.std->cat.local); - break; - } - kfree(ptr); -} - -/** * netlbl_cipsov4_add_common - Parse the common sections of a ADD message * @info: the Generic NETLINK info block * @doi_def: the CIPSO V4 DOI definition @@ -151,9 +132,9 @@ static int netlbl_cipsov4_add_common(struct genl_info *info, * @info: the Generic NETLINK info block * * Description: - * Create a new CIPSO_V4_MAP_STD DOI definition based on the given ADD message - * and add it to the CIPSO V4 engine. Return zero on success and non-zero on - * error. + * Create a new CIPSO_V4_MAP_TRANS DOI definition based on the given ADD + * message and add it to the CIPSO V4 engine. Return zero on success and + * non-zero on error. * */ static int netlbl_cipsov4_add_std(struct genl_info *info) @@ -183,7 +164,7 @@ static int netlbl_cipsov4_add_std(struct genl_info *info) ret_val = -ENOMEM; goto add_std_failure; } - doi_def->type = CIPSO_V4_MAP_STD; + doi_def->type = CIPSO_V4_MAP_TRANS; ret_val = netlbl_cipsov4_add_common(info, doi_def); if (ret_val != 0) @@ -342,7 +323,7 @@ static int netlbl_cipsov4_add_std(struct genl_info *info) add_std_failure: if (doi_def) - netlbl_cipsov4_doi_free(&doi_def->rcu); + cipso_v4_doi_free(doi_def); return ret_val; } @@ -379,7 +360,44 @@ static int netlbl_cipsov4_add_pass(struct genl_info *info) return 0; add_pass_failure: - netlbl_cipsov4_doi_free(&doi_def->rcu); + cipso_v4_doi_free(doi_def); + return ret_val; +} + +/** + * netlbl_cipsov4_add_local - Adds a CIPSO V4 DOI definition + * @info: the Generic NETLINK info block + * + * Description: + * Create a new CIPSO_V4_MAP_LOCAL DOI definition based on the given ADD + * message and add it to the CIPSO V4 engine. Return zero on success and + * non-zero on error. + * + */ +static int netlbl_cipsov4_add_local(struct genl_info *info) +{ + int ret_val; + struct cipso_v4_doi *doi_def = NULL; + + if (!info->attrs[NLBL_CIPSOV4_A_TAGLST]) + return -EINVAL; + + doi_def = kmalloc(sizeof(*doi_def), GFP_KERNEL); + if (doi_def == NULL) + return -ENOMEM; + doi_def->type = CIPSO_V4_MAP_LOCAL; + + ret_val = netlbl_cipsov4_add_common(info, doi_def); + if (ret_val != 0) + goto add_local_failure; + + ret_val = cipso_v4_doi_add(doi_def); + if (ret_val != 0) + goto add_local_failure; + return 0; + +add_local_failure: + cipso_v4_doi_free(doi_def); return ret_val; } @@ -412,14 +430,18 @@ static int netlbl_cipsov4_add(struct sk_buff *skb, struct genl_info *info) type = nla_get_u32(info->attrs[NLBL_CIPSOV4_A_MTYPE]); switch (type) { - case CIPSO_V4_MAP_STD: - type_str = "std"; + case CIPSO_V4_MAP_TRANS: + type_str = "trans"; ret_val = netlbl_cipsov4_add_std(info); break; case CIPSO_V4_MAP_PASS: type_str = "pass"; ret_val = netlbl_cipsov4_add_pass(info); break; + case CIPSO_V4_MAP_LOCAL: + type_str = "local"; + ret_val = netlbl_cipsov4_add_local(info); + break; } if (ret_val == 0) atomic_inc(&netlabel_mgmt_protocount); @@ -491,7 +513,7 @@ list_start: doi_def = cipso_v4_doi_getdef(doi); if (doi_def == NULL) { ret_val = -EINVAL; - goto list_failure; + goto list_failure_lock; } ret_val = nla_put_u32(ans_skb, NLBL_CIPSOV4_A_MTYPE, doi_def->type); @@ -516,7 +538,7 @@ list_start: nla_nest_end(ans_skb, nla_a); switch (doi_def->type) { - case CIPSO_V4_MAP_STD: + case CIPSO_V4_MAP_TRANS: nla_a = nla_nest_start(ans_skb, NLBL_CIPSOV4_A_MLSLVLLST); if (nla_a == NULL) { ret_val = -ENOMEM; @@ -584,19 +606,14 @@ list_start: rcu_read_unlock(); genlmsg_end(ans_skb, data); - - ret_val = genlmsg_reply(ans_skb, info); - if (ret_val != 0) - goto list_failure; - - return 0; + return genlmsg_reply(ans_skb, info); list_retry: /* XXX - this limit is a guesstimate */ if (nlsze_mult < 4) { rcu_read_unlock(); kfree_skb(ans_skb); - nlsze_mult++; + nlsze_mult *= 2; goto list_start; } list_failure_lock: @@ -660,7 +677,7 @@ static int netlbl_cipsov4_listall(struct sk_buff *skb, struct netlink_callback *cb) { struct netlbl_cipsov4_doiwalk_arg cb_arg; - int doi_skip = cb->args[0]; + u32 doi_skip = cb->args[0]; cb_arg.nl_cb = cb; cb_arg.skb = skb; @@ -673,6 +690,29 @@ static int netlbl_cipsov4_listall(struct sk_buff *skb, } /** + * netlbl_cipsov4_remove_cb - netlbl_cipsov4_remove() callback for REMOVE + * @entry: LSM domain mapping entry + * @arg: the netlbl_domhsh_walk_arg structure + * + * Description: + * This function is intended for use by netlbl_cipsov4_remove() as the callback + * for the netlbl_domhsh_walk() function; it removes LSM domain map entries + * which are associated with the CIPSO DOI specified in @arg. Returns zero on + * success, negative values on failure. + * + */ +static int netlbl_cipsov4_remove_cb(struct netlbl_dom_map *entry, void *arg) +{ + struct netlbl_domhsh_walk_arg *cb_arg = arg; + + if (entry->type == NETLBL_NLTYPE_CIPSOV4 && + entry->type_def.cipsov4->doi == cb_arg->doi) + return netlbl_domhsh_remove_entry(entry, cb_arg->audit_info); + + return 0; +} + +/** * netlbl_cipsov4_remove - Handle a REMOVE message * @skb: the NETLINK buffer * @info: the Generic NETLINK info block @@ -686,8 +726,11 @@ static int netlbl_cipsov4_remove(struct sk_buff *skb, struct genl_info *info) { int ret_val = -EINVAL; u32 doi = 0; + struct netlbl_domhsh_walk_arg cb_arg; struct audit_buffer *audit_buf; struct netlbl_audit audit_info; + u32 skip_bkt = 0; + u32 skip_chain = 0; if (!info->attrs[NLBL_CIPSOV4_A_DOI]) return -EINVAL; @@ -695,11 +738,15 @@ static int netlbl_cipsov4_remove(struct sk_buff *skb, struct genl_info *info) doi = nla_get_u32(info->attrs[NLBL_CIPSOV4_A_DOI]); netlbl_netlink_auditinfo(skb, &audit_info); - ret_val = cipso_v4_doi_remove(doi, - &audit_info, - netlbl_cipsov4_doi_free); - if (ret_val == 0) - atomic_dec(&netlabel_mgmt_protocount); + cb_arg.doi = doi; + cb_arg.audit_info = &audit_info; + ret_val = netlbl_domhsh_walk(&skip_bkt, &skip_chain, + netlbl_cipsov4_remove_cb, &cb_arg); + if (ret_val == 0 || ret_val == -ENOENT) { + ret_val = cipso_v4_doi_remove(doi, &audit_info); + if (ret_val == 0) + atomic_dec(&netlabel_mgmt_protocount); + } audit_buf = netlbl_audit_start_common(AUDIT_MAC_CIPSOV4_DEL, &audit_info); diff --git a/net/netlabel/netlabel_cipso_v4.h b/net/netlabel/netlabel_cipso_v4.h index 220cb9d06b49..c8a4079261f0 100644 --- a/net/netlabel/netlabel_cipso_v4.h +++ b/net/netlabel/netlabel_cipso_v4.h @@ -45,12 +45,13 @@ * NLBL_CIPSOV4_A_MTYPE * NLBL_CIPSOV4_A_TAGLST * - * If using CIPSO_V4_MAP_STD the following attributes are required: + * If using CIPSO_V4_MAP_TRANS the following attributes are required: * * NLBL_CIPSOV4_A_MLSLVLLST * NLBL_CIPSOV4_A_MLSCATLST * - * If using CIPSO_V4_MAP_PASS no additional attributes are required. + * If using CIPSO_V4_MAP_PASS or CIPSO_V4_MAP_LOCAL no additional attributes + * are required. * * o REMOVE: * Sent by an application to remove a specific DOI mapping table from the @@ -76,12 +77,13 @@ * NLBL_CIPSOV4_A_MTYPE * NLBL_CIPSOV4_A_TAGLST * - * If using CIPSO_V4_MAP_STD the following attributes are required: + * If using CIPSO_V4_MAP_TRANS the following attributes are required: * * NLBL_CIPSOV4_A_MLSLVLLST * NLBL_CIPSOV4_A_MLSCATLST * - * If using CIPSO_V4_MAP_PASS no additional attributes are required. + * If using CIPSO_V4_MAP_PASS or CIPSO_V4_MAP_LOCAL no additional attributes + * are required. * * o LISTALL: * This message is sent by an application to list the valid DOIs on the diff --git a/net/netlabel/netlabel_domainhash.c b/net/netlabel/netlabel_domainhash.c index 02c2f7c0b255..5fadf10e5ddf 100644 --- a/net/netlabel/netlabel_domainhash.c +++ b/net/netlabel/netlabel_domainhash.c @@ -11,7 +11,7 @@ */ /* - * (c) Copyright Hewlett-Packard Development Company, L.P., 2006 + * (c) Copyright Hewlett-Packard Development Company, L.P., 2006, 2008 * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by @@ -30,8 +30,7 @@ */ #include <linux/types.h> -#include <linux/rcupdate.h> -#include <linux/list.h> +#include <linux/rculist.h> #include <linux/skbuff.h> #include <linux/spinlock.h> #include <linux/string.h> @@ -41,6 +40,7 @@ #include <asm/bug.h> #include "netlabel_mgmt.h" +#include "netlabel_addrlist.h" #include "netlabel_domainhash.h" #include "netlabel_user.h" @@ -73,8 +73,28 @@ static struct netlbl_dom_map *netlbl_domhsh_def = NULL; static void netlbl_domhsh_free_entry(struct rcu_head *entry) { struct netlbl_dom_map *ptr; + struct netlbl_af4list *iter4; + struct netlbl_af4list *tmp4; +#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) + struct netlbl_af6list *iter6; + struct netlbl_af6list *tmp6; +#endif /* IPv6 */ ptr = container_of(entry, struct netlbl_dom_map, rcu); + if (ptr->type == NETLBL_NLTYPE_ADDRSELECT) { + netlbl_af4list_foreach_safe(iter4, tmp4, + &ptr->type_def.addrsel->list4) { + netlbl_af4list_remove_entry(iter4); + kfree(netlbl_domhsh_addr4_entry(iter4)); + } +#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) + netlbl_af6list_foreach_safe(iter6, tmp6, + &ptr->type_def.addrsel->list6) { + netlbl_af6list_remove_entry(iter6); + kfree(netlbl_domhsh_addr6_entry(iter6)); + } +#endif /* IPv6 */ + } kfree(ptr->domain); kfree(ptr); } @@ -116,13 +136,13 @@ static u32 netlbl_domhsh_hash(const char *key) static struct netlbl_dom_map *netlbl_domhsh_search(const char *domain) { u32 bkt; + struct list_head *bkt_list; struct netlbl_dom_map *iter; if (domain != NULL) { bkt = netlbl_domhsh_hash(domain); - list_for_each_entry_rcu(iter, - &rcu_dereference(netlbl_domhsh)->tbl[bkt], - list) + bkt_list = &rcu_dereference(netlbl_domhsh)->tbl[bkt]; + list_for_each_entry_rcu(iter, bkt_list, list) if (iter->valid && strcmp(iter->domain, domain) == 0) return iter; } @@ -157,6 +177,69 @@ static struct netlbl_dom_map *netlbl_domhsh_search_def(const char *domain) return entry; } +/** + * netlbl_domhsh_audit_add - Generate an audit entry for an add event + * @entry: the entry being added + * @addr4: the IPv4 address information + * @addr6: the IPv6 address information + * @result: the result code + * @audit_info: NetLabel audit information + * + * Description: + * Generate an audit record for adding a new NetLabel/LSM mapping entry with + * the given information. Caller is responsibile for holding the necessary + * locks. + * + */ +static void netlbl_domhsh_audit_add(struct netlbl_dom_map *entry, + struct netlbl_af4list *addr4, + struct netlbl_af6list *addr6, + int result, + struct netlbl_audit *audit_info) +{ + struct audit_buffer *audit_buf; + struct cipso_v4_doi *cipsov4 = NULL; + u32 type; + + audit_buf = netlbl_audit_start_common(AUDIT_MAC_MAP_ADD, audit_info); + if (audit_buf != NULL) { + audit_log_format(audit_buf, " nlbl_domain=%s", + entry->domain ? entry->domain : "(default)"); + if (addr4 != NULL) { + struct netlbl_domaddr4_map *map4; + map4 = netlbl_domhsh_addr4_entry(addr4); + type = map4->type; + cipsov4 = map4->type_def.cipsov4; + netlbl_af4list_audit_addr(audit_buf, 0, NULL, + addr4->addr, addr4->mask); +#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) + } else if (addr6 != NULL) { + struct netlbl_domaddr6_map *map6; + map6 = netlbl_domhsh_addr6_entry(addr6); + type = map6->type; + netlbl_af6list_audit_addr(audit_buf, 0, NULL, + &addr6->addr, &addr6->mask); +#endif /* IPv6 */ + } else { + type = entry->type; + cipsov4 = entry->type_def.cipsov4; + } + switch (type) { + case NETLBL_NLTYPE_UNLABELED: + audit_log_format(audit_buf, " nlbl_protocol=unlbl"); + break; + case NETLBL_NLTYPE_CIPSOV4: + BUG_ON(cipsov4 == NULL); + audit_log_format(audit_buf, + " nlbl_protocol=cipsov4 cipso_doi=%u", + cipsov4->doi); + break; + } + audit_log_format(audit_buf, " res=%u", result == 0 ? 1 : 0); + audit_log_end(audit_buf); + } +} + /* * Domain Hash Table Functions */ @@ -214,74 +297,106 @@ int __init netlbl_domhsh_init(u32 size) int netlbl_domhsh_add(struct netlbl_dom_map *entry, struct netlbl_audit *audit_info) { - int ret_val; - u32 bkt; - struct audit_buffer *audit_buf; - - switch (entry->type) { - case NETLBL_NLTYPE_UNLABELED: - ret_val = 0; - break; - case NETLBL_NLTYPE_CIPSOV4: - ret_val = cipso_v4_doi_domhsh_add(entry->type_def.cipsov4, - entry->domain); - break; - default: - return -EINVAL; - } - if (ret_val != 0) - return ret_val; - - entry->valid = 1; - INIT_RCU_HEAD(&entry->rcu); + int ret_val = 0; + struct netlbl_dom_map *entry_old; + struct netlbl_af4list *iter4; + struct netlbl_af4list *tmp4; +#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) + struct netlbl_af6list *iter6; + struct netlbl_af6list *tmp6; +#endif /* IPv6 */ rcu_read_lock(); + spin_lock(&netlbl_domhsh_lock); - if (entry->domain != NULL) { - bkt = netlbl_domhsh_hash(entry->domain); - if (netlbl_domhsh_search(entry->domain) == NULL) + if (entry->domain != NULL) + entry_old = netlbl_domhsh_search(entry->domain); + else + entry_old = netlbl_domhsh_search_def(entry->domain); + if (entry_old == NULL) { + entry->valid = 1; + INIT_RCU_HEAD(&entry->rcu); + + if (entry->domain != NULL) { + u32 bkt = netlbl_domhsh_hash(entry->domain); list_add_tail_rcu(&entry->list, &rcu_dereference(netlbl_domhsh)->tbl[bkt]); - else - ret_val = -EEXIST; - } else { - INIT_LIST_HEAD(&entry->list); - if (rcu_dereference(netlbl_domhsh_def) == NULL) + } else { + INIT_LIST_HEAD(&entry->list); rcu_assign_pointer(netlbl_domhsh_def, entry); - else - ret_val = -EEXIST; - } - spin_unlock(&netlbl_domhsh_lock); - audit_buf = netlbl_audit_start_common(AUDIT_MAC_MAP_ADD, audit_info); - if (audit_buf != NULL) { - audit_log_format(audit_buf, - " nlbl_domain=%s", - entry->domain ? entry->domain : "(default)"); - switch (entry->type) { - case NETLBL_NLTYPE_UNLABELED: - audit_log_format(audit_buf, " nlbl_protocol=unlbl"); - break; - case NETLBL_NLTYPE_CIPSOV4: - audit_log_format(audit_buf, - " nlbl_protocol=cipsov4 cipso_doi=%u", - entry->type_def.cipsov4->doi); - break; } - audit_log_format(audit_buf, " res=%u", ret_val == 0 ? 1 : 0); - audit_log_end(audit_buf); - } - rcu_read_unlock(); - if (ret_val != 0) { - switch (entry->type) { - case NETLBL_NLTYPE_CIPSOV4: - if (cipso_v4_doi_domhsh_remove(entry->type_def.cipsov4, - entry->domain) != 0) - BUG(); - break; + if (entry->type == NETLBL_NLTYPE_ADDRSELECT) { + netlbl_af4list_foreach_rcu(iter4, + &entry->type_def.addrsel->list4) + netlbl_domhsh_audit_add(entry, iter4, NULL, + ret_val, audit_info); +#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) + netlbl_af6list_foreach_rcu(iter6, + &entry->type_def.addrsel->list6) + netlbl_domhsh_audit_add(entry, NULL, iter6, + ret_val, audit_info); +#endif /* IPv6 */ + } else + netlbl_domhsh_audit_add(entry, NULL, NULL, + ret_val, audit_info); + } else if (entry_old->type == NETLBL_NLTYPE_ADDRSELECT && + entry->type == NETLBL_NLTYPE_ADDRSELECT) { + struct list_head *old_list4; + struct list_head *old_list6; + + old_list4 = &entry_old->type_def.addrsel->list4; + old_list6 = &entry_old->type_def.addrsel->list6; + + /* we only allow the addition of address selectors if all of + * the selectors do not exist in the existing domain map */ + netlbl_af4list_foreach_rcu(iter4, + &entry->type_def.addrsel->list4) + if (netlbl_af4list_search_exact(iter4->addr, + iter4->mask, + old_list4)) { + ret_val = -EEXIST; + goto add_return; + } +#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) + netlbl_af6list_foreach_rcu(iter6, + &entry->type_def.addrsel->list6) + if (netlbl_af6list_search_exact(&iter6->addr, + &iter6->mask, + old_list6)) { + ret_val = -EEXIST; + goto add_return; + } +#endif /* IPv6 */ + + netlbl_af4list_foreach_safe(iter4, tmp4, + &entry->type_def.addrsel->list4) { + netlbl_af4list_remove_entry(iter4); + iter4->valid = 1; + ret_val = netlbl_af4list_add(iter4, old_list4); + netlbl_domhsh_audit_add(entry_old, iter4, NULL, + ret_val, audit_info); + if (ret_val != 0) + goto add_return; } - } +#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) + netlbl_af6list_foreach_safe(iter6, tmp6, + &entry->type_def.addrsel->list6) { + netlbl_af6list_remove_entry(iter6); + iter6->valid = 1; + ret_val = netlbl_af6list_add(iter6, old_list6); + netlbl_domhsh_audit_add(entry_old, NULL, iter6, + ret_val, audit_info); + if (ret_val != 0) + goto add_return; + } +#endif /* IPv6 */ + } else + ret_val = -EINVAL; +add_return: + spin_unlock(&netlbl_domhsh_lock); + rcu_read_unlock(); return ret_val; } @@ -303,35 +418,26 @@ int netlbl_domhsh_add_default(struct netlbl_dom_map *entry, } /** - * netlbl_domhsh_remove - Removes an entry from the domain hash table - * @domain: the domain to remove + * netlbl_domhsh_remove_entry - Removes a given entry from the domain table + * @entry: the entry to remove * @audit_info: NetLabel audit information * * Description: * Removes an entry from the domain hash table and handles any updates to the - * lower level protocol handler (i.e. CIPSO). Returns zero on success, - * negative on failure. + * lower level protocol handler (i.e. CIPSO). Caller is responsible for + * ensuring that the RCU read lock is held. Returns zero on success, negative + * on failure. * */ -int netlbl_domhsh_remove(const char *domain, struct netlbl_audit *audit_info) +int netlbl_domhsh_remove_entry(struct netlbl_dom_map *entry, + struct netlbl_audit *audit_info) { - int ret_val = -ENOENT; - struct netlbl_dom_map *entry; + int ret_val = 0; struct audit_buffer *audit_buf; - rcu_read_lock(); - if (domain) - entry = netlbl_domhsh_search(domain); - else - entry = netlbl_domhsh_search_def(domain); if (entry == NULL) - goto remove_return; - switch (entry->type) { - case NETLBL_NLTYPE_CIPSOV4: - cipso_v4_doi_domhsh_remove(entry->type_def.cipsov4, - entry->domain); - break; - } + return -ENOENT; + spin_lock(&netlbl_domhsh_lock); if (entry->valid) { entry->valid = 0; @@ -339,8 +445,8 @@ int netlbl_domhsh_remove(const char *domain, struct netlbl_audit *audit_info) list_del_rcu(&entry->list); else rcu_assign_pointer(netlbl_domhsh_def, NULL); - ret_val = 0; - } + } else + ret_val = -ENOENT; spin_unlock(&netlbl_domhsh_lock); audit_buf = netlbl_audit_start_common(AUDIT_MAC_MAP_DEL, audit_info); @@ -352,10 +458,54 @@ int netlbl_domhsh_remove(const char *domain, struct netlbl_audit *audit_info) audit_log_end(audit_buf); } -remove_return: - rcu_read_unlock(); - if (ret_val == 0) + if (ret_val == 0) { + struct netlbl_af4list *iter4; + struct netlbl_domaddr4_map *map4; + + switch (entry->type) { + case NETLBL_NLTYPE_ADDRSELECT: + netlbl_af4list_foreach_rcu(iter4, + &entry->type_def.addrsel->list4) { + map4 = netlbl_domhsh_addr4_entry(iter4); + cipso_v4_doi_putdef(map4->type_def.cipsov4); + } + /* no need to check the IPv6 list since we currently + * support only unlabeled protocols for IPv6 */ + break; + case NETLBL_NLTYPE_CIPSOV4: + cipso_v4_doi_putdef(entry->type_def.cipsov4); + break; + } call_rcu(&entry->rcu, netlbl_domhsh_free_entry); + } + + return ret_val; +} + +/** + * netlbl_domhsh_remove - Removes an entry from the domain hash table + * @domain: the domain to remove + * @audit_info: NetLabel audit information + * + * Description: + * Removes an entry from the domain hash table and handles any updates to the + * lower level protocol handler (i.e. CIPSO). Returns zero on success, + * negative on failure. + * + */ +int netlbl_domhsh_remove(const char *domain, struct netlbl_audit *audit_info) +{ + int ret_val; + struct netlbl_dom_map *entry; + + rcu_read_lock(); + if (domain) + entry = netlbl_domhsh_search(domain); + else + entry = netlbl_domhsh_search_def(domain); + ret_val = netlbl_domhsh_remove_entry(entry, audit_info); + rcu_read_unlock(); + return ret_val; } @@ -390,6 +540,70 @@ struct netlbl_dom_map *netlbl_domhsh_getentry(const char *domain) } /** + * netlbl_domhsh_getentry_af4 - Get an entry from the domain hash table + * @domain: the domain name to search for + * @addr: the IP address to search for + * + * Description: + * Look through the domain hash table searching for an entry to match @domain + * and @addr, return a pointer to a copy of the entry or NULL. The caller is + * responsible for ensuring that rcu_read_[un]lock() is called. + * + */ +struct netlbl_domaddr4_map *netlbl_domhsh_getentry_af4(const char *domain, + __be32 addr) +{ + struct netlbl_dom_map *dom_iter; + struct netlbl_af4list *addr_iter; + + dom_iter = netlbl_domhsh_search_def(domain); + if (dom_iter == NULL) + return NULL; + if (dom_iter->type != NETLBL_NLTYPE_ADDRSELECT) + return NULL; + + addr_iter = netlbl_af4list_search(addr, + &dom_iter->type_def.addrsel->list4); + if (addr_iter == NULL) + return NULL; + + return netlbl_domhsh_addr4_entry(addr_iter); +} + +#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) +/** + * netlbl_domhsh_getentry_af6 - Get an entry from the domain hash table + * @domain: the domain name to search for + * @addr: the IP address to search for + * + * Description: + * Look through the domain hash table searching for an entry to match @domain + * and @addr, return a pointer to a copy of the entry or NULL. The caller is + * responsible for ensuring that rcu_read_[un]lock() is called. + * + */ +struct netlbl_domaddr6_map *netlbl_domhsh_getentry_af6(const char *domain, + const struct in6_addr *addr) +{ + struct netlbl_dom_map *dom_iter; + struct netlbl_af6list *addr_iter; + + dom_iter = netlbl_domhsh_search_def(domain); + if (dom_iter == NULL) + return NULL; + if (dom_iter->type != NETLBL_NLTYPE_ADDRSELECT) + return NULL; + + addr_iter = netlbl_af6list_search(addr, + &dom_iter->type_def.addrsel->list6); + if (addr_iter == NULL) + return NULL; + + return netlbl_domhsh_addr6_entry(addr_iter); +} +#endif /* IPv6 */ + +/** * netlbl_domhsh_walk - Iterate through the domain mapping hash table * @skip_bkt: the number of buckets to skip at the start * @skip_chain: the number of entries to skip in the first iterated bucket @@ -411,6 +625,7 @@ int netlbl_domhsh_walk(u32 *skip_bkt, { int ret_val = -ENOENT; u32 iter_bkt; + struct list_head *iter_list; struct netlbl_dom_map *iter_entry; u32 chain_cnt = 0; @@ -418,9 +633,8 @@ int netlbl_domhsh_walk(u32 *skip_bkt, for (iter_bkt = *skip_bkt; iter_bkt < rcu_dereference(netlbl_domhsh)->size; iter_bkt++, chain_cnt = 0) { - list_for_each_entry_rcu(iter_entry, - &rcu_dereference(netlbl_domhsh)->tbl[iter_bkt], - list) + iter_list = &rcu_dereference(netlbl_domhsh)->tbl[iter_bkt]; + list_for_each_entry_rcu(iter_entry, iter_list, list) if (iter_entry->valid) { if (chain_cnt++ < *skip_chain) continue; diff --git a/net/netlabel/netlabel_domainhash.h b/net/netlabel/netlabel_domainhash.h index 8220990ceb96..bfcb6763a1a1 100644 --- a/net/netlabel/netlabel_domainhash.h +++ b/net/netlabel/netlabel_domainhash.h @@ -11,7 +11,7 @@ */ /* - * (c) Copyright Hewlett-Packard Development Company, L.P., 2006 + * (c) Copyright Hewlett-Packard Development Company, L.P., 2006, 2008 * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by @@ -36,16 +36,43 @@ #include <linux/rcupdate.h> #include <linux/list.h> +#include "netlabel_addrlist.h" + /* Domain hash table size */ /* XXX - currently this number is an uneducated guess */ #define NETLBL_DOMHSH_BITSIZE 7 -/* Domain mapping definition struct */ +/* Domain mapping definition structures */ +#define netlbl_domhsh_addr4_entry(iter) \ + container_of(iter, struct netlbl_domaddr4_map, list) +struct netlbl_domaddr4_map { + u32 type; + union { + struct cipso_v4_doi *cipsov4; + } type_def; + + struct netlbl_af4list list; +}; +#define netlbl_domhsh_addr6_entry(iter) \ + container_of(iter, struct netlbl_domaddr6_map, list) +struct netlbl_domaddr6_map { + u32 type; + + /* NOTE: no 'type_def' union needed at present since we don't currently + * support any IPv6 labeling protocols */ + + struct netlbl_af6list list; +}; +struct netlbl_domaddr_map { + struct list_head list4; + struct list_head list6; +}; struct netlbl_dom_map { char *domain; u32 type; union { struct cipso_v4_doi *cipsov4; + struct netlbl_domaddr_map *addrsel; } type_def; u32 valid; @@ -61,12 +88,21 @@ int netlbl_domhsh_add(struct netlbl_dom_map *entry, struct netlbl_audit *audit_info); int netlbl_domhsh_add_default(struct netlbl_dom_map *entry, struct netlbl_audit *audit_info); +int netlbl_domhsh_remove_entry(struct netlbl_dom_map *entry, + struct netlbl_audit *audit_info); int netlbl_domhsh_remove(const char *domain, struct netlbl_audit *audit_info); int netlbl_domhsh_remove_default(struct netlbl_audit *audit_info); struct netlbl_dom_map *netlbl_domhsh_getentry(const char *domain); +struct netlbl_domaddr4_map *netlbl_domhsh_getentry_af4(const char *domain, + __be32 addr); int netlbl_domhsh_walk(u32 *skip_bkt, u32 *skip_chain, int (*callback) (struct netlbl_dom_map *entry, void *arg), void *cb_arg); +#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) +struct netlbl_domaddr6_map *netlbl_domhsh_getentry_af6(const char *domain, + const struct in6_addr *addr); +#endif /* IPv6 */ + #endif diff --git a/net/netlabel/netlabel_kapi.c b/net/netlabel/netlabel_kapi.c index 39793a1a93aa..b32eceb3ab0d 100644 --- a/net/netlabel/netlabel_kapi.c +++ b/net/netlabel/netlabel_kapi.c @@ -10,7 +10,7 @@ */ /* - * (c) Copyright Hewlett-Packard Development Company, L.P., 2006 + * (c) Copyright Hewlett-Packard Development Company, L.P., 2006, 2008 * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by @@ -82,7 +82,7 @@ int netlbl_cfg_unlbl_add_map(const char *domain, entry = kzalloc(sizeof(*entry), GFP_ATOMIC); if (entry == NULL) - goto cfg_unlbl_add_map_failure; + return -ENOMEM; if (domain != NULL) { entry->domain = kstrdup(domain, GFP_ATOMIC); if (entry->domain == NULL) @@ -104,49 +104,6 @@ cfg_unlbl_add_map_failure: } /** - * netlbl_cfg_cipsov4_add - Add a new CIPSOv4 DOI definition - * @doi_def: the DOI definition - * @audit_info: NetLabel audit information - * - * Description: - * Add a new CIPSOv4 DOI definition to the NetLabel subsystem. Returns zero on - * success, negative values on failure. - * - */ -int netlbl_cfg_cipsov4_add(struct cipso_v4_doi *doi_def, - struct netlbl_audit *audit_info) -{ - int ret_val; - const char *type_str; - struct audit_buffer *audit_buf; - - ret_val = cipso_v4_doi_add(doi_def); - - audit_buf = netlbl_audit_start_common(AUDIT_MAC_CIPSOV4_ADD, - audit_info); - if (audit_buf != NULL) { - switch (doi_def->type) { - case CIPSO_V4_MAP_STD: - type_str = "std"; - break; - case CIPSO_V4_MAP_PASS: - type_str = "pass"; - break; - default: - type_str = "(unknown)"; - } - audit_log_format(audit_buf, - " cipso_doi=%u cipso_type=%s res=%u", - doi_def->doi, - type_str, - ret_val == 0 ? 1 : 0); - audit_log_end(audit_buf); - } - - return ret_val; -} - -/** * netlbl_cfg_cipsov4_add_map - Add a new CIPSOv4 DOI definition and mapping * @doi_def: the DOI definition * @domain: the domain mapping to add @@ -164,58 +121,71 @@ int netlbl_cfg_cipsov4_add_map(struct cipso_v4_doi *doi_def, struct netlbl_audit *audit_info) { int ret_val = -ENOMEM; + u32 doi; + u32 doi_type; struct netlbl_dom_map *entry; + const char *type_str; + struct audit_buffer *audit_buf; + + doi = doi_def->doi; + doi_type = doi_def->type; entry = kzalloc(sizeof(*entry), GFP_ATOMIC); if (entry == NULL) - goto cfg_cipsov4_add_map_failure; + return -ENOMEM; if (domain != NULL) { entry->domain = kstrdup(domain, GFP_ATOMIC); if (entry->domain == NULL) goto cfg_cipsov4_add_map_failure; } - entry->type = NETLBL_NLTYPE_CIPSOV4; - entry->type_def.cipsov4 = doi_def; - - /* Grab a RCU read lock here so nothing happens to the doi_def variable - * between adding it to the CIPSOv4 protocol engine and adding a - * domain mapping for it. */ - rcu_read_lock(); - ret_val = netlbl_cfg_cipsov4_add(doi_def, audit_info); + ret_val = cipso_v4_doi_add(doi_def); if (ret_val != 0) - goto cfg_cipsov4_add_map_failure_unlock; + goto cfg_cipsov4_add_map_failure_remove_doi; + entry->type = NETLBL_NLTYPE_CIPSOV4; + entry->type_def.cipsov4 = cipso_v4_doi_getdef(doi); + if (entry->type_def.cipsov4 == NULL) { + ret_val = -ENOENT; + goto cfg_cipsov4_add_map_failure_remove_doi; + } ret_val = netlbl_domhsh_add(entry, audit_info); if (ret_val != 0) - goto cfg_cipsov4_add_map_failure_remove_doi; - rcu_read_unlock(); + goto cfg_cipsov4_add_map_failure_release_doi; - return 0; +cfg_cipsov4_add_map_return: + audit_buf = netlbl_audit_start_common(AUDIT_MAC_CIPSOV4_ADD, + audit_info); + if (audit_buf != NULL) { + switch (doi_type) { + case CIPSO_V4_MAP_TRANS: + type_str = "trans"; + break; + case CIPSO_V4_MAP_PASS: + type_str = "pass"; + break; + case CIPSO_V4_MAP_LOCAL: + type_str = "local"; + break; + default: + type_str = "(unknown)"; + } + audit_log_format(audit_buf, + " cipso_doi=%u cipso_type=%s res=%u", + doi, type_str, ret_val == 0 ? 1 : 0); + audit_log_end(audit_buf); + } + return ret_val; + +cfg_cipsov4_add_map_failure_release_doi: + cipso_v4_doi_putdef(doi_def); cfg_cipsov4_add_map_failure_remove_doi: - cipso_v4_doi_remove(doi_def->doi, audit_info, netlbl_cipsov4_doi_free); -cfg_cipsov4_add_map_failure_unlock: - rcu_read_unlock(); + cipso_v4_doi_remove(doi, audit_info); cfg_cipsov4_add_map_failure: if (entry != NULL) kfree(entry->domain); kfree(entry); - return ret_val; -} - -/** - * netlbl_cfg_cipsov4_del - Removean existing CIPSOv4 DOI definition - * @doi: the CIPSO DOI value - * @audit_info: NetLabel audit information - * - * Description: - * Removes an existing CIPSOv4 DOI definition from the NetLabel subsystem. - * Returns zero on success, negative values on failure. - * - */ -int netlbl_cfg_cipsov4_del(u32 doi, struct netlbl_audit *audit_info) -{ - return cipso_v4_doi_remove(doi, audit_info, netlbl_cipsov4_doi_free); + goto cfg_cipsov4_add_map_return; } /* @@ -452,7 +422,9 @@ int netlbl_enabled(void) * Attach the correct label to the given socket using the security attributes * specified in @secattr. This function requires exclusive access to @sk, * which means it either needs to be in the process of being created or locked. - * Returns zero on success, negative values on failure. + * Returns zero on success, -EDESTADDRREQ if the domain is configured to use + * network address selectors (can't blindly label the socket), and negative + * values on all other failures. * */ int netlbl_sock_setattr(struct sock *sk, @@ -466,6 +438,9 @@ int netlbl_sock_setattr(struct sock *sk, if (dom_entry == NULL) goto socket_setattr_return; switch (dom_entry->type) { + case NETLBL_NLTYPE_ADDRSELECT: + ret_val = -EDESTADDRREQ; + break; case NETLBL_NLTYPE_CIPSOV4: ret_val = cipso_v4_sock_setattr(sk, dom_entry->type_def.cipsov4, @@ -484,6 +459,20 @@ socket_setattr_return: } /** + * netlbl_sock_delattr - Delete all the NetLabel labels on a socket + * @sk: the socket + * + * Description: + * Remove all the NetLabel labeling from @sk. The caller is responsible for + * ensuring that @sk is locked. + * + */ +void netlbl_sock_delattr(struct sock *sk) +{ + cipso_v4_sock_delattr(sk); +} + +/** * netlbl_sock_getattr - Determine the security attributes of a sock * @sk: the sock * @secattr: the security attributes @@ -501,6 +490,128 @@ int netlbl_sock_getattr(struct sock *sk, struct netlbl_lsm_secattr *secattr) } /** + * netlbl_conn_setattr - Label a connected socket using the correct protocol + * @sk: the socket to label + * @addr: the destination address + * @secattr: the security attributes + * + * Description: + * Attach the correct label to the given connected socket using the security + * attributes specified in @secattr. The caller is responsible for ensuring + * that @sk is locked. Returns zero on success, negative values on failure. + * + */ +int netlbl_conn_setattr(struct sock *sk, + struct sockaddr *addr, + const struct netlbl_lsm_secattr *secattr) +{ + int ret_val; + struct sockaddr_in *addr4; + struct netlbl_domaddr4_map *af4_entry; + + rcu_read_lock(); + switch (addr->sa_family) { + case AF_INET: + addr4 = (struct sockaddr_in *)addr; + af4_entry = netlbl_domhsh_getentry_af4(secattr->domain, + addr4->sin_addr.s_addr); + if (af4_entry == NULL) { + ret_val = -ENOENT; + goto conn_setattr_return; + } + switch (af4_entry->type) { + case NETLBL_NLTYPE_CIPSOV4: + ret_val = cipso_v4_sock_setattr(sk, + af4_entry->type_def.cipsov4, + secattr); + break; + case NETLBL_NLTYPE_UNLABELED: + /* just delete the protocols we support for right now + * but we could remove other protocols if needed */ + cipso_v4_sock_delattr(sk); + ret_val = 0; + break; + default: + ret_val = -ENOENT; + } + break; +#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) + case AF_INET6: + /* since we don't support any IPv6 labeling protocols right + * now we can optimize everything away until we do */ + ret_val = 0; + break; +#endif /* IPv6 */ + default: + ret_val = 0; + } + +conn_setattr_return: + rcu_read_unlock(); + return ret_val; +} + +/** + * netlbl_skbuff_setattr - Label a packet using the correct protocol + * @skb: the packet + * @family: protocol family + * @secattr: the security attributes + * + * Description: + * Attach the correct label to the given packet using the security attributes + * specified in @secattr. Returns zero on success, negative values on failure. + * + */ +int netlbl_skbuff_setattr(struct sk_buff *skb, + u16 family, + const struct netlbl_lsm_secattr *secattr) +{ + int ret_val; + struct iphdr *hdr4; + struct netlbl_domaddr4_map *af4_entry; + + rcu_read_lock(); + switch (family) { + case AF_INET: + hdr4 = ip_hdr(skb); + af4_entry = netlbl_domhsh_getentry_af4(secattr->domain, + hdr4->daddr); + if (af4_entry == NULL) { + ret_val = -ENOENT; + goto skbuff_setattr_return; + } + switch (af4_entry->type) { + case NETLBL_NLTYPE_CIPSOV4: + ret_val = cipso_v4_skbuff_setattr(skb, + af4_entry->type_def.cipsov4, + secattr); + break; + case NETLBL_NLTYPE_UNLABELED: + /* just delete the protocols we support for right now + * but we could remove other protocols if needed */ + ret_val = cipso_v4_skbuff_delattr(skb); + break; + default: + ret_val = -ENOENT; + } + break; +#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) + case AF_INET6: + /* since we don't support any IPv6 labeling protocols right + * now we can optimize everything away until we do */ + ret_val = 0; + break; +#endif /* IPv6 */ + default: + ret_val = 0; + } + +skbuff_setattr_return: + rcu_read_unlock(); + return ret_val; +} + +/** * netlbl_skbuff_getattr - Determine the security attributes of a packet * @skb: the packet * @family: protocol family @@ -528,6 +639,7 @@ int netlbl_skbuff_getattr(const struct sk_buff *skb, * netlbl_skbuff_err - Handle a LSM error on a sk_buff * @skb: the packet * @error: the error code + * @gateway: true if host is acting as a gateway, false otherwise * * Description: * Deal with a LSM problem when handling the packet in @skb, typically this is @@ -535,10 +647,10 @@ int netlbl_skbuff_getattr(const struct sk_buff *skb, * according to the packet's labeling protocol. * */ -void netlbl_skbuff_err(struct sk_buff *skb, int error) +void netlbl_skbuff_err(struct sk_buff *skb, int error, int gateway) { if (CIPSO_V4_OPTEXIST(skb)) - cipso_v4_error(skb, error, 0); + cipso_v4_error(skb, error, gateway); } /** diff --git a/net/netlabel/netlabel_mgmt.c b/net/netlabel/netlabel_mgmt.c index 22c191267808..ee769ecaa13c 100644 --- a/net/netlabel/netlabel_mgmt.c +++ b/net/netlabel/netlabel_mgmt.c @@ -10,7 +10,7 @@ */ /* - * (c) Copyright Hewlett-Packard Development Company, L.P., 2006 + * (c) Copyright Hewlett-Packard Development Company, L.P., 2006, 2008 * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by @@ -32,9 +32,13 @@ #include <linux/socket.h> #include <linux/string.h> #include <linux/skbuff.h> +#include <linux/in.h> +#include <linux/in6.h> #include <net/sock.h> #include <net/netlink.h> #include <net/genetlink.h> +#include <net/ip.h> +#include <net/ipv6.h> #include <net/netlabel.h> #include <net/cipso_ipv4.h> #include <asm/atomic.h> @@ -71,86 +75,337 @@ static const struct nla_policy netlbl_mgmt_genl_policy[NLBL_MGMT_A_MAX + 1] = { }; /* - * NetLabel Command Handlers + * Helper Functions */ /** * netlbl_mgmt_add - Handle an ADD message - * @skb: the NETLINK buffer * @info: the Generic NETLINK info block + * @audit_info: NetLabel audit information * * Description: - * Process a user generated ADD message and add the domains from the message - * to the hash table. See netlabel.h for a description of the message format. - * Returns zero on success, negative values on failure. + * Helper function for the ADD and ADDDEF messages to add the domain mappings + * from the message to the hash table. See netlabel.h for a description of the + * message format. Returns zero on success, negative values on failure. * */ -static int netlbl_mgmt_add(struct sk_buff *skb, struct genl_info *info) +static int netlbl_mgmt_add_common(struct genl_info *info, + struct netlbl_audit *audit_info) { int ret_val = -EINVAL; struct netlbl_dom_map *entry = NULL; - size_t tmp_size; + struct netlbl_domaddr_map *addrmap = NULL; + struct cipso_v4_doi *cipsov4 = NULL; u32 tmp_val; - struct netlbl_audit audit_info; - - if (!info->attrs[NLBL_MGMT_A_DOMAIN] || - !info->attrs[NLBL_MGMT_A_PROTOCOL]) - goto add_failure; - - netlbl_netlink_auditinfo(skb, &audit_info); entry = kzalloc(sizeof(*entry), GFP_KERNEL); if (entry == NULL) { ret_val = -ENOMEM; goto add_failure; } - tmp_size = nla_len(info->attrs[NLBL_MGMT_A_DOMAIN]); - entry->domain = kmalloc(tmp_size, GFP_KERNEL); - if (entry->domain == NULL) { - ret_val = -ENOMEM; - goto add_failure; - } entry->type = nla_get_u32(info->attrs[NLBL_MGMT_A_PROTOCOL]); - nla_strlcpy(entry->domain, info->attrs[NLBL_MGMT_A_DOMAIN], tmp_size); + if (info->attrs[NLBL_MGMT_A_DOMAIN]) { + size_t tmp_size = nla_len(info->attrs[NLBL_MGMT_A_DOMAIN]); + entry->domain = kmalloc(tmp_size, GFP_KERNEL); + if (entry->domain == NULL) { + ret_val = -ENOMEM; + goto add_failure; + } + nla_strlcpy(entry->domain, + info->attrs[NLBL_MGMT_A_DOMAIN], tmp_size); + } + + /* NOTE: internally we allow/use a entry->type value of + * NETLBL_NLTYPE_ADDRSELECT but we don't currently allow users + * to pass that as a protocol value because we need to know the + * "real" protocol */ switch (entry->type) { case NETLBL_NLTYPE_UNLABELED: - ret_val = netlbl_domhsh_add(entry, &audit_info); break; case NETLBL_NLTYPE_CIPSOV4: if (!info->attrs[NLBL_MGMT_A_CV4DOI]) goto add_failure; tmp_val = nla_get_u32(info->attrs[NLBL_MGMT_A_CV4DOI]); - /* We should be holding a rcu_read_lock() here while we hold - * the result but since the entry will always be deleted when - * the CIPSO DOI is deleted we aren't going to keep the - * lock. */ - rcu_read_lock(); - entry->type_def.cipsov4 = cipso_v4_doi_getdef(tmp_val); - if (entry->type_def.cipsov4 == NULL) { - rcu_read_unlock(); + cipsov4 = cipso_v4_doi_getdef(tmp_val); + if (cipsov4 == NULL) goto add_failure; - } - ret_val = netlbl_domhsh_add(entry, &audit_info); - rcu_read_unlock(); + entry->type_def.cipsov4 = cipsov4; break; default: goto add_failure; } + + if (info->attrs[NLBL_MGMT_A_IPV4ADDR]) { + struct in_addr *addr; + struct in_addr *mask; + struct netlbl_domaddr4_map *map; + + addrmap = kzalloc(sizeof(*addrmap), GFP_KERNEL); + if (addrmap == NULL) { + ret_val = -ENOMEM; + goto add_failure; + } + INIT_LIST_HEAD(&addrmap->list4); + INIT_LIST_HEAD(&addrmap->list6); + + if (nla_len(info->attrs[NLBL_MGMT_A_IPV4ADDR]) != + sizeof(struct in_addr)) { + ret_val = -EINVAL; + goto add_failure; + } + if (nla_len(info->attrs[NLBL_MGMT_A_IPV4MASK]) != + sizeof(struct in_addr)) { + ret_val = -EINVAL; + goto add_failure; + } + addr = nla_data(info->attrs[NLBL_MGMT_A_IPV4ADDR]); + mask = nla_data(info->attrs[NLBL_MGMT_A_IPV4MASK]); + + map = kzalloc(sizeof(*map), GFP_KERNEL); + if (map == NULL) { + ret_val = -ENOMEM; + goto add_failure; + } + map->list.addr = addr->s_addr & mask->s_addr; + map->list.mask = mask->s_addr; + map->list.valid = 1; + map->type = entry->type; + if (cipsov4) + map->type_def.cipsov4 = cipsov4; + + ret_val = netlbl_af4list_add(&map->list, &addrmap->list4); + if (ret_val != 0) { + kfree(map); + goto add_failure; + } + + entry->type = NETLBL_NLTYPE_ADDRSELECT; + entry->type_def.addrsel = addrmap; +#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) + } else if (info->attrs[NLBL_MGMT_A_IPV6ADDR]) { + struct in6_addr *addr; + struct in6_addr *mask; + struct netlbl_domaddr6_map *map; + + addrmap = kzalloc(sizeof(*addrmap), GFP_KERNEL); + if (addrmap == NULL) { + ret_val = -ENOMEM; + goto add_failure; + } + INIT_LIST_HEAD(&addrmap->list4); + INIT_LIST_HEAD(&addrmap->list6); + + if (nla_len(info->attrs[NLBL_MGMT_A_IPV6ADDR]) != + sizeof(struct in6_addr)) { + ret_val = -EINVAL; + goto add_failure; + } + if (nla_len(info->attrs[NLBL_MGMT_A_IPV6MASK]) != + sizeof(struct in6_addr)) { + ret_val = -EINVAL; + goto add_failure; + } + addr = nla_data(info->attrs[NLBL_MGMT_A_IPV6ADDR]); + mask = nla_data(info->attrs[NLBL_MGMT_A_IPV6MASK]); + + map = kzalloc(sizeof(*map), GFP_KERNEL); + if (map == NULL) { + ret_val = -ENOMEM; + goto add_failure; + } + ipv6_addr_copy(&map->list.addr, addr); + map->list.addr.s6_addr32[0] &= mask->s6_addr32[0]; + map->list.addr.s6_addr32[1] &= mask->s6_addr32[1]; + map->list.addr.s6_addr32[2] &= mask->s6_addr32[2]; + map->list.addr.s6_addr32[3] &= mask->s6_addr32[3]; + ipv6_addr_copy(&map->list.mask, mask); + map->list.valid = 1; + map->type = entry->type; + + ret_val = netlbl_af6list_add(&map->list, &addrmap->list6); + if (ret_val != 0) { + kfree(map); + goto add_failure; + } + + entry->type = NETLBL_NLTYPE_ADDRSELECT; + entry->type_def.addrsel = addrmap; +#endif /* IPv6 */ + } + + ret_val = netlbl_domhsh_add(entry, audit_info); if (ret_val != 0) goto add_failure; return 0; add_failure: + if (cipsov4) + cipso_v4_doi_putdef(cipsov4); if (entry) kfree(entry->domain); + kfree(addrmap); kfree(entry); return ret_val; } /** + * netlbl_mgmt_listentry - List a NetLabel/LSM domain map entry + * @skb: the NETLINK buffer + * @entry: the map entry + * + * Description: + * This function is a helper function used by the LISTALL and LISTDEF command + * handlers. The caller is responsibile for ensuring that the RCU read lock + * is held. Returns zero on success, negative values on failure. + * + */ +static int netlbl_mgmt_listentry(struct sk_buff *skb, + struct netlbl_dom_map *entry) +{ + int ret_val; + struct nlattr *nla_a; + struct nlattr *nla_b; + struct netlbl_af4list *iter4; +#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) + struct netlbl_af6list *iter6; +#endif + + if (entry->domain != NULL) { + ret_val = nla_put_string(skb, + NLBL_MGMT_A_DOMAIN, entry->domain); + if (ret_val != 0) + return ret_val; + } + + switch (entry->type) { + case NETLBL_NLTYPE_ADDRSELECT: + nla_a = nla_nest_start(skb, NLBL_MGMT_A_SELECTORLIST); + if (nla_a == NULL) + return -ENOMEM; + + netlbl_af4list_foreach_rcu(iter4, + &entry->type_def.addrsel->list4) { + struct netlbl_domaddr4_map *map4; + struct in_addr addr_struct; + + nla_b = nla_nest_start(skb, NLBL_MGMT_A_ADDRSELECTOR); + if (nla_b == NULL) + return -ENOMEM; + + addr_struct.s_addr = iter4->addr; + ret_val = nla_put(skb, NLBL_MGMT_A_IPV4ADDR, + sizeof(struct in_addr), + &addr_struct); + if (ret_val != 0) + return ret_val; + addr_struct.s_addr = iter4->mask; + ret_val = nla_put(skb, NLBL_MGMT_A_IPV4MASK, + sizeof(struct in_addr), + &addr_struct); + if (ret_val != 0) + return ret_val; + map4 = netlbl_domhsh_addr4_entry(iter4); + ret_val = nla_put_u32(skb, NLBL_MGMT_A_PROTOCOL, + map4->type); + if (ret_val != 0) + return ret_val; + switch (map4->type) { + case NETLBL_NLTYPE_CIPSOV4: + ret_val = nla_put_u32(skb, NLBL_MGMT_A_CV4DOI, + map4->type_def.cipsov4->doi); + if (ret_val != 0) + return ret_val; + break; + } + + nla_nest_end(skb, nla_b); + } +#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) + netlbl_af6list_foreach_rcu(iter6, + &entry->type_def.addrsel->list6) { + struct netlbl_domaddr6_map *map6; + + nla_b = nla_nest_start(skb, NLBL_MGMT_A_ADDRSELECTOR); + if (nla_b == NULL) + return -ENOMEM; + + ret_val = nla_put(skb, NLBL_MGMT_A_IPV6ADDR, + sizeof(struct in6_addr), + &iter6->addr); + if (ret_val != 0) + return ret_val; + ret_val = nla_put(skb, NLBL_MGMT_A_IPV6MASK, + sizeof(struct in6_addr), + &iter6->mask); + if (ret_val != 0) + return ret_val; + map6 = netlbl_domhsh_addr6_entry(iter6); + ret_val = nla_put_u32(skb, NLBL_MGMT_A_PROTOCOL, + map6->type); + if (ret_val != 0) + return ret_val; + + nla_nest_end(skb, nla_b); + } +#endif /* IPv6 */ + + nla_nest_end(skb, nla_a); + break; + case NETLBL_NLTYPE_UNLABELED: + ret_val = nla_put_u32(skb, NLBL_MGMT_A_PROTOCOL, entry->type); + break; + case NETLBL_NLTYPE_CIPSOV4: + ret_val = nla_put_u32(skb, NLBL_MGMT_A_PROTOCOL, entry->type); + if (ret_val != 0) + return ret_val; + ret_val = nla_put_u32(skb, NLBL_MGMT_A_CV4DOI, + entry->type_def.cipsov4->doi); + break; + } + + return ret_val; +} + +/* + * NetLabel Command Handlers + */ + +/** + * netlbl_mgmt_add - Handle an ADD message + * @skb: the NETLINK buffer + * @info: the Generic NETLINK info block + * + * Description: + * Process a user generated ADD message and add the domains from the message + * to the hash table. See netlabel.h for a description of the message format. + * Returns zero on success, negative values on failure. + * + */ +static int netlbl_mgmt_add(struct sk_buff *skb, struct genl_info *info) +{ + struct netlbl_audit audit_info; + + if ((!info->attrs[NLBL_MGMT_A_DOMAIN]) || + (!info->attrs[NLBL_MGMT_A_PROTOCOL]) || + (info->attrs[NLBL_MGMT_A_IPV4ADDR] && + info->attrs[NLBL_MGMT_A_IPV6ADDR]) || + (info->attrs[NLBL_MGMT_A_IPV4MASK] && + info->attrs[NLBL_MGMT_A_IPV6MASK]) || + ((info->attrs[NLBL_MGMT_A_IPV4ADDR] != NULL) ^ + (info->attrs[NLBL_MGMT_A_IPV4MASK] != NULL)) || + ((info->attrs[NLBL_MGMT_A_IPV6ADDR] != NULL) ^ + (info->attrs[NLBL_MGMT_A_IPV6MASK] != NULL))) + return -EINVAL; + + netlbl_netlink_auditinfo(skb, &audit_info); + + return netlbl_mgmt_add_common(info, &audit_info); +} + +/** * netlbl_mgmt_remove - Handle a REMOVE message * @skb: the NETLINK buffer * @info: the Generic NETLINK info block @@ -198,23 +453,9 @@ static int netlbl_mgmt_listall_cb(struct netlbl_dom_map *entry, void *arg) if (data == NULL) goto listall_cb_failure; - ret_val = nla_put_string(cb_arg->skb, - NLBL_MGMT_A_DOMAIN, - entry->domain); - if (ret_val != 0) - goto listall_cb_failure; - ret_val = nla_put_u32(cb_arg->skb, NLBL_MGMT_A_PROTOCOL, entry->type); + ret_val = netlbl_mgmt_listentry(cb_arg->skb, entry); if (ret_val != 0) goto listall_cb_failure; - switch (entry->type) { - case NETLBL_NLTYPE_CIPSOV4: - ret_val = nla_put_u32(cb_arg->skb, - NLBL_MGMT_A_CV4DOI, - entry->type_def.cipsov4->doi); - if (ret_val != 0) - goto listall_cb_failure; - break; - } cb_arg->seq++; return genlmsg_end(cb_arg->skb, data); @@ -268,56 +509,22 @@ static int netlbl_mgmt_listall(struct sk_buff *skb, */ static int netlbl_mgmt_adddef(struct sk_buff *skb, struct genl_info *info) { - int ret_val = -EINVAL; - struct netlbl_dom_map *entry = NULL; - u32 tmp_val; struct netlbl_audit audit_info; - if (!info->attrs[NLBL_MGMT_A_PROTOCOL]) - goto adddef_failure; + if ((!info->attrs[NLBL_MGMT_A_PROTOCOL]) || + (info->attrs[NLBL_MGMT_A_IPV4ADDR] && + info->attrs[NLBL_MGMT_A_IPV6ADDR]) || + (info->attrs[NLBL_MGMT_A_IPV4MASK] && + info->attrs[NLBL_MGMT_A_IPV6MASK]) || + ((info->attrs[NLBL_MGMT_A_IPV4ADDR] != NULL) ^ + (info->attrs[NLBL_MGMT_A_IPV4MASK] != NULL)) || + ((info->attrs[NLBL_MGMT_A_IPV6ADDR] != NULL) ^ + (info->attrs[NLBL_MGMT_A_IPV6MASK] != NULL))) + return -EINVAL; netlbl_netlink_auditinfo(skb, &audit_info); - entry = kzalloc(sizeof(*entry), GFP_KERNEL); - if (entry == NULL) { - ret_val = -ENOMEM; - goto adddef_failure; - } - entry->type = nla_get_u32(info->attrs[NLBL_MGMT_A_PROTOCOL]); - - switch (entry->type) { - case NETLBL_NLTYPE_UNLABELED: - ret_val = netlbl_domhsh_add_default(entry, &audit_info); - break; - case NETLBL_NLTYPE_CIPSOV4: - if (!info->attrs[NLBL_MGMT_A_CV4DOI]) - goto adddef_failure; - - tmp_val = nla_get_u32(info->attrs[NLBL_MGMT_A_CV4DOI]); - /* We should be holding a rcu_read_lock() here while we hold - * the result but since the entry will always be deleted when - * the CIPSO DOI is deleted we aren't going to keep the - * lock. */ - rcu_read_lock(); - entry->type_def.cipsov4 = cipso_v4_doi_getdef(tmp_val); - if (entry->type_def.cipsov4 == NULL) { - rcu_read_unlock(); - goto adddef_failure; - } - ret_val = netlbl_domhsh_add_default(entry, &audit_info); - rcu_read_unlock(); - break; - default: - goto adddef_failure; - } - if (ret_val != 0) - goto adddef_failure; - - return 0; - -adddef_failure: - kfree(entry); - return ret_val; + return netlbl_mgmt_add_common(info, &audit_info); } /** @@ -371,26 +578,13 @@ static int netlbl_mgmt_listdef(struct sk_buff *skb, struct genl_info *info) ret_val = -ENOENT; goto listdef_failure_lock; } - ret_val = nla_put_u32(ans_skb, NLBL_MGMT_A_PROTOCOL, entry->type); - if (ret_val != 0) - goto listdef_failure_lock; - switch (entry->type) { - case NETLBL_NLTYPE_CIPSOV4: - ret_val = nla_put_u32(ans_skb, - NLBL_MGMT_A_CV4DOI, - entry->type_def.cipsov4->doi); - if (ret_val != 0) - goto listdef_failure_lock; - break; - } + ret_val = netlbl_mgmt_listentry(ans_skb, entry); rcu_read_unlock(); - - genlmsg_end(ans_skb, data); - - ret_val = genlmsg_reply(ans_skb, info); if (ret_val != 0) goto listdef_failure; - return 0; + + genlmsg_end(ans_skb, data); + return genlmsg_reply(ans_skb, info); listdef_failure_lock: rcu_read_unlock(); @@ -501,11 +695,7 @@ static int netlbl_mgmt_version(struct sk_buff *skb, struct genl_info *info) goto version_failure; genlmsg_end(ans_skb, data); - - ret_val = genlmsg_reply(ans_skb, info); - if (ret_val != 0) - goto version_failure; - return 0; + return genlmsg_reply(ans_skb, info); version_failure: kfree_skb(ans_skb); diff --git a/net/netlabel/netlabel_mgmt.h b/net/netlabel/netlabel_mgmt.h index a43bff169d6b..05d96431f819 100644 --- a/net/netlabel/netlabel_mgmt.h +++ b/net/netlabel/netlabel_mgmt.h @@ -45,6 +45,16 @@ * NLBL_MGMT_A_DOMAIN * NLBL_MGMT_A_PROTOCOL * + * If IPv4 is specified the following attributes are required: + * + * NLBL_MGMT_A_IPV4ADDR + * NLBL_MGMT_A_IPV4MASK + * + * If IPv6 is specified the following attributes are required: + * + * NLBL_MGMT_A_IPV6ADDR + * NLBL_MGMT_A_IPV6MASK + * * If using NETLBL_NLTYPE_CIPSOV4 the following attributes are required: * * NLBL_MGMT_A_CV4DOI @@ -68,13 +78,24 @@ * Required attributes: * * NLBL_MGMT_A_DOMAIN + * + * If the IP address selectors are not used the following attribute is + * required: + * * NLBL_MGMT_A_PROTOCOL * - * If using NETLBL_NLTYPE_CIPSOV4 the following attributes are required: + * If the IP address selectors are used then the following attritbute is + * required: + * + * NLBL_MGMT_A_SELECTORLIST + * + * If the mapping is using the NETLBL_NLTYPE_CIPSOV4 type then the following + * attributes are required: * * NLBL_MGMT_A_CV4DOI * - * If using NETLBL_NLTYPE_UNLABELED no other attributes are required. + * If the mapping is using the NETLBL_NLTYPE_UNLABELED type no other + * attributes are required. * * o ADDDEF: * Sent by an application to set the default domain mapping for the NetLabel @@ -100,15 +121,23 @@ * application there is no payload. On success the kernel should send a * response using the following format. * - * Required attributes: + * If the IP address selectors are not used the following attribute is + * required: * * NLBL_MGMT_A_PROTOCOL * - * If using NETLBL_NLTYPE_CIPSOV4 the following attributes are required: + * If the IP address selectors are used then the following attritbute is + * required: + * + * NLBL_MGMT_A_SELECTORLIST + * + * If the mapping is using the NETLBL_NLTYPE_CIPSOV4 type then the following + * attributes are required: * * NLBL_MGMT_A_CV4DOI * - * If using NETLBL_NLTYPE_UNLABELED no other attributes are required. + * If the mapping is using the NETLBL_NLTYPE_UNLABELED type no other + * attributes are required. * * o PROTOCOLS: * Sent by an application to request a list of configured NetLabel protocols @@ -162,6 +191,26 @@ enum { NLBL_MGMT_A_CV4DOI, /* (NLA_U32) * the CIPSOv4 DOI value */ + NLBL_MGMT_A_IPV6ADDR, + /* (NLA_BINARY, struct in6_addr) + * an IPv6 address */ + NLBL_MGMT_A_IPV6MASK, + /* (NLA_BINARY, struct in6_addr) + * an IPv6 address mask */ + NLBL_MGMT_A_IPV4ADDR, + /* (NLA_BINARY, struct in_addr) + * an IPv4 address */ + NLBL_MGMT_A_IPV4MASK, + /* (NLA_BINARY, struct in_addr) + * and IPv4 address mask */ + NLBL_MGMT_A_ADDRSELECTOR, + /* (NLA_NESTED) + * an IP address selector, must contain an address, mask, and protocol + * attribute plus any protocol specific attributes */ + NLBL_MGMT_A_SELECTORLIST, + /* (NLA_NESTED) + * the selector list, there must be at least one + * NLBL_MGMT_A_ADDRSELECTOR attribute */ __NLBL_MGMT_A_MAX, }; #define NLBL_MGMT_A_MAX (__NLBL_MGMT_A_MAX - 1) diff --git a/net/netlabel/netlabel_unlabeled.c b/net/netlabel/netlabel_unlabeled.c index 0099da5b2591..e8a5c32b0f10 100644 --- a/net/netlabel/netlabel_unlabeled.c +++ b/net/netlabel/netlabel_unlabeled.c @@ -10,7 +10,7 @@ */ /* - * (c) Copyright Hewlett-Packard Development Company, L.P., 2006 - 2007 + * (c) Copyright Hewlett-Packard Development Company, L.P., 2006 - 2008 * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by @@ -54,6 +54,7 @@ #include <asm/atomic.h> #include "netlabel_user.h" +#include "netlabel_addrlist.h" #include "netlabel_domainhash.h" #include "netlabel_unlabeled.h" #include "netlabel_mgmt.h" @@ -76,22 +77,20 @@ struct netlbl_unlhsh_tbl { struct list_head *tbl; u32 size; }; +#define netlbl_unlhsh_addr4_entry(iter) \ + container_of(iter, struct netlbl_unlhsh_addr4, list) struct netlbl_unlhsh_addr4 { - __be32 addr; - __be32 mask; u32 secid; - u32 valid; - struct list_head list; + struct netlbl_af4list list; struct rcu_head rcu; }; +#define netlbl_unlhsh_addr6_entry(iter) \ + container_of(iter, struct netlbl_unlhsh_addr6, list) struct netlbl_unlhsh_addr6 { - struct in6_addr addr; - struct in6_addr mask; u32 secid; - u32 valid; - struct list_head list; + struct netlbl_af6list list; struct rcu_head rcu; }; struct netlbl_unlhsh_iface { @@ -147,76 +146,6 @@ static const struct nla_policy netlbl_unlabel_genl_policy[NLBL_UNLABEL_A_MAX + 1 }; /* - * Audit Helper Functions - */ - -/** - * netlbl_unlabel_audit_addr4 - Audit an IPv4 address - * @audit_buf: audit buffer - * @dev: network interface - * @addr: IP address - * @mask: IP address mask - * - * Description: - * Write the IPv4 address and address mask, if necessary, to @audit_buf. - * - */ -static void netlbl_unlabel_audit_addr4(struct audit_buffer *audit_buf, - const char *dev, - __be32 addr, __be32 mask) -{ - u32 mask_val = ntohl(mask); - - if (dev != NULL) - audit_log_format(audit_buf, " netif=%s", dev); - audit_log_format(audit_buf, " src=" NIPQUAD_FMT, NIPQUAD(addr)); - if (mask_val != 0xffffffff) { - u32 mask_len = 0; - while (mask_val > 0) { - mask_val <<= 1; - mask_len++; - } - audit_log_format(audit_buf, " src_prefixlen=%d", mask_len); - } -} - -#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) -/** - * netlbl_unlabel_audit_addr6 - Audit an IPv6 address - * @audit_buf: audit buffer - * @dev: network interface - * @addr: IP address - * @mask: IP address mask - * - * Description: - * Write the IPv6 address and address mask, if necessary, to @audit_buf. - * - */ -static void netlbl_unlabel_audit_addr6(struct audit_buffer *audit_buf, - const char *dev, - const struct in6_addr *addr, - const struct in6_addr *mask) -{ - if (dev != NULL) - audit_log_format(audit_buf, " netif=%s", dev); - audit_log_format(audit_buf, " src=" NIP6_FMT, NIP6(*addr)); - if (ntohl(mask->s6_addr32[3]) != 0xffffffff) { - u32 mask_len = 0; - u32 mask_val; - int iter = -1; - while (ntohl(mask->s6_addr32[++iter]) == 0xffffffff) - mask_len += 32; - mask_val = ntohl(mask->s6_addr32[iter]); - while (mask_val > 0) { - mask_val <<= 1; - mask_len++; - } - audit_log_format(audit_buf, " src_prefixlen=%d", mask_len); - } -} -#endif /* IPv6 */ - -/* * Unlabeled Connection Hash Table Functions */ @@ -274,26 +203,28 @@ static void netlbl_unlhsh_free_addr6(struct rcu_head *entry) static void netlbl_unlhsh_free_iface(struct rcu_head *entry) { struct netlbl_unlhsh_iface *iface; - struct netlbl_unlhsh_addr4 *iter4; - struct netlbl_unlhsh_addr4 *tmp4; - struct netlbl_unlhsh_addr6 *iter6; - struct netlbl_unlhsh_addr6 *tmp6; + struct netlbl_af4list *iter4; + struct netlbl_af4list *tmp4; +#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) + struct netlbl_af6list *iter6; + struct netlbl_af6list *tmp6; +#endif /* IPv6 */ iface = container_of(entry, struct netlbl_unlhsh_iface, rcu); /* no need for locks here since we are the only one with access to this * structure */ - list_for_each_entry_safe(iter4, tmp4, &iface->addr4_list, list) - if (iter4->valid) { - list_del_rcu(&iter4->list); - kfree(iter4); - } - list_for_each_entry_safe(iter6, tmp6, &iface->addr6_list, list) - if (iter6->valid) { - list_del_rcu(&iter6->list); - kfree(iter6); - } + netlbl_af4list_foreach_safe(iter4, tmp4, &iface->addr4_list) { + netlbl_af4list_remove_entry(iter4); + kfree(netlbl_unlhsh_addr4_entry(iter4)); + } +#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) + netlbl_af6list_foreach_safe(iter6, tmp6, &iface->addr6_list) { + netlbl_af6list_remove_entry(iter6); + kfree(netlbl_unlhsh_addr6_entry(iter6)); + } +#endif /* IPv6 */ kfree(iface); } @@ -316,59 +247,6 @@ static u32 netlbl_unlhsh_hash(int ifindex) } /** - * netlbl_unlhsh_search_addr4 - Search for a matching IPv4 address entry - * @addr: IPv4 address - * @iface: the network interface entry - * - * Description: - * Searches the IPv4 address list of the network interface specified by @iface. - * If a matching address entry is found it is returned, otherwise NULL is - * returned. The caller is responsible for calling the rcu_read_[un]lock() - * functions. - * - */ -static struct netlbl_unlhsh_addr4 *netlbl_unlhsh_search_addr4( - __be32 addr, - const struct netlbl_unlhsh_iface *iface) -{ - struct netlbl_unlhsh_addr4 *iter; - - list_for_each_entry_rcu(iter, &iface->addr4_list, list) - if (iter->valid && (addr & iter->mask) == iter->addr) - return iter; - - return NULL; -} - -#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) -/** - * netlbl_unlhsh_search_addr6 - Search for a matching IPv6 address entry - * @addr: IPv6 address - * @iface: the network interface entry - * - * Description: - * Searches the IPv6 address list of the network interface specified by @iface. - * If a matching address entry is found it is returned, otherwise NULL is - * returned. The caller is responsible for calling the rcu_read_[un]lock() - * functions. - * - */ -static struct netlbl_unlhsh_addr6 *netlbl_unlhsh_search_addr6( - const struct in6_addr *addr, - const struct netlbl_unlhsh_iface *iface) -{ - struct netlbl_unlhsh_addr6 *iter; - - list_for_each_entry_rcu(iter, &iface->addr6_list, list) - if (iter->valid && - ipv6_masked_addr_cmp(&iter->addr, &iter->mask, addr) == 0) - return iter; - - return NULL; -} -#endif /* IPv6 */ - -/** * netlbl_unlhsh_search_iface - Search for a matching interface entry * @ifindex: the network interface * @@ -381,12 +259,12 @@ static struct netlbl_unlhsh_addr6 *netlbl_unlhsh_search_addr6( static struct netlbl_unlhsh_iface *netlbl_unlhsh_search_iface(int ifindex) { u32 bkt; + struct list_head *bkt_list; struct netlbl_unlhsh_iface *iter; bkt = netlbl_unlhsh_hash(ifindex); - list_for_each_entry_rcu(iter, - &rcu_dereference(netlbl_unlhsh)->tbl[bkt], - list) + bkt_list = &rcu_dereference(netlbl_unlhsh)->tbl[bkt]; + list_for_each_entry_rcu(iter, bkt_list, list) if (iter->valid && iter->ifindex == ifindex) return iter; @@ -439,43 +317,26 @@ static int netlbl_unlhsh_add_addr4(struct netlbl_unlhsh_iface *iface, const struct in_addr *mask, u32 secid) { + int ret_val; struct netlbl_unlhsh_addr4 *entry; - struct netlbl_unlhsh_addr4 *iter; entry = kzalloc(sizeof(*entry), GFP_ATOMIC); if (entry == NULL) return -ENOMEM; - entry->addr = addr->s_addr & mask->s_addr; - entry->mask = mask->s_addr; - entry->secid = secid; - entry->valid = 1; + entry->list.addr = addr->s_addr & mask->s_addr; + entry->list.mask = mask->s_addr; + entry->list.valid = 1; INIT_RCU_HEAD(&entry->rcu); + entry->secid = secid; spin_lock(&netlbl_unlhsh_lock); - iter = netlbl_unlhsh_search_addr4(entry->addr, iface); - if (iter != NULL && - iter->addr == addr->s_addr && iter->mask == mask->s_addr) { - spin_unlock(&netlbl_unlhsh_lock); - kfree(entry); - return -EEXIST; - } - /* in order to speed up address searches through the list (the common - * case) we need to keep the list in order based on the size of the - * address mask such that the entry with the widest mask (smallest - * numerical value) appears first in the list */ - list_for_each_entry_rcu(iter, &iface->addr4_list, list) - if (iter->valid && - ntohl(entry->mask) > ntohl(iter->mask)) { - __list_add_rcu(&entry->list, - iter->list.prev, - &iter->list); - spin_unlock(&netlbl_unlhsh_lock); - return 0; - } - list_add_tail_rcu(&entry->list, &iface->addr4_list); + ret_val = netlbl_af4list_add(&entry->list, &iface->addr4_list); spin_unlock(&netlbl_unlhsh_lock); - return 0; + + if (ret_val != 0) + kfree(entry); + return ret_val; } #if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) @@ -498,47 +359,29 @@ static int netlbl_unlhsh_add_addr6(struct netlbl_unlhsh_iface *iface, const struct in6_addr *mask, u32 secid) { + int ret_val; struct netlbl_unlhsh_addr6 *entry; - struct netlbl_unlhsh_addr6 *iter; entry = kzalloc(sizeof(*entry), GFP_ATOMIC); if (entry == NULL) return -ENOMEM; - ipv6_addr_copy(&entry->addr, addr); - entry->addr.s6_addr32[0] &= mask->s6_addr32[0]; - entry->addr.s6_addr32[1] &= mask->s6_addr32[1]; - entry->addr.s6_addr32[2] &= mask->s6_addr32[2]; - entry->addr.s6_addr32[3] &= mask->s6_addr32[3]; - ipv6_addr_copy(&entry->mask, mask); - entry->secid = secid; - entry->valid = 1; + ipv6_addr_copy(&entry->list.addr, addr); + entry->list.addr.s6_addr32[0] &= mask->s6_addr32[0]; + entry->list.addr.s6_addr32[1] &= mask->s6_addr32[1]; + entry->list.addr.s6_addr32[2] &= mask->s6_addr32[2]; + entry->list.addr.s6_addr32[3] &= mask->s6_addr32[3]; + ipv6_addr_copy(&entry->list.mask, mask); + entry->list.valid = 1; INIT_RCU_HEAD(&entry->rcu); + entry->secid = secid; spin_lock(&netlbl_unlhsh_lock); - iter = netlbl_unlhsh_search_addr6(&entry->addr, iface); - if (iter != NULL && - (ipv6_addr_equal(&iter->addr, addr) && - ipv6_addr_equal(&iter->mask, mask))) { - spin_unlock(&netlbl_unlhsh_lock); - kfree(entry); - return -EEXIST; - } - /* in order to speed up address searches through the list (the common - * case) we need to keep the list in order based on the size of the - * address mask such that the entry with the widest mask (smallest - * numerical value) appears first in the list */ - list_for_each_entry_rcu(iter, &iface->addr6_list, list) - if (iter->valid && - ipv6_addr_cmp(&entry->mask, &iter->mask) > 0) { - __list_add_rcu(&entry->list, - iter->list.prev, - &iter->list); - spin_unlock(&netlbl_unlhsh_lock); - return 0; - } - list_add_tail_rcu(&entry->list, &iface->addr6_list); + ret_val = netlbl_af6list_add(&entry->list, &iface->addr6_list); spin_unlock(&netlbl_unlhsh_lock); + + if (ret_val != 0) + kfree(entry); return 0; } #endif /* IPv6 */ @@ -658,10 +501,10 @@ static int netlbl_unlhsh_add(struct net *net, mask4 = (struct in_addr *)mask; ret_val = netlbl_unlhsh_add_addr4(iface, addr4, mask4, secid); if (audit_buf != NULL) - netlbl_unlabel_audit_addr4(audit_buf, - dev_name, - addr4->s_addr, - mask4->s_addr); + netlbl_af4list_audit_addr(audit_buf, 1, + dev_name, + addr4->s_addr, + mask4->s_addr); break; } #if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) @@ -672,9 +515,9 @@ static int netlbl_unlhsh_add(struct net *net, mask6 = (struct in6_addr *)mask; ret_val = netlbl_unlhsh_add_addr6(iface, addr6, mask6, secid); if (audit_buf != NULL) - netlbl_unlabel_audit_addr6(audit_buf, - dev_name, - addr6, mask6); + netlbl_af6list_audit_addr(audit_buf, 1, + dev_name, + addr6, mask6); break; } #endif /* IPv6 */ @@ -719,35 +562,34 @@ static int netlbl_unlhsh_remove_addr4(struct net *net, const struct in_addr *mask, struct netlbl_audit *audit_info) { - int ret_val = -ENOENT; + int ret_val = 0; + struct netlbl_af4list *list_entry; struct netlbl_unlhsh_addr4 *entry; - struct audit_buffer *audit_buf = NULL; + struct audit_buffer *audit_buf; struct net_device *dev; - char *secctx = NULL; + char *secctx; u32 secctx_len; spin_lock(&netlbl_unlhsh_lock); - entry = netlbl_unlhsh_search_addr4(addr->s_addr, iface); - if (entry != NULL && - entry->addr == addr->s_addr && entry->mask == mask->s_addr) { - entry->valid = 0; - list_del_rcu(&entry->list); - ret_val = 0; - } + list_entry = netlbl_af4list_remove(addr->s_addr, mask->s_addr, + &iface->addr4_list); spin_unlock(&netlbl_unlhsh_lock); + if (list_entry == NULL) + ret_val = -ENOENT; + entry = netlbl_unlhsh_addr4_entry(list_entry); audit_buf = netlbl_audit_start_common(AUDIT_MAC_UNLBL_STCDEL, audit_info); if (audit_buf != NULL) { dev = dev_get_by_index(net, iface->ifindex); - netlbl_unlabel_audit_addr4(audit_buf, - (dev != NULL ? dev->name : NULL), - entry->addr, entry->mask); + netlbl_af4list_audit_addr(audit_buf, 1, + (dev != NULL ? dev->name : NULL), + addr->s_addr, mask->s_addr); if (dev != NULL) dev_put(dev); - if (security_secid_to_secctx(entry->secid, - &secctx, - &secctx_len) == 0) { + if (entry && security_secid_to_secctx(entry->secid, + &secctx, + &secctx_len) == 0) { audit_log_format(audit_buf, " sec_obj=%s", secctx); security_release_secctx(secctx, secctx_len); } @@ -781,36 +623,33 @@ static int netlbl_unlhsh_remove_addr6(struct net *net, const struct in6_addr *mask, struct netlbl_audit *audit_info) { - int ret_val = -ENOENT; + int ret_val = 0; + struct netlbl_af6list *list_entry; struct netlbl_unlhsh_addr6 *entry; - struct audit_buffer *audit_buf = NULL; + struct audit_buffer *audit_buf; struct net_device *dev; - char *secctx = NULL; + char *secctx; u32 secctx_len; spin_lock(&netlbl_unlhsh_lock); - entry = netlbl_unlhsh_search_addr6(addr, iface); - if (entry != NULL && - (ipv6_addr_equal(&entry->addr, addr) && - ipv6_addr_equal(&entry->mask, mask))) { - entry->valid = 0; - list_del_rcu(&entry->list); - ret_val = 0; - } + list_entry = netlbl_af6list_remove(addr, mask, &iface->addr6_list); spin_unlock(&netlbl_unlhsh_lock); + if (list_entry == NULL) + ret_val = -ENOENT; + entry = netlbl_unlhsh_addr6_entry(list_entry); audit_buf = netlbl_audit_start_common(AUDIT_MAC_UNLBL_STCDEL, audit_info); if (audit_buf != NULL) { dev = dev_get_by_index(net, iface->ifindex); - netlbl_unlabel_audit_addr6(audit_buf, - (dev != NULL ? dev->name : NULL), - addr, mask); + netlbl_af6list_audit_addr(audit_buf, 1, + (dev != NULL ? dev->name : NULL), + addr, mask); if (dev != NULL) dev_put(dev); - if (security_secid_to_secctx(entry->secid, - &secctx, - &secctx_len) == 0) { + if (entry && security_secid_to_secctx(entry->secid, + &secctx, + &secctx_len) == 0) { audit_log_format(audit_buf, " sec_obj=%s", secctx); security_release_secctx(secctx, secctx_len); } @@ -836,16 +675,18 @@ static int netlbl_unlhsh_remove_addr6(struct net *net, */ static void netlbl_unlhsh_condremove_iface(struct netlbl_unlhsh_iface *iface) { - struct netlbl_unlhsh_addr4 *iter4; - struct netlbl_unlhsh_addr6 *iter6; + struct netlbl_af4list *iter4; +#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) + struct netlbl_af6list *iter6; +#endif /* IPv6 */ spin_lock(&netlbl_unlhsh_lock); - list_for_each_entry_rcu(iter4, &iface->addr4_list, list) - if (iter4->valid) - goto unlhsh_condremove_failure; - list_for_each_entry_rcu(iter6, &iface->addr6_list, list) - if (iter6->valid) - goto unlhsh_condremove_failure; + netlbl_af4list_foreach_rcu(iter4, &iface->addr4_list) + goto unlhsh_condremove_failure; +#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) + netlbl_af6list_foreach_rcu(iter6, &iface->addr6_list) + goto unlhsh_condremove_failure; +#endif /* IPv6 */ iface->valid = 0; if (iface->ifindex > 0) list_del_rcu(&iface->list); @@ -954,7 +795,7 @@ static int netlbl_unlhsh_netdev_handler(struct notifier_block *this, struct net_device *dev = ptr; struct netlbl_unlhsh_iface *iface = NULL; - if (dev_net(dev) != &init_net) + if (!net_eq(dev_net(dev), &init_net)) return NOTIFY_DONE; /* XXX - should this be a check for NETDEV_DOWN or _UNREGISTER? */ @@ -1107,11 +948,7 @@ static int netlbl_unlabel_list(struct sk_buff *skb, struct genl_info *info) goto list_failure; genlmsg_end(ans_skb, data); - - ret_val = genlmsg_reply(ans_skb, info); - if (ret_val != 0) - goto list_failure; - return 0; + return genlmsg_reply(ans_skb, info); list_failure: kfree_skb(ans_skb); @@ -1353,7 +1190,7 @@ static int netlbl_unlabel_staticlist_gen(u32 cmd, if (addr4) { struct in_addr addr_struct; - addr_struct.s_addr = addr4->addr; + addr_struct.s_addr = addr4->list.addr; ret_val = nla_put(cb_arg->skb, NLBL_UNLABEL_A_IPV4ADDR, sizeof(struct in_addr), @@ -1361,7 +1198,7 @@ static int netlbl_unlabel_staticlist_gen(u32 cmd, if (ret_val != 0) goto list_cb_failure; - addr_struct.s_addr = addr4->mask; + addr_struct.s_addr = addr4->list.mask; ret_val = nla_put(cb_arg->skb, NLBL_UNLABEL_A_IPV4MASK, sizeof(struct in_addr), @@ -1374,14 +1211,14 @@ static int netlbl_unlabel_staticlist_gen(u32 cmd, ret_val = nla_put(cb_arg->skb, NLBL_UNLABEL_A_IPV6ADDR, sizeof(struct in6_addr), - &addr6->addr); + &addr6->list.addr); if (ret_val != 0) goto list_cb_failure; ret_val = nla_put(cb_arg->skb, NLBL_UNLABEL_A_IPV6MASK, sizeof(struct in6_addr), - &addr6->mask); + &addr6->list.mask); if (ret_val != 0) goto list_cb_failure; @@ -1429,8 +1266,11 @@ static int netlbl_unlabel_staticlist(struct sk_buff *skb, u32 iter_bkt; u32 iter_chain = 0, iter_addr4 = 0, iter_addr6 = 0; struct netlbl_unlhsh_iface *iface; - struct netlbl_unlhsh_addr4 *addr4; - struct netlbl_unlhsh_addr6 *addr6; + struct list_head *iter_list; + struct netlbl_af4list *addr4; +#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) + struct netlbl_af6list *addr6; +#endif cb_arg.nl_cb = cb; cb_arg.skb = skb; @@ -1440,44 +1280,43 @@ static int netlbl_unlabel_staticlist(struct sk_buff *skb, for (iter_bkt = skip_bkt; iter_bkt < rcu_dereference(netlbl_unlhsh)->size; iter_bkt++, iter_chain = 0, iter_addr4 = 0, iter_addr6 = 0) { - list_for_each_entry_rcu(iface, - &rcu_dereference(netlbl_unlhsh)->tbl[iter_bkt], - list) { + iter_list = &rcu_dereference(netlbl_unlhsh)->tbl[iter_bkt]; + list_for_each_entry_rcu(iface, iter_list, list) { if (!iface->valid || iter_chain++ < skip_chain) continue; - list_for_each_entry_rcu(addr4, - &iface->addr4_list, - list) { - if (!addr4->valid || iter_addr4++ < skip_addr4) + netlbl_af4list_foreach_rcu(addr4, + &iface->addr4_list) { + if (iter_addr4++ < skip_addr4) continue; if (netlbl_unlabel_staticlist_gen( - NLBL_UNLABEL_C_STATICLIST, - iface, - addr4, - NULL, - &cb_arg) < 0) { + NLBL_UNLABEL_C_STATICLIST, + iface, + netlbl_unlhsh_addr4_entry(addr4), + NULL, + &cb_arg) < 0) { iter_addr4--; iter_chain--; goto unlabel_staticlist_return; } } - list_for_each_entry_rcu(addr6, - &iface->addr6_list, - list) { - if (!addr6->valid || iter_addr6++ < skip_addr6) +#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) + netlbl_af6list_foreach_rcu(addr6, + &iface->addr6_list) { + if (iter_addr6++ < skip_addr6) continue; if (netlbl_unlabel_staticlist_gen( - NLBL_UNLABEL_C_STATICLIST, - iface, - NULL, - addr6, - &cb_arg) < 0) { + NLBL_UNLABEL_C_STATICLIST, + iface, + NULL, + netlbl_unlhsh_addr6_entry(addr6), + &cb_arg) < 0) { iter_addr6--; iter_chain--; goto unlabel_staticlist_return; } } +#endif /* IPv6 */ } } @@ -1508,9 +1347,12 @@ static int netlbl_unlabel_staticlistdef(struct sk_buff *skb, struct netlbl_unlhsh_iface *iface; u32 skip_addr4 = cb->args[0]; u32 skip_addr6 = cb->args[1]; - u32 iter_addr4 = 0, iter_addr6 = 0; - struct netlbl_unlhsh_addr4 *addr4; - struct netlbl_unlhsh_addr6 *addr6; + u32 iter_addr4 = 0; + struct netlbl_af4list *addr4; +#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) + u32 iter_addr6 = 0; + struct netlbl_af6list *addr6; +#endif cb_arg.nl_cb = cb; cb_arg.skb = skb; @@ -1521,30 +1363,32 @@ static int netlbl_unlabel_staticlistdef(struct sk_buff *skb, if (iface == NULL || !iface->valid) goto unlabel_staticlistdef_return; - list_for_each_entry_rcu(addr4, &iface->addr4_list, list) { - if (!addr4->valid || iter_addr4++ < skip_addr4) + netlbl_af4list_foreach_rcu(addr4, &iface->addr4_list) { + if (iter_addr4++ < skip_addr4) continue; if (netlbl_unlabel_staticlist_gen(NLBL_UNLABEL_C_STATICLISTDEF, - iface, - addr4, - NULL, - &cb_arg) < 0) { + iface, + netlbl_unlhsh_addr4_entry(addr4), + NULL, + &cb_arg) < 0) { iter_addr4--; goto unlabel_staticlistdef_return; } } - list_for_each_entry_rcu(addr6, &iface->addr6_list, list) { - if (addr6->valid || iter_addr6++ < skip_addr6) +#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) + netlbl_af6list_foreach_rcu(addr6, &iface->addr6_list) { + if (iter_addr6++ < skip_addr6) continue; if (netlbl_unlabel_staticlist_gen(NLBL_UNLABEL_C_STATICLISTDEF, - iface, - NULL, - addr6, - &cb_arg) < 0) { + iface, + NULL, + netlbl_unlhsh_addr6_entry(addr6), + &cb_arg) < 0) { iter_addr6--; goto unlabel_staticlistdef_return; } } +#endif /* IPv6 */ unlabel_staticlistdef_return: rcu_read_unlock(); @@ -1722,25 +1566,27 @@ int netlbl_unlabel_getattr(const struct sk_buff *skb, switch (family) { case PF_INET: { struct iphdr *hdr4; - struct netlbl_unlhsh_addr4 *addr4; + struct netlbl_af4list *addr4; hdr4 = ip_hdr(skb); - addr4 = netlbl_unlhsh_search_addr4(hdr4->saddr, iface); + addr4 = netlbl_af4list_search(hdr4->saddr, + &iface->addr4_list); if (addr4 == NULL) goto unlabel_getattr_nolabel; - secattr->attr.secid = addr4->secid; + secattr->attr.secid = netlbl_unlhsh_addr4_entry(addr4)->secid; break; } #if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) case PF_INET6: { struct ipv6hdr *hdr6; - struct netlbl_unlhsh_addr6 *addr6; + struct netlbl_af6list *addr6; hdr6 = ipv6_hdr(skb); - addr6 = netlbl_unlhsh_search_addr6(&hdr6->saddr, iface); + addr6 = netlbl_af6list_search(&hdr6->saddr, + &iface->addr6_list); if (addr6 == NULL) goto unlabel_getattr_nolabel; - secattr->attr.secid = addr6->secid; + secattr->attr.secid = netlbl_unlhsh_addr6_entry(addr6)->secid; break; } #endif /* IPv6 */ diff --git a/net/netlink/af_netlink.c b/net/netlink/af_netlink.c index 9b97f8006c9c..2fd8afac5f71 100644 --- a/net/netlink/af_netlink.c +++ b/net/netlink/af_netlink.c @@ -1,7 +1,7 @@ /* * NETLINK Kernel-user communication protocol. * - * Authors: Alan Cox <alan@redhat.com> + * Authors: Alan Cox <alan@lxorguk.ukuu.org.uk> * Alexey Kuznetsov <kuznet@ms2.inr.ac.ru> * * This program is free software; you can redistribute it and/or @@ -158,9 +158,10 @@ static void netlink_sock_destruct(struct sock *sk) printk(KERN_ERR "Freeing alive netlink socket %p\n", sk); return; } - BUG_TRAP(!atomic_read(&sk->sk_rmem_alloc)); - BUG_TRAP(!atomic_read(&sk->sk_wmem_alloc)); - BUG_TRAP(!nlk_sk(sk)->groups); + + WARN_ON(atomic_read(&sk->sk_rmem_alloc)); + WARN_ON(atomic_read(&sk->sk_wmem_alloc)); + WARN_ON(nlk_sk(sk)->groups); } /* This lock without WQ_FLAG_EXCLUSIVE is good on UP and it is _very_ bad on @@ -759,7 +760,7 @@ struct sock *netlink_getsockbyfilp(struct file *filp) * 0: continue * 1: repeat lookup - reference dropped while waiting for socket memory. */ -int netlink_attachskb(struct sock *sk, struct sk_buff *skb, int nonblock, +int netlink_attachskb(struct sock *sk, struct sk_buff *skb, long *timeo, struct sock *ssk) { struct netlink_sock *nlk; @@ -886,13 +887,13 @@ retry: return netlink_unicast_kernel(sk, skb); if (sk_filter(sk, skb)) { - int err = skb->len; + err = skb->len; kfree_skb(skb); sock_put(sk); return err; } - err = netlink_attachskb(sk, skb, nonblock, &timeo, ssk); + err = netlink_attachskb(sk, skb, &timeo, ssk); if (err == 1) goto retry; if (err) diff --git a/net/netlink/attr.c b/net/netlink/attr.c index 47bbf45ae5d7..2d106cfe1d27 100644 --- a/net/netlink/attr.c +++ b/net/netlink/attr.c @@ -132,6 +132,7 @@ errout: * @maxtype: maximum attribute type to be expected * @head: head of attribute stream * @len: length of attribute stream + * @policy: validation policy * * Parses a stream of attributes and stores a pointer to each attribute in * the tb array accessable via the attribute type. Attributes with a type @@ -194,7 +195,7 @@ struct nlattr *nla_find(struct nlattr *head, int len, int attrtype) /** * nla_strlcpy - Copy string attribute payload into a sized buffer * @dst: where to copy the string to - * @src: attribute to copy the string from + * @nla: attribute to copy the string from * @dstsize: size of destination buffer * * Copies at most dstsize - 1 bytes into the destination buffer. @@ -340,9 +341,9 @@ struct nlattr *nla_reserve(struct sk_buff *skb, int attrtype, int attrlen) } /** - * nla_reserve - reserve room for attribute without header + * nla_reserve_nohdr - reserve room for attribute without header * @skb: socket buffer to reserve room on - * @len: length of attribute payload + * @attrlen: length of attribute payload * * Reserves room for attribute payload without a header. * diff --git a/net/netrom/af_netrom.c b/net/netrom/af_netrom.c index 4bae8b998cab..9f1ea4a27b35 100644 --- a/net/netrom/af_netrom.c +++ b/net/netrom/af_netrom.c @@ -73,6 +73,20 @@ static const struct proto_ops nr_proto_ops; * separate class since they always nest. */ static struct lock_class_key nr_netdev_xmit_lock_key; +static struct lock_class_key nr_netdev_addr_lock_key; + +static void nr_set_lockdep_one(struct net_device *dev, + struct netdev_queue *txq, + void *_unused) +{ + lockdep_set_class(&txq->_xmit_lock, &nr_netdev_xmit_lock_key); +} + +static void nr_set_lockdep_key(struct net_device *dev) +{ + lockdep_set_class(&dev->addr_list_lock, &nr_netdev_addr_lock_key); + netdev_for_each_tx_queue(dev, nr_set_lockdep_one, NULL); +} /* * Socket removal during an interrupt is now safe. @@ -106,7 +120,7 @@ static int nr_device_event(struct notifier_block *this, unsigned long event, voi { struct net_device *dev = (struct net_device *)ptr; - if (dev_net(dev) != &init_net) + if (!net_eq(dev_net(dev), &init_net)) return NOTIFY_DONE; if (event != NETDEV_DOWN) @@ -475,13 +489,11 @@ static struct sock *nr_make_new(struct sock *osk) sock_init_data(NULL, sk); sk->sk_type = osk->sk_type; - sk->sk_socket = osk->sk_socket; sk->sk_priority = osk->sk_priority; sk->sk_protocol = osk->sk_protocol; sk->sk_rcvbuf = osk->sk_rcvbuf; sk->sk_sndbuf = osk->sk_sndbuf; sk->sk_state = TCP_ESTABLISHED; - sk->sk_sleep = osk->sk_sleep; sock_copy_flags(sk, osk); skb_queue_head_init(&nr->ack_queue); @@ -513,6 +525,7 @@ static int nr_release(struct socket *sock) if (sk == NULL) return 0; sock_hold(sk); + sock_orphan(sk); lock_sock(sk); nr = nr_sk(sk); @@ -536,13 +549,10 @@ static int nr_release(struct socket *sock) sk->sk_state = TCP_CLOSE; sk->sk_shutdown |= SEND_SHUTDOWN; sk->sk_state_change(sk); - sock_orphan(sk); sock_set_flag(sk, SOCK_DESTROY); - sk->sk_socket = NULL; break; default: - sk->sk_socket = NULL; break; } @@ -810,13 +820,11 @@ static int nr_accept(struct socket *sock, struct socket *newsock, int flags) goto out_release; newsk = skb->sk; - newsk->sk_socket = newsock; - newsk->sk_sleep = &newsock->wait; + sock_graft(newsk, newsock); /* Now attach up the new socket */ kfree_skb(skb); sk_acceptq_removed(sk); - newsock->sk = newsk; out_release: release_sock(sk); @@ -1436,7 +1444,7 @@ static int __init nr_proto_init(void) free_netdev(dev); goto fail; } - lockdep_set_class(&dev->_xmit_lock, &nr_netdev_xmit_lock_key); + nr_set_lockdep_key(dev); dev_nr[i] = dev; } diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c index 2cee87da4441..c718e7e3f7de 100644 --- a/net/packet/af_packet.c +++ b/net/packet/af_packet.c @@ -5,8 +5,6 @@ * * PACKET - implements raw packet sockets. * - * Version: $Id: af_packet.c,v 1.61 2002/02/08 03:57:19 davem Exp $ - * * Authors: Ross Biro * Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG> * Alan Cox, <gw4pts@gw4pts.ampr.org> @@ -188,6 +186,9 @@ struct packet_sock { unsigned int pg_vec_order; unsigned int pg_vec_pages; unsigned int pg_vec_len; + enum tpacket_versions tp_version; + unsigned int tp_hdrlen; + unsigned int tp_reserve; #endif }; @@ -203,14 +204,52 @@ struct packet_skb_cb { #ifdef CONFIG_PACKET_MMAP -static inline struct tpacket_hdr *packet_lookup_frame(struct packet_sock *po, unsigned int position) +static void *packet_lookup_frame(struct packet_sock *po, unsigned int position, + int status) { unsigned int pg_vec_pos, frame_offset; + union { + struct tpacket_hdr *h1; + struct tpacket2_hdr *h2; + void *raw; + } h; pg_vec_pos = position / po->frames_per_block; frame_offset = position % po->frames_per_block; - return (struct tpacket_hdr *)(po->pg_vec[pg_vec_pos] + (frame_offset * po->frame_size)); + h.raw = po->pg_vec[pg_vec_pos] + (frame_offset * po->frame_size); + switch (po->tp_version) { + case TPACKET_V1: + if (status != h.h1->tp_status ? TP_STATUS_USER : + TP_STATUS_KERNEL) + return NULL; + break; + case TPACKET_V2: + if (status != h.h2->tp_status ? TP_STATUS_USER : + TP_STATUS_KERNEL) + return NULL; + break; + } + return h.raw; +} + +static void __packet_set_status(struct packet_sock *po, void *frame, int status) +{ + union { + struct tpacket_hdr *h1; + struct tpacket2_hdr *h2; + void *raw; + } h; + + h.raw = frame; + switch (po->tp_version) { + case TPACKET_V1: + h.h1->tp_status = status; + break; + case TPACKET_V2: + h.h2->tp_status = status; + break; + } } #endif @@ -221,8 +260,8 @@ static inline struct packet_sock *pkt_sk(struct sock *sk) static void packet_sock_destruct(struct sock *sk) { - BUG_TRAP(!atomic_read(&sk->sk_rmem_alloc)); - BUG_TRAP(!atomic_read(&sk->sk_wmem_alloc)); + WARN_ON(atomic_read(&sk->sk_rmem_alloc)); + WARN_ON(atomic_read(&sk->sk_wmem_alloc)); if (!sock_flag(sk, SOCK_DEAD)) { printk("Attempt to release alive packet socket: %p\n", sk); @@ -553,14 +592,19 @@ static int tpacket_rcv(struct sk_buff *skb, struct net_device *dev, struct packe struct sock *sk; struct packet_sock *po; struct sockaddr_ll *sll; - struct tpacket_hdr *h; + union { + struct tpacket_hdr *h1; + struct tpacket2_hdr *h2; + void *raw; + } h; u8 * skb_head = skb->data; int skb_len = skb->len; unsigned int snaplen, res; unsigned long status = TP_STATUS_LOSING|TP_STATUS_USER; - unsigned short macoff, netoff; + unsigned short macoff, netoff, hdrlen; struct sk_buff *copy_skb = NULL; struct timeval tv; + struct timespec ts; if (skb->pkt_type == PACKET_LOOPBACK) goto drop; @@ -592,10 +636,13 @@ static int tpacket_rcv(struct sk_buff *skb, struct net_device *dev, struct packe snaplen = res; if (sk->sk_type == SOCK_DGRAM) { - macoff = netoff = TPACKET_ALIGN(TPACKET_HDRLEN) + 16; + macoff = netoff = TPACKET_ALIGN(po->tp_hdrlen) + 16 + + po->tp_reserve; } else { unsigned maclen = skb_network_offset(skb); - netoff = TPACKET_ALIGN(TPACKET_HDRLEN + (maclen < 16 ? 16 : maclen)); + netoff = TPACKET_ALIGN(po->tp_hdrlen + + (maclen < 16 ? 16 : maclen)) + + po->tp_reserve; macoff = netoff - maclen; } @@ -618,9 +665,8 @@ static int tpacket_rcv(struct sk_buff *skb, struct net_device *dev, struct packe } spin_lock(&sk->sk_receive_queue.lock); - h = packet_lookup_frame(po, po->head); - - if (h->tp_status) + h.raw = packet_lookup_frame(po, po->head, TP_STATUS_KERNEL); + if (!h.raw) goto ring_is_full; po->head = po->head != po->frame_max ? po->head+1 : 0; po->stats.tp_packets++; @@ -632,20 +678,41 @@ static int tpacket_rcv(struct sk_buff *skb, struct net_device *dev, struct packe status &= ~TP_STATUS_LOSING; spin_unlock(&sk->sk_receive_queue.lock); - skb_copy_bits(skb, 0, (u8*)h + macoff, snaplen); + skb_copy_bits(skb, 0, h.raw + macoff, snaplen); - h->tp_len = skb->len; - h->tp_snaplen = snaplen; - h->tp_mac = macoff; - h->tp_net = netoff; - if (skb->tstamp.tv64) - tv = ktime_to_timeval(skb->tstamp); - else - do_gettimeofday(&tv); - h->tp_sec = tv.tv_sec; - h->tp_usec = tv.tv_usec; + switch (po->tp_version) { + case TPACKET_V1: + h.h1->tp_len = skb->len; + h.h1->tp_snaplen = snaplen; + h.h1->tp_mac = macoff; + h.h1->tp_net = netoff; + if (skb->tstamp.tv64) + tv = ktime_to_timeval(skb->tstamp); + else + do_gettimeofday(&tv); + h.h1->tp_sec = tv.tv_sec; + h.h1->tp_usec = tv.tv_usec; + hdrlen = sizeof(*h.h1); + break; + case TPACKET_V2: + h.h2->tp_len = skb->len; + h.h2->tp_snaplen = snaplen; + h.h2->tp_mac = macoff; + h.h2->tp_net = netoff; + if (skb->tstamp.tv64) + ts = ktime_to_timespec(skb->tstamp); + else + getnstimeofday(&ts); + h.h2->tp_sec = ts.tv_sec; + h.h2->tp_nsec = ts.tv_nsec; + h.h2->tp_vlan_tci = skb->vlan_tci; + hdrlen = sizeof(*h.h2); + break; + default: + BUG(); + } - sll = (struct sockaddr_ll*)((u8*)h + TPACKET_ALIGN(sizeof(*h))); + sll = h.raw + TPACKET_ALIGN(hdrlen); sll->sll_halen = dev_parse_header(skb, sll->sll_addr); sll->sll_family = AF_PACKET; sll->sll_hatype = dev->type; @@ -656,14 +723,14 @@ static int tpacket_rcv(struct sk_buff *skb, struct net_device *dev, struct packe else sll->sll_ifindex = dev->ifindex; - h->tp_status = status; + __packet_set_status(po, h.raw, status); smp_mb(); { struct page *p_start, *p_end; - u8 *h_end = (u8 *)h + macoff + snaplen - 1; + u8 *h_end = h.raw + macoff + snaplen - 1; - p_start = virt_to_page(h); + p_start = virt_to_page(h.raw); p_end = virt_to_page(h_end); while (p_start <= p_end) { flush_dcache_page(p_start); @@ -1109,6 +1176,7 @@ static int packet_recvmsg(struct kiocb *iocb, struct socket *sock, aux.tp_snaplen = skb->len; aux.tp_mac = 0; aux.tp_net = skb_network_offset(skb); + aux.tp_vlan_tci = skb->vlan_tci; put_cmsg(msg, SOL_PACKET, PACKET_AUXDATA, sizeof(aux), &aux); } @@ -1175,7 +1243,8 @@ static int packet_getname(struct socket *sock, struct sockaddr *uaddr, return 0; } -static void packet_dev_mc(struct net_device *dev, struct packet_mclist *i, int what) +static int packet_dev_mc(struct net_device *dev, struct packet_mclist *i, + int what) { switch (i->type) { case PACKET_MR_MULTICAST: @@ -1185,13 +1254,14 @@ static void packet_dev_mc(struct net_device *dev, struct packet_mclist *i, int w dev_mc_delete(dev, i->addr, i->alen, 0); break; case PACKET_MR_PROMISC: - dev_set_promiscuity(dev, what); + return dev_set_promiscuity(dev, what); break; case PACKET_MR_ALLMULTI: - dev_set_allmulti(dev, what); + return dev_set_allmulti(dev, what); break; default:; } + return 0; } static void packet_dev_mclist(struct net_device *dev, struct packet_mclist *i, int what) @@ -1245,7 +1315,11 @@ static int packet_mc_add(struct sock *sk, struct packet_mreq_max *mreq) i->count = 1; i->next = po->mclist; po->mclist = i; - packet_dev_mc(dev, i, +1); + err = packet_dev_mc(dev, i, 1); + if (err) { + po->mclist = i->next; + kfree(i); + } done: rtnl_unlock(); @@ -1358,6 +1432,38 @@ packet_setsockopt(struct socket *sock, int level, int optname, char __user *optv pkt_sk(sk)->copy_thresh = val; return 0; } + case PACKET_VERSION: + { + int val; + + if (optlen != sizeof(val)) + return -EINVAL; + if (po->pg_vec) + return -EBUSY; + if (copy_from_user(&val, optval, sizeof(val))) + return -EFAULT; + switch (val) { + case TPACKET_V1: + case TPACKET_V2: + po->tp_version = val; + return 0; + default: + return -EINVAL; + } + } + case PACKET_RESERVE: + { + unsigned int val; + + if (optlen != sizeof(val)) + return -EINVAL; + if (po->pg_vec) + return -EBUSY; + if (copy_from_user(&val, optval, sizeof(val))) + return -EFAULT; + po->tp_reserve = val; + return 0; + } #endif case PACKET_AUXDATA: { @@ -1433,6 +1539,37 @@ static int packet_getsockopt(struct socket *sock, int level, int optname, data = &val; break; +#ifdef CONFIG_PACKET_MMAP + case PACKET_VERSION: + if (len > sizeof(int)) + len = sizeof(int); + val = po->tp_version; + data = &val; + break; + case PACKET_HDRLEN: + if (len > sizeof(int)) + len = sizeof(int); + if (copy_from_user(&val, optval, len)) + return -EFAULT; + switch (val) { + case TPACKET_V1: + val = sizeof(struct tpacket_hdr); + break; + case TPACKET_V2: + val = sizeof(struct tpacket2_hdr); + break; + default: + return -EINVAL; + } + data = &val; + break; + case PACKET_RESERVE: + if (len > sizeof(unsigned int)) + len = sizeof(unsigned int); + val = po->tp_reserve; + data = &val; + break; +#endif default: return -ENOPROTOOPT; } @@ -1540,7 +1677,7 @@ static int packet_ioctl(struct socket *sock, unsigned int cmd, case SIOCGIFDSTADDR: case SIOCSIFDSTADDR: case SIOCSIFFLAGS: - if (sock_net(sk) != &init_net) + if (!net_eq(sock_net(sk), &init_net)) return -ENOIOCTLCMD; return inet_dgram_ops.ioctl(sock, cmd, arg); #endif @@ -1566,11 +1703,8 @@ static unsigned int packet_poll(struct file * file, struct socket *sock, spin_lock_bh(&sk->sk_receive_queue.lock); if (po->pg_vec) { unsigned last = po->head ? po->head-1 : po->frame_max; - struct tpacket_hdr *h; - - h = packet_lookup_frame(po, last); - if (h->tp_status) + if (packet_lookup_frame(po, last, TP_STATUS_USER)) mask |= POLLIN | POLLRDNORM; } spin_unlock_bh(&sk->sk_receive_queue.lock); @@ -1665,11 +1799,21 @@ static int packet_set_ring(struct sock *sk, struct tpacket_req *req, int closing if (unlikely(po->pg_vec)) return -EBUSY; + switch (po->tp_version) { + case TPACKET_V1: + po->tp_hdrlen = TPACKET_HDRLEN; + break; + case TPACKET_V2: + po->tp_hdrlen = TPACKET2_HDRLEN; + break; + } + if (unlikely((int)req->tp_block_size <= 0)) return -EINVAL; if (unlikely(req->tp_block_size & (PAGE_SIZE - 1))) return -EINVAL; - if (unlikely(req->tp_frame_size < TPACKET_HDRLEN)) + if (unlikely(req->tp_frame_size < po->tp_hdrlen + + po->tp_reserve)) return -EINVAL; if (unlikely(req->tp_frame_size & (TPACKET_ALIGNMENT - 1))) return -EINVAL; @@ -1688,13 +1832,11 @@ static int packet_set_ring(struct sock *sk, struct tpacket_req *req, int closing goto out; for (i = 0; i < req->tp_block_nr; i++) { - char *ptr = pg_vec[i]; - struct tpacket_hdr *header; + void *ptr = pg_vec[i]; int k; for (k = 0; k < po->frames_per_block; k++) { - header = (struct tpacket_hdr *) ptr; - header->tp_status = TP_STATUS_KERNEL; + __packet_set_status(po, ptr, TP_STATUS_KERNEL); ptr += req->tp_frame_size; } } diff --git a/net/phonet/Kconfig b/net/phonet/Kconfig new file mode 100644 index 000000000000..51a5669573f2 --- /dev/null +++ b/net/phonet/Kconfig @@ -0,0 +1,16 @@ +# +# Phonet protocol +# + +config PHONET + tristate "Phonet protocols family" + help + The Phone Network protocol (PhoNet) is a packet-oriented + communication protocol developped by Nokia for use with its modems. + + This is required for Maemo to use cellular data connectivity (if + supported). It can also be used to control Nokia phones + from a Linux computer, although AT commands may be easier to use. + + To compile this driver as a module, choose M here: the module + will be called phonet. If unsure, say N. diff --git a/net/phonet/Makefile b/net/phonet/Makefile new file mode 100644 index 000000000000..d62bbba649b3 --- /dev/null +++ b/net/phonet/Makefile @@ -0,0 +1,11 @@ +obj-$(CONFIG_PHONET) += phonet.o pn_pep.o + +phonet-objs := \ + pn_dev.o \ + pn_netlink.o \ + socket.o \ + datagram.o \ + sysctl.o \ + af_phonet.o + +pn_pep-objs := pep.o pep-gprs.o diff --git a/net/phonet/af_phonet.c b/net/phonet/af_phonet.c new file mode 100644 index 000000000000..9e9c6fce11aa --- /dev/null +++ b/net/phonet/af_phonet.c @@ -0,0 +1,477 @@ +/* + * File: af_phonet.c + * + * Phonet protocols family + * + * Copyright (C) 2008 Nokia Corporation. + * + * Contact: Remi Denis-Courmont <remi.denis-courmont@nokia.com> + * Original author: Sakari Ailus <sakari.ailus@nokia.com> + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * version 2 as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA + * 02110-1301 USA + */ + +#include <linux/kernel.h> +#include <linux/module.h> +#include <asm/unaligned.h> +#include <net/sock.h> + +#include <linux/if_phonet.h> +#include <linux/phonet.h> +#include <net/phonet/phonet.h> +#include <net/phonet/pn_dev.h> + +static struct net_proto_family phonet_proto_family; +static struct phonet_protocol *phonet_proto_get(int protocol); +static inline void phonet_proto_put(struct phonet_protocol *pp); + +/* protocol family functions */ + +static int pn_socket_create(struct net *net, struct socket *sock, int protocol) +{ + struct sock *sk; + struct pn_sock *pn; + struct phonet_protocol *pnp; + int err; + + if (net != &init_net) + return -EAFNOSUPPORT; + + if (!capable(CAP_SYS_ADMIN)) + return -EPERM; + + if (protocol == 0) { + /* Default protocol selection */ + switch (sock->type) { + case SOCK_DGRAM: + protocol = PN_PROTO_PHONET; + break; + case SOCK_SEQPACKET: + protocol = PN_PROTO_PIPE; + break; + default: + return -EPROTONOSUPPORT; + } + } + + pnp = phonet_proto_get(protocol); +#ifdef CONFIG_KMOD + if (pnp == NULL && + request_module("net-pf-%d-proto-%d", PF_PHONET, protocol) == 0) + pnp = phonet_proto_get(protocol); +#endif + if (pnp == NULL) + return -EPROTONOSUPPORT; + if (sock->type != pnp->sock_type) { + err = -EPROTONOSUPPORT; + goto out; + } + + sk = sk_alloc(net, PF_PHONET, GFP_KERNEL, pnp->prot); + if (sk == NULL) { + err = -ENOMEM; + goto out; + } + + sock_init_data(sock, sk); + sock->state = SS_UNCONNECTED; + sock->ops = pnp->ops; + sk->sk_backlog_rcv = sk->sk_prot->backlog_rcv; + sk->sk_protocol = protocol; + pn = pn_sk(sk); + pn->sobject = 0; + pn->resource = 0; + sk->sk_prot->init(sk); + err = 0; + +out: + phonet_proto_put(pnp); + return err; +} + +static struct net_proto_family phonet_proto_family = { + .family = PF_PHONET, + .create = pn_socket_create, + .owner = THIS_MODULE, +}; + +/* Phonet device header operations */ +static int pn_header_create(struct sk_buff *skb, struct net_device *dev, + unsigned short type, const void *daddr, + const void *saddr, unsigned len) +{ + u8 *media = skb_push(skb, 1); + + if (type != ETH_P_PHONET) + return -1; + + if (!saddr) + saddr = dev->dev_addr; + *media = *(const u8 *)saddr; + return 1; +} + +static int pn_header_parse(const struct sk_buff *skb, unsigned char *haddr) +{ + const u8 *media = skb_mac_header(skb); + *haddr = *media; + return 1; +} + +struct header_ops phonet_header_ops = { + .create = pn_header_create, + .parse = pn_header_parse, +}; +EXPORT_SYMBOL(phonet_header_ops); + +/* + * Prepends an ISI header and sends a datagram. + */ +static int pn_send(struct sk_buff *skb, struct net_device *dev, + u16 dst, u16 src, u8 res, u8 irq) +{ + struct phonethdr *ph; + int err; + + if (skb->len + 2 > 0xffff) { + /* Phonet length field would overflow */ + err = -EMSGSIZE; + goto drop; + } + + skb_reset_transport_header(skb); + WARN_ON(skb_headroom(skb) & 1); /* HW assumes word alignment */ + skb_push(skb, sizeof(struct phonethdr)); + skb_reset_network_header(skb); + ph = pn_hdr(skb); + ph->pn_rdev = pn_dev(dst); + ph->pn_sdev = pn_dev(src); + ph->pn_res = res; + ph->pn_length = __cpu_to_be16(skb->len + 2 - sizeof(*ph)); + ph->pn_robj = pn_obj(dst); + ph->pn_sobj = pn_obj(src); + + skb->protocol = htons(ETH_P_PHONET); + skb->priority = 0; + skb->dev = dev; + + if (pn_addr(src) == pn_addr(dst)) { + skb_reset_mac_header(skb); + skb->pkt_type = PACKET_LOOPBACK; + skb_orphan(skb); + if (irq) + netif_rx(skb); + else + netif_rx_ni(skb); + err = 0; + } else { + err = dev_hard_header(skb, dev, ntohs(skb->protocol), + NULL, NULL, skb->len); + if (err < 0) { + err = -EHOSTUNREACH; + goto drop; + } + err = dev_queue_xmit(skb); + } + + return err; +drop: + kfree_skb(skb); + return err; +} + +static int pn_raw_send(const void *data, int len, struct net_device *dev, + u16 dst, u16 src, u8 res) +{ + struct sk_buff *skb = alloc_skb(MAX_PHONET_HEADER + len, GFP_ATOMIC); + if (skb == NULL) + return -ENOMEM; + + skb_reserve(skb, MAX_PHONET_HEADER); + __skb_put(skb, len); + skb_copy_to_linear_data(skb, data, len); + return pn_send(skb, dev, dst, src, res, 1); +} + +/* + * Create a Phonet header for the skb and send it out. Returns + * non-zero error code if failed. The skb is freed then. + */ +int pn_skb_send(struct sock *sk, struct sk_buff *skb, + const struct sockaddr_pn *target) +{ + struct net_device *dev; + struct pn_sock *pn = pn_sk(sk); + int err; + u16 src; + u8 daddr = pn_sockaddr_get_addr(target), saddr = PN_NO_ADDR; + + err = -EHOSTUNREACH; + if (sk->sk_bound_dev_if) + dev = dev_get_by_index(sock_net(sk), sk->sk_bound_dev_if); + else + dev = phonet_device_get(sock_net(sk)); + if (!dev || !(dev->flags & IFF_UP)) + goto drop; + + saddr = phonet_address_get(dev, daddr); + if (saddr == PN_NO_ADDR) + goto drop; + + src = pn->sobject; + if (!pn_addr(src)) + src = pn_object(saddr, pn_obj(src)); + + err = pn_send(skb, dev, pn_sockaddr_get_object(target), + src, pn_sockaddr_get_resource(target), 0); + dev_put(dev); + return err; + +drop: + kfree_skb(skb); + if (dev) + dev_put(dev); + return err; +} +EXPORT_SYMBOL(pn_skb_send); + +/* Do not send an error message in response to an error message */ +static inline int can_respond(struct sk_buff *skb) +{ + const struct phonethdr *ph; + const struct phonetmsg *pm; + u8 submsg_id; + + if (!pskb_may_pull(skb, 3)) + return 0; + + ph = pn_hdr(skb); + if (phonet_address_get(skb->dev, ph->pn_rdev) != ph->pn_rdev) + return 0; /* we are not the destination */ + if (ph->pn_res == PN_PREFIX && !pskb_may_pull(skb, 5)) + return 0; + + ph = pn_hdr(skb); /* re-acquires the pointer */ + pm = pn_msg(skb); + if (pm->pn_msg_id != PN_COMMON_MESSAGE) + return 1; + submsg_id = (ph->pn_res == PN_PREFIX) + ? pm->pn_e_submsg_id : pm->pn_submsg_id; + if (submsg_id != PN_COMM_ISA_ENTITY_NOT_REACHABLE_RESP && + pm->pn_e_submsg_id != PN_COMM_SERVICE_NOT_IDENTIFIED_RESP) + return 1; + return 0; +} + +static int send_obj_unreachable(struct sk_buff *rskb) +{ + const struct phonethdr *oph = pn_hdr(rskb); + const struct phonetmsg *opm = pn_msg(rskb); + struct phonetmsg resp; + + memset(&resp, 0, sizeof(resp)); + resp.pn_trans_id = opm->pn_trans_id; + resp.pn_msg_id = PN_COMMON_MESSAGE; + if (oph->pn_res == PN_PREFIX) { + resp.pn_e_res_id = opm->pn_e_res_id; + resp.pn_e_submsg_id = PN_COMM_ISA_ENTITY_NOT_REACHABLE_RESP; + resp.pn_e_orig_msg_id = opm->pn_msg_id; + resp.pn_e_status = 0; + } else { + resp.pn_submsg_id = PN_COMM_ISA_ENTITY_NOT_REACHABLE_RESP; + resp.pn_orig_msg_id = opm->pn_msg_id; + resp.pn_status = 0; + } + return pn_raw_send(&resp, sizeof(resp), rskb->dev, + pn_object(oph->pn_sdev, oph->pn_sobj), + pn_object(oph->pn_rdev, oph->pn_robj), + oph->pn_res); +} + +static int send_reset_indications(struct sk_buff *rskb) +{ + struct phonethdr *oph = pn_hdr(rskb); + static const u8 data[4] = { + 0x00 /* trans ID */, 0x10 /* subscribe msg */, + 0x00 /* subscription count */, 0x00 /* dummy */ + }; + + return pn_raw_send(data, sizeof(data), rskb->dev, + pn_object(oph->pn_sdev, 0x00), + pn_object(oph->pn_rdev, oph->pn_robj), 0x10); +} + + +/* packet type functions */ + +/* + * Stuff received packets to associated sockets. + * On error, returns non-zero and releases the skb. + */ +static int phonet_rcv(struct sk_buff *skb, struct net_device *dev, + struct packet_type *pkttype, + struct net_device *orig_dev) +{ + struct phonethdr *ph; + struct sock *sk; + struct sockaddr_pn sa; + u16 len; + + if (dev_net(dev) != &init_net) + goto out; + + /* check we have at least a full Phonet header */ + if (!pskb_pull(skb, sizeof(struct phonethdr))) + goto out; + + /* check that the advertised length is correct */ + ph = pn_hdr(skb); + len = get_unaligned_be16(&ph->pn_length); + if (len < 2) + goto out; + len -= 2; + if ((len > skb->len) || pskb_trim(skb, len)) + goto out; + skb_reset_transport_header(skb); + + pn_skb_get_dst_sockaddr(skb, &sa); + if (pn_sockaddr_get_addr(&sa) == 0) + goto out; /* currently, we cannot be device 0 */ + + sk = pn_find_sock_by_sa(&sa); + if (sk == NULL) { + if (can_respond(skb)) { + send_obj_unreachable(skb); + send_reset_indications(skb); + } + goto out; + } + + /* Push data to the socket (or other sockets connected to it). */ + return sk_receive_skb(sk, skb, 0); + +out: + kfree_skb(skb); + return NET_RX_DROP; +} + +static struct packet_type phonet_packet_type = { + .type = __constant_htons(ETH_P_PHONET), + .dev = NULL, + .func = phonet_rcv, +}; + +/* Transport protocol registration */ +static struct phonet_protocol *proto_tab[PHONET_NPROTO] __read_mostly; +static DEFINE_SPINLOCK(proto_tab_lock); + +int __init_or_module phonet_proto_register(int protocol, + struct phonet_protocol *pp) +{ + int err = 0; + + if (protocol >= PHONET_NPROTO) + return -EINVAL; + + err = proto_register(pp->prot, 1); + if (err) + return err; + + spin_lock(&proto_tab_lock); + if (proto_tab[protocol]) + err = -EBUSY; + else + proto_tab[protocol] = pp; + spin_unlock(&proto_tab_lock); + + return err; +} +EXPORT_SYMBOL(phonet_proto_register); + +void phonet_proto_unregister(int protocol, struct phonet_protocol *pp) +{ + spin_lock(&proto_tab_lock); + BUG_ON(proto_tab[protocol] != pp); + proto_tab[protocol] = NULL; + spin_unlock(&proto_tab_lock); + proto_unregister(pp->prot); +} +EXPORT_SYMBOL(phonet_proto_unregister); + +static struct phonet_protocol *phonet_proto_get(int protocol) +{ + struct phonet_protocol *pp; + + if (protocol >= PHONET_NPROTO) + return NULL; + + spin_lock(&proto_tab_lock); + pp = proto_tab[protocol]; + if (pp && !try_module_get(pp->prot->owner)) + pp = NULL; + spin_unlock(&proto_tab_lock); + + return pp; +} + +static inline void phonet_proto_put(struct phonet_protocol *pp) +{ + module_put(pp->prot->owner); +} + +/* Module registration */ +static int __init phonet_init(void) +{ + int err; + + err = sock_register(&phonet_proto_family); + if (err) { + printk(KERN_ALERT + "phonet protocol family initialization failed\n"); + return err; + } + + phonet_device_init(); + dev_add_pack(&phonet_packet_type); + phonet_netlink_register(); + phonet_sysctl_init(); + + err = isi_register(); + if (err) + goto err; + return 0; + +err: + phonet_sysctl_exit(); + sock_unregister(PF_PHONET); + dev_remove_pack(&phonet_packet_type); + phonet_device_exit(); + return err; +} + +static void __exit phonet_exit(void) +{ + isi_unregister(); + phonet_sysctl_exit(); + sock_unregister(PF_PHONET); + dev_remove_pack(&phonet_packet_type); + phonet_device_exit(); +} + +module_init(phonet_init); +module_exit(phonet_exit); +MODULE_DESCRIPTION("Phonet protocol stack for Linux"); +MODULE_LICENSE("GPL"); +MODULE_ALIAS_NETPROTO(PF_PHONET); diff --git a/net/phonet/datagram.c b/net/phonet/datagram.c new file mode 100644 index 000000000000..e087862ed7e4 --- /dev/null +++ b/net/phonet/datagram.c @@ -0,0 +1,197 @@ +/* + * File: datagram.c + * + * Datagram (ISI) Phonet sockets + * + * Copyright (C) 2008 Nokia Corporation. + * + * Contact: Remi Denis-Courmont <remi.denis-courmont@nokia.com> + * Original author: Sakari Ailus <sakari.ailus@nokia.com> + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * version 2 as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA + * 02110-1301 USA + */ + +#include <linux/kernel.h> +#include <linux/socket.h> +#include <asm/ioctls.h> +#include <net/sock.h> + +#include <linux/phonet.h> +#include <net/phonet/phonet.h> + +static int pn_backlog_rcv(struct sock *sk, struct sk_buff *skb); + +/* associated socket ceases to exist */ +static void pn_sock_close(struct sock *sk, long timeout) +{ + sk_common_release(sk); +} + +static int pn_ioctl(struct sock *sk, int cmd, unsigned long arg) +{ + struct sk_buff *skb; + int answ; + + switch (cmd) { + case SIOCINQ: + lock_sock(sk); + skb = skb_peek(&sk->sk_receive_queue); + answ = skb ? skb->len : 0; + release_sock(sk); + return put_user(answ, (int __user *)arg); + } + + return -ENOIOCTLCMD; +} + +/* Destroy socket. All references are gone. */ +static void pn_destruct(struct sock *sk) +{ + skb_queue_purge(&sk->sk_receive_queue); +} + +static int pn_init(struct sock *sk) +{ + sk->sk_destruct = pn_destruct; + return 0; +} + +static int pn_sendmsg(struct kiocb *iocb, struct sock *sk, + struct msghdr *msg, size_t len) +{ + struct sockaddr_pn *target; + struct sk_buff *skb; + int err; + + if (msg->msg_flags & MSG_OOB) + return -EOPNOTSUPP; + + if (msg->msg_name == NULL) + return -EDESTADDRREQ; + + if (msg->msg_namelen < sizeof(struct sockaddr_pn)) + return -EINVAL; + + target = (struct sockaddr_pn *)msg->msg_name; + if (target->spn_family != AF_PHONET) + return -EAFNOSUPPORT; + + skb = sock_alloc_send_skb(sk, MAX_PHONET_HEADER + len, + msg->msg_flags & MSG_DONTWAIT, &err); + if (skb == NULL) + return err; + skb_reserve(skb, MAX_PHONET_HEADER); + + err = memcpy_fromiovec((void *)skb_put(skb, len), msg->msg_iov, len); + if (err < 0) { + kfree_skb(skb); + return err; + } + + /* + * Fill in the Phonet header and + * finally pass the packet forwards. + */ + err = pn_skb_send(sk, skb, target); + + /* If ok, return len. */ + return (err >= 0) ? len : err; +} + +static int pn_recvmsg(struct kiocb *iocb, struct sock *sk, + struct msghdr *msg, size_t len, int noblock, + int flags, int *addr_len) +{ + struct sk_buff *skb = NULL; + struct sockaddr_pn sa; + int rval = -EOPNOTSUPP; + int copylen; + + if (flags & MSG_OOB) + goto out_nofree; + + if (addr_len) + *addr_len = sizeof(sa); + + skb = skb_recv_datagram(sk, flags, noblock, &rval); + if (skb == NULL) + goto out_nofree; + + pn_skb_get_src_sockaddr(skb, &sa); + + copylen = skb->len; + if (len < copylen) { + msg->msg_flags |= MSG_TRUNC; + copylen = len; + } + + rval = skb_copy_datagram_iovec(skb, 0, msg->msg_iov, copylen); + if (rval) { + rval = -EFAULT; + goto out; + } + + rval = (flags & MSG_TRUNC) ? skb->len : copylen; + + if (msg->msg_name != NULL) + memcpy(msg->msg_name, &sa, sizeof(struct sockaddr_pn)); + +out: + skb_free_datagram(sk, skb); + +out_nofree: + return rval; +} + +/* Queue an skb for a sock. */ +static int pn_backlog_rcv(struct sock *sk, struct sk_buff *skb) +{ + int err = sock_queue_rcv_skb(sk, skb); + if (err < 0) + kfree_skb(skb); + return err ? NET_RX_DROP : NET_RX_SUCCESS; +} + +/* Module registration */ +static struct proto pn_proto = { + .close = pn_sock_close, + .ioctl = pn_ioctl, + .init = pn_init, + .sendmsg = pn_sendmsg, + .recvmsg = pn_recvmsg, + .backlog_rcv = pn_backlog_rcv, + .hash = pn_sock_hash, + .unhash = pn_sock_unhash, + .get_port = pn_sock_get_port, + .obj_size = sizeof(struct pn_sock), + .owner = THIS_MODULE, + .name = "PHONET", +}; + +static struct phonet_protocol pn_dgram_proto = { + .ops = &phonet_dgram_ops, + .prot = &pn_proto, + .sock_type = SOCK_DGRAM, +}; + +int __init isi_register(void) +{ + return phonet_proto_register(PN_PROTO_PHONET, &pn_dgram_proto); +} + +void __exit isi_unregister(void) +{ + phonet_proto_unregister(PN_PROTO_PHONET, &pn_dgram_proto); +} diff --git a/net/phonet/pep-gprs.c b/net/phonet/pep-gprs.c new file mode 100644 index 000000000000..9978afbd9f2a --- /dev/null +++ b/net/phonet/pep-gprs.c @@ -0,0 +1,347 @@ +/* + * File: pep-gprs.c + * + * GPRS over Phonet pipe end point socket + * + * Copyright (C) 2008 Nokia Corporation. + * + * Author: Rémi Denis-Courmont <remi.denis-courmont@nokia.com> + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * version 2 as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA + * 02110-1301 USA + */ + +#include <linux/kernel.h> +#include <linux/netdevice.h> +#include <linux/if_ether.h> +#include <linux/if_arp.h> +#include <net/sock.h> + +#include <linux/if_phonet.h> +#include <net/tcp_states.h> +#include <net/phonet/gprs.h> + +#define GPRS_DEFAULT_MTU 1400 + +struct gprs_dev { + struct sock *sk; + void (*old_state_change)(struct sock *); + void (*old_data_ready)(struct sock *, int); + void (*old_write_space)(struct sock *); + + struct net_device *net; + struct net_device_stats stats; + + struct sk_buff_head tx_queue; + struct work_struct tx_work; + spinlock_t tx_lock; + unsigned tx_max; +}; + +static int gprs_type_trans(struct sk_buff *skb) +{ + const u8 *pvfc; + u8 buf; + + pvfc = skb_header_pointer(skb, 0, 1, &buf); + if (!pvfc) + return 0; + /* Look at IP version field */ + switch (*pvfc >> 4) { + case 4: + return htons(ETH_P_IP); + case 6: + return htons(ETH_P_IPV6); + } + return 0; +} + +/* + * Socket callbacks + */ + +static void gprs_state_change(struct sock *sk) +{ + struct gprs_dev *dev = sk->sk_user_data; + + if (sk->sk_state == TCP_CLOSE_WAIT) { + netif_stop_queue(dev->net); + netif_carrier_off(dev->net); + } +} + +static int gprs_recv(struct gprs_dev *dev, struct sk_buff *skb) +{ + int err = 0; + u16 protocol = gprs_type_trans(skb); + + if (!protocol) { + err = -EINVAL; + goto drop; + } + + if (likely(skb_headroom(skb) & 3)) { + struct sk_buff *rskb, *fs; + int flen = 0; + + /* Phonet Pipe data header is misaligned (3 bytes), + * so wrap the IP packet as a single fragment of an head-less + * socket buffer. The network stack will pull what it needs, + * but at least, the whole IP payload is not memcpy'd. */ + rskb = netdev_alloc_skb(dev->net, 0); + if (!rskb) { + err = -ENOBUFS; + goto drop; + } + skb_shinfo(rskb)->frag_list = skb; + rskb->len += skb->len; + rskb->data_len += rskb->len; + rskb->truesize += rskb->len; + + /* Avoid nested fragments */ + for (fs = skb_shinfo(skb)->frag_list; fs; fs = fs->next) + flen += fs->len; + skb->next = skb_shinfo(skb)->frag_list; + skb_shinfo(skb)->frag_list = NULL; + skb->len -= flen; + skb->data_len -= flen; + skb->truesize -= flen; + + skb = rskb; + } + + skb->protocol = protocol; + skb_reset_mac_header(skb); + skb->dev = dev->net; + + if (likely(dev->net->flags & IFF_UP)) { + dev->stats.rx_packets++; + dev->stats.rx_bytes += skb->len; + netif_rx(skb); + skb = NULL; + } else + err = -ENODEV; + +drop: + if (skb) { + dev_kfree_skb(skb); + dev->stats.rx_dropped++; + } + return err; +} + +static void gprs_data_ready(struct sock *sk, int len) +{ + struct gprs_dev *dev = sk->sk_user_data; + struct sk_buff *skb; + + while ((skb = pep_read(sk)) != NULL) { + skb_orphan(skb); + gprs_recv(dev, skb); + } +} + +static void gprs_write_space(struct sock *sk) +{ + struct gprs_dev *dev = sk->sk_user_data; + unsigned credits = pep_writeable(sk); + + spin_lock_bh(&dev->tx_lock); + dev->tx_max = credits; + if (credits > skb_queue_len(&dev->tx_queue)) + netif_wake_queue(dev->net); + spin_unlock_bh(&dev->tx_lock); +} + +/* + * Network device callbacks + */ + +static int gprs_xmit(struct sk_buff *skb, struct net_device *net) +{ + struct gprs_dev *dev = netdev_priv(net); + + switch (skb->protocol) { + case htons(ETH_P_IP): + case htons(ETH_P_IPV6): + break; + default: + dev_kfree_skb(skb); + return 0; + } + + spin_lock(&dev->tx_lock); + if (likely(skb_queue_len(&dev->tx_queue) < dev->tx_max)) { + skb_queue_tail(&dev->tx_queue, skb); + skb = NULL; + } + if (skb_queue_len(&dev->tx_queue) >= dev->tx_max) + netif_stop_queue(net); + spin_unlock(&dev->tx_lock); + + schedule_work(&dev->tx_work); + if (unlikely(skb)) + dev_kfree_skb(skb); + return 0; +} + +static void gprs_tx(struct work_struct *work) +{ + struct gprs_dev *dev = container_of(work, struct gprs_dev, tx_work); + struct sock *sk = dev->sk; + struct sk_buff *skb; + + while ((skb = skb_dequeue(&dev->tx_queue)) != NULL) { + int err; + + dev->stats.tx_bytes += skb->len; + dev->stats.tx_packets++; + + skb_orphan(skb); + skb_set_owner_w(skb, sk); + + lock_sock(sk); + err = pep_write(sk, skb); + if (err) { + LIMIT_NETDEBUG(KERN_WARNING"%s: TX error (%d)\n", + dev->net->name, err); + dev->stats.tx_aborted_errors++; + dev->stats.tx_errors++; + } + release_sock(sk); + } + + lock_sock(sk); + gprs_write_space(sk); + release_sock(sk); +} + +static int gprs_set_mtu(struct net_device *net, int new_mtu) +{ + if ((new_mtu < 576) || (new_mtu > (PHONET_MAX_MTU - 11))) + return -EINVAL; + + net->mtu = new_mtu; + return 0; +} + +static struct net_device_stats *gprs_get_stats(struct net_device *net) +{ + struct gprs_dev *dev = netdev_priv(net); + + return &dev->stats; +} + +static void gprs_setup(struct net_device *net) +{ + net->features = NETIF_F_FRAGLIST; + net->type = ARPHRD_NONE; + net->flags = IFF_POINTOPOINT | IFF_NOARP; + net->mtu = GPRS_DEFAULT_MTU; + net->hard_header_len = 0; + net->addr_len = 0; + net->tx_queue_len = 10; + + net->destructor = free_netdev; + net->hard_start_xmit = gprs_xmit; /* mandatory */ + net->change_mtu = gprs_set_mtu; + net->get_stats = gprs_get_stats; +} + +/* + * External interface + */ + +/* + * Attach a GPRS interface to a datagram socket. + * Returns the interface index on success, negative error code on error. + */ +int gprs_attach(struct sock *sk) +{ + static const char ifname[] = "gprs%d"; + struct gprs_dev *dev; + struct net_device *net; + int err; + + if (unlikely(sk->sk_type == SOCK_STREAM)) + return -EINVAL; /* need packet boundaries */ + + /* Create net device */ + net = alloc_netdev(sizeof(*dev), ifname, gprs_setup); + if (!net) + return -ENOMEM; + dev = netdev_priv(net); + dev->net = net; + dev->tx_max = 0; + spin_lock_init(&dev->tx_lock); + skb_queue_head_init(&dev->tx_queue); + INIT_WORK(&dev->tx_work, gprs_tx); + + netif_stop_queue(net); + err = register_netdev(net); + if (err) { + free_netdev(net); + return err; + } + + lock_sock(sk); + if (unlikely(sk->sk_user_data)) { + err = -EBUSY; + goto out_rel; + } + if (unlikely((1 << sk->sk_state & (TCPF_CLOSE|TCPF_LISTEN)) || + sock_flag(sk, SOCK_DEAD))) { + err = -EINVAL; + goto out_rel; + } + sk->sk_user_data = dev; + dev->old_state_change = sk->sk_state_change; + dev->old_data_ready = sk->sk_data_ready; + dev->old_write_space = sk->sk_write_space; + sk->sk_state_change = gprs_state_change; + sk->sk_data_ready = gprs_data_ready; + sk->sk_write_space = gprs_write_space; + release_sock(sk); + + sock_hold(sk); + dev->sk = sk; + + printk(KERN_DEBUG"%s: attached\n", net->name); + gprs_write_space(sk); /* kick off TX */ + return net->ifindex; + +out_rel: + release_sock(sk); + unregister_netdev(net); + return err; +} + +void gprs_detach(struct sock *sk) +{ + struct gprs_dev *dev = sk->sk_user_data; + struct net_device *net = dev->net; + + lock_sock(sk); + sk->sk_user_data = NULL; + sk->sk_state_change = dev->old_state_change; + sk->sk_data_ready = dev->old_data_ready; + sk->sk_write_space = dev->old_write_space; + release_sock(sk); + + printk(KERN_DEBUG"%s: detached\n", net->name); + unregister_netdev(net); + flush_scheduled_work(); + sock_put(sk); + skb_queue_purge(&dev->tx_queue); +} diff --git a/net/phonet/pep.c b/net/phonet/pep.c new file mode 100644 index 000000000000..bc6d50f83249 --- /dev/null +++ b/net/phonet/pep.c @@ -0,0 +1,1076 @@ +/* + * File: pep.c + * + * Phonet pipe protocol end point socket + * + * Copyright (C) 2008 Nokia Corporation. + * + * Author: Rémi Denis-Courmont <remi.denis-courmont@nokia.com> + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * version 2 as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA + * 02110-1301 USA + */ + +#include <linux/kernel.h> +#include <linux/socket.h> +#include <net/sock.h> +#include <net/tcp_states.h> +#include <asm/ioctls.h> + +#include <linux/phonet.h> +#include <net/phonet/phonet.h> +#include <net/phonet/pep.h> +#include <net/phonet/gprs.h> + +/* sk_state values: + * TCP_CLOSE sock not in use yet + * TCP_CLOSE_WAIT disconnected pipe + * TCP_LISTEN listening pipe endpoint + * TCP_SYN_RECV connected pipe in disabled state + * TCP_ESTABLISHED connected pipe in enabled state + * + * pep_sock locking: + * - sk_state, ackq, hlist: sock lock needed + * - listener: read only + * - pipe_handle: read only + */ + +#define CREDITS_MAX 10 +#define CREDITS_THR 7 + +static const struct sockaddr_pn pipe_srv = { + .spn_family = AF_PHONET, + .spn_resource = 0xD9, /* pipe service */ +}; + +#define pep_sb_size(s) (((s) + 5) & ~3) /* 2-bytes head, 32-bits aligned */ + +/* Get the next TLV sub-block. */ +static unsigned char *pep_get_sb(struct sk_buff *skb, u8 *ptype, u8 *plen, + void *buf) +{ + void *data = NULL; + struct { + u8 sb_type; + u8 sb_len; + } *ph, h; + int buflen = *plen; + + ph = skb_header_pointer(skb, 0, 2, &h); + if (ph == NULL || ph->sb_len < 2 || !pskb_may_pull(skb, ph->sb_len)) + return NULL; + ph->sb_len -= 2; + *ptype = ph->sb_type; + *plen = ph->sb_len; + + if (buflen > ph->sb_len) + buflen = ph->sb_len; + data = skb_header_pointer(skb, 2, buflen, buf); + __skb_pull(skb, 2 + ph->sb_len); + return data; +} + +static int pep_reply(struct sock *sk, struct sk_buff *oskb, + u8 code, const void *data, int len, gfp_t priority) +{ + const struct pnpipehdr *oph = pnp_hdr(oskb); + struct pnpipehdr *ph; + struct sk_buff *skb; + + skb = alloc_skb(MAX_PNPIPE_HEADER + len, priority); + if (!skb) + return -ENOMEM; + skb_set_owner_w(skb, sk); + + skb_reserve(skb, MAX_PNPIPE_HEADER); + __skb_put(skb, len); + skb_copy_to_linear_data(skb, data, len); + __skb_push(skb, sizeof(*ph)); + skb_reset_transport_header(skb); + ph = pnp_hdr(skb); + ph->utid = oph->utid; + ph->message_id = oph->message_id + 1; /* REQ -> RESP */ + ph->pipe_handle = oph->pipe_handle; + ph->error_code = code; + + return pn_skb_send(sk, skb, &pipe_srv); +} + +#define PAD 0x00 +static int pep_accept_conn(struct sock *sk, struct sk_buff *skb) +{ + static const u8 data[20] = { + PAD, PAD, PAD, 2 /* sub-blocks */, + PN_PIPE_SB_REQUIRED_FC_TX, pep_sb_size(5), 3, PAD, + PN_MULTI_CREDIT_FLOW_CONTROL, + PN_ONE_CREDIT_FLOW_CONTROL, + PN_LEGACY_FLOW_CONTROL, + PAD, + PN_PIPE_SB_PREFERRED_FC_RX, pep_sb_size(5), 3, PAD, + PN_MULTI_CREDIT_FLOW_CONTROL, + PN_ONE_CREDIT_FLOW_CONTROL, + PN_LEGACY_FLOW_CONTROL, + PAD, + }; + + might_sleep(); + return pep_reply(sk, skb, PN_PIPE_NO_ERROR, data, sizeof(data), + GFP_KERNEL); +} + +static int pep_reject_conn(struct sock *sk, struct sk_buff *skb, u8 code) +{ + static const u8 data[4] = { PAD, PAD, PAD, 0 /* sub-blocks */ }; + WARN_ON(code == PN_PIPE_NO_ERROR); + return pep_reply(sk, skb, code, data, sizeof(data), GFP_ATOMIC); +} + +/* Control requests are not sent by the pipe service and have a specific + * message format. */ +static int pep_ctrlreq_error(struct sock *sk, struct sk_buff *oskb, u8 code, + gfp_t priority) +{ + const struct pnpipehdr *oph = pnp_hdr(oskb); + struct sk_buff *skb; + struct pnpipehdr *ph; + struct sockaddr_pn dst; + + skb = alloc_skb(MAX_PNPIPE_HEADER + 4, priority); + if (!skb) + return -ENOMEM; + skb_set_owner_w(skb, sk); + + skb_reserve(skb, MAX_PHONET_HEADER); + ph = (struct pnpipehdr *)skb_put(skb, sizeof(*ph) + 4); + + ph->utid = oph->utid; + ph->message_id = PNS_PEP_CTRL_RESP; + ph->pipe_handle = oph->pipe_handle; + ph->data[0] = oph->data[1]; /* CTRL id */ + ph->data[1] = oph->data[0]; /* PEP type */ + ph->data[2] = code; /* error code, at an usual offset */ + ph->data[3] = PAD; + ph->data[4] = PAD; + + pn_skb_get_src_sockaddr(oskb, &dst); + return pn_skb_send(sk, skb, &dst); +} + +static int pipe_snd_status(struct sock *sk, u8 type, u8 status, gfp_t priority) +{ + struct pep_sock *pn = pep_sk(sk); + struct pnpipehdr *ph; + struct sk_buff *skb; + + skb = alloc_skb(MAX_PNPIPE_HEADER + 4, priority); + if (!skb) + return -ENOMEM; + skb_set_owner_w(skb, sk); + + skb_reserve(skb, MAX_PNPIPE_HEADER + 4); + __skb_push(skb, sizeof(*ph) + 4); + skb_reset_transport_header(skb); + ph = pnp_hdr(skb); + ph->utid = 0; + ph->message_id = PNS_PEP_STATUS_IND; + ph->pipe_handle = pn->pipe_handle; + ph->pep_type = PN_PEP_TYPE_COMMON; + ph->data[1] = type; + ph->data[2] = PAD; + ph->data[3] = PAD; + ph->data[4] = status; + + return pn_skb_send(sk, skb, &pipe_srv); +} + +/* Send our RX flow control information to the sender. + * Socket must be locked. */ +static void pipe_grant_credits(struct sock *sk) +{ + struct pep_sock *pn = pep_sk(sk); + + BUG_ON(sk->sk_state != TCP_ESTABLISHED); + + switch (pn->rx_fc) { + case PN_LEGACY_FLOW_CONTROL: /* TODO */ + break; + case PN_ONE_CREDIT_FLOW_CONTROL: + pipe_snd_status(sk, PN_PEP_IND_FLOW_CONTROL, + PEP_IND_READY, GFP_ATOMIC); + pn->rx_credits = 1; + break; + case PN_MULTI_CREDIT_FLOW_CONTROL: + if ((pn->rx_credits + CREDITS_THR) > CREDITS_MAX) + break; + if (pipe_snd_status(sk, PN_PEP_IND_ID_MCFC_GRANT_CREDITS, + CREDITS_MAX - pn->rx_credits, + GFP_ATOMIC) == 0) + pn->rx_credits = CREDITS_MAX; + break; + } +} + +static int pipe_rcv_status(struct sock *sk, struct sk_buff *skb) +{ + struct pep_sock *pn = pep_sk(sk); + struct pnpipehdr *hdr = pnp_hdr(skb); + + if (!pskb_may_pull(skb, sizeof(*hdr) + 4)) + return -EINVAL; + + if (hdr->data[0] != PN_PEP_TYPE_COMMON) { + LIMIT_NETDEBUG(KERN_DEBUG"Phonet unknown PEP type: %u\n", + (unsigned)hdr->data[0]); + return -EOPNOTSUPP; + } + + switch (hdr->data[1]) { + case PN_PEP_IND_FLOW_CONTROL: + switch (pn->tx_fc) { + case PN_LEGACY_FLOW_CONTROL: + switch (hdr->data[4]) { + case PEP_IND_BUSY: + pn->tx_credits = 0; + break; + case PEP_IND_READY: + pn->tx_credits = 1; + break; + } + break; + case PN_ONE_CREDIT_FLOW_CONTROL: + if (hdr->data[4] == PEP_IND_READY) + pn->tx_credits = 1; + break; + } + break; + + case PN_PEP_IND_ID_MCFC_GRANT_CREDITS: + if (pn->tx_fc != PN_MULTI_CREDIT_FLOW_CONTROL) + break; + if (pn->tx_credits + hdr->data[4] > 0xff) + pn->tx_credits = 0xff; + else + pn->tx_credits += hdr->data[4]; + break; + + default: + LIMIT_NETDEBUG(KERN_DEBUG"Phonet unknown PEP indication: %u\n", + (unsigned)hdr->data[1]); + return -EOPNOTSUPP; + } + if (pn->tx_credits) + sk->sk_write_space(sk); + return 0; +} + +static int pipe_rcv_created(struct sock *sk, struct sk_buff *skb) +{ + struct pep_sock *pn = pep_sk(sk); + struct pnpipehdr *hdr = pnp_hdr(skb); + u8 n_sb = hdr->data[0]; + + pn->rx_fc = pn->tx_fc = PN_LEGACY_FLOW_CONTROL; + __skb_pull(skb, sizeof(*hdr)); + while (n_sb > 0) { + u8 type, buf[2], len = sizeof(buf); + u8 *data = pep_get_sb(skb, &type, &len, buf); + + if (data == NULL) + return -EINVAL; + switch (type) { + case PN_PIPE_SB_NEGOTIATED_FC: + if (len < 2 || (data[0] | data[1]) > 3) + break; + pn->tx_fc = data[0] & 3; + pn->rx_fc = data[1] & 3; + break; + } + n_sb--; + } + return 0; +} + +/* Queue an skb to a connected sock. + * Socket lock must be held. */ +static int pipe_do_rcv(struct sock *sk, struct sk_buff *skb) +{ + struct pep_sock *pn = pep_sk(sk); + struct pnpipehdr *hdr = pnp_hdr(skb); + struct sk_buff_head *queue; + int err = 0; + + BUG_ON(sk->sk_state == TCP_CLOSE_WAIT); + + switch (hdr->message_id) { + case PNS_PEP_CONNECT_REQ: + pep_reject_conn(sk, skb, PN_PIPE_ERR_PEP_IN_USE); + break; + + case PNS_PEP_DISCONNECT_REQ: + pep_reply(sk, skb, PN_PIPE_NO_ERROR, NULL, 0, GFP_ATOMIC); + sk->sk_state = TCP_CLOSE_WAIT; + if (!sock_flag(sk, SOCK_DEAD)) + sk->sk_state_change(sk); + break; + + case PNS_PEP_ENABLE_REQ: + /* Wait for PNS_PIPE_(ENABLED|REDIRECTED)_IND */ + pep_reply(sk, skb, PN_PIPE_NO_ERROR, NULL, 0, GFP_ATOMIC); + break; + + case PNS_PEP_RESET_REQ: + switch (hdr->state_after_reset) { + case PN_PIPE_DISABLE: + pn->init_enable = 0; + break; + case PN_PIPE_ENABLE: + pn->init_enable = 1; + break; + default: /* not allowed to send an error here!? */ + err = -EINVAL; + goto out; + } + /* fall through */ + case PNS_PEP_DISABLE_REQ: + pn->tx_credits = 0; + pep_reply(sk, skb, PN_PIPE_NO_ERROR, NULL, 0, GFP_ATOMIC); + break; + + case PNS_PEP_CTRL_REQ: + if (skb_queue_len(&pn->ctrlreq_queue) >= PNPIPE_CTRLREQ_MAX) + break; + __skb_pull(skb, 4); + queue = &pn->ctrlreq_queue; + goto queue; + + case PNS_PIPE_DATA: + __skb_pull(skb, 3); /* Pipe data header */ + if (!pn_flow_safe(pn->rx_fc)) { + err = sock_queue_rcv_skb(sk, skb); + if (!err) + return 0; + break; + } + + if (pn->rx_credits == 0) { + err = -ENOBUFS; + break; + } + pn->rx_credits--; + queue = &sk->sk_receive_queue; + goto queue; + + case PNS_PEP_STATUS_IND: + pipe_rcv_status(sk, skb); + break; + + case PNS_PIPE_REDIRECTED_IND: + err = pipe_rcv_created(sk, skb); + break; + + case PNS_PIPE_CREATED_IND: + err = pipe_rcv_created(sk, skb); + if (err) + break; + /* fall through */ + case PNS_PIPE_RESET_IND: + if (!pn->init_enable) + break; + /* fall through */ + case PNS_PIPE_ENABLED_IND: + if (!pn_flow_safe(pn->tx_fc)) { + pn->tx_credits = 1; + sk->sk_write_space(sk); + } + if (sk->sk_state == TCP_ESTABLISHED) + break; /* Nothing to do */ + sk->sk_state = TCP_ESTABLISHED; + pipe_grant_credits(sk); + break; + + case PNS_PIPE_DISABLED_IND: + sk->sk_state = TCP_SYN_RECV; + pn->rx_credits = 0; + break; + + default: + LIMIT_NETDEBUG(KERN_DEBUG"Phonet unknown PEP message: %u\n", + hdr->message_id); + err = -EINVAL; + } +out: + kfree_skb(skb); + return err; + +queue: + skb->dev = NULL; + skb_set_owner_r(skb, sk); + err = skb->len; + skb_queue_tail(queue, skb); + if (!sock_flag(sk, SOCK_DEAD)) + sk->sk_data_ready(sk, err); + return 0; +} + +/* Destroy connected sock. */ +static void pipe_destruct(struct sock *sk) +{ + struct pep_sock *pn = pep_sk(sk); + + skb_queue_purge(&sk->sk_receive_queue); + skb_queue_purge(&pn->ctrlreq_queue); +} + +static int pep_connreq_rcv(struct sock *sk, struct sk_buff *skb) +{ + struct sock *newsk; + struct pep_sock *newpn, *pn = pep_sk(sk); + struct pnpipehdr *hdr; + struct sockaddr_pn dst; + u16 peer_type; + u8 pipe_handle, enabled, n_sb; + + if (!pskb_pull(skb, sizeof(*hdr) + 4)) + return -EINVAL; + + hdr = pnp_hdr(skb); + pipe_handle = hdr->pipe_handle; + switch (hdr->state_after_connect) { + case PN_PIPE_DISABLE: + enabled = 0; + break; + case PN_PIPE_ENABLE: + enabled = 1; + break; + default: + pep_reject_conn(sk, skb, PN_PIPE_ERR_INVALID_PARAM); + return -EINVAL; + } + peer_type = hdr->other_pep_type << 8; + + if (unlikely(sk->sk_state != TCP_LISTEN) || sk_acceptq_is_full(sk)) { + pep_reject_conn(sk, skb, PN_PIPE_ERR_PEP_IN_USE); + return -ENOBUFS; + } + + /* Parse sub-blocks (options) */ + n_sb = hdr->data[4]; + while (n_sb > 0) { + u8 type, buf[1], len = sizeof(buf); + const u8 *data = pep_get_sb(skb, &type, &len, buf); + + if (data == NULL) + return -EINVAL; + switch (type) { + case PN_PIPE_SB_CONNECT_REQ_PEP_SUB_TYPE: + if (len < 1) + return -EINVAL; + peer_type = (peer_type & 0xff00) | data[0]; + break; + } + n_sb--; + } + + skb = skb_clone(skb, GFP_ATOMIC); + if (!skb) + return -ENOMEM; + + /* Create a new to-be-accepted sock */ + newsk = sk_alloc(sock_net(sk), PF_PHONET, GFP_ATOMIC, sk->sk_prot); + if (!newsk) { + kfree_skb(skb); + return -ENOMEM; + } + sock_init_data(NULL, newsk); + newsk->sk_state = TCP_SYN_RECV; + newsk->sk_backlog_rcv = pipe_do_rcv; + newsk->sk_protocol = sk->sk_protocol; + newsk->sk_destruct = pipe_destruct; + + newpn = pep_sk(newsk); + pn_skb_get_dst_sockaddr(skb, &dst); + newpn->pn_sk.sobject = pn_sockaddr_get_object(&dst); + newpn->pn_sk.resource = pn->pn_sk.resource; + skb_queue_head_init(&newpn->ctrlreq_queue); + newpn->pipe_handle = pipe_handle; + newpn->peer_type = peer_type; + newpn->rx_credits = newpn->tx_credits = 0; + newpn->rx_fc = newpn->tx_fc = PN_LEGACY_FLOW_CONTROL; + newpn->init_enable = enabled; + + BUG_ON(!skb_queue_empty(&newsk->sk_receive_queue)); + skb_queue_head(&newsk->sk_receive_queue, skb); + if (!sock_flag(sk, SOCK_DEAD)) + sk->sk_data_ready(sk, 0); + + sk_acceptq_added(sk); + sk_add_node(newsk, &pn->ackq); + return 0; +} + +/* Listening sock must be locked */ +static struct sock *pep_find_pipe(const struct hlist_head *hlist, + const struct sockaddr_pn *dst, + u8 pipe_handle) +{ + struct hlist_node *node; + struct sock *sknode; + u16 dobj = pn_sockaddr_get_object(dst); + + sk_for_each(sknode, node, hlist) { + struct pep_sock *pnnode = pep_sk(sknode); + + /* Ports match, but addresses might not: */ + if (pnnode->pn_sk.sobject != dobj) + continue; + if (pnnode->pipe_handle != pipe_handle) + continue; + if (sknode->sk_state == TCP_CLOSE_WAIT) + continue; + + sock_hold(sknode); + return sknode; + } + return NULL; +} + +/* + * Deliver an skb to a listening sock. + * Socket lock must be held. + * We then queue the skb to the right connected sock (if any). + */ +static int pep_do_rcv(struct sock *sk, struct sk_buff *skb) +{ + struct pep_sock *pn = pep_sk(sk); + struct sock *sknode; + struct pnpipehdr *hdr = pnp_hdr(skb); + struct sockaddr_pn dst; + int err = NET_RX_SUCCESS; + u8 pipe_handle; + + if (!pskb_may_pull(skb, sizeof(*hdr))) + goto drop; + + hdr = pnp_hdr(skb); + pipe_handle = hdr->pipe_handle; + if (pipe_handle == PN_PIPE_INVALID_HANDLE) + goto drop; + + pn_skb_get_dst_sockaddr(skb, &dst); + + /* Look for an existing pipe handle */ + sknode = pep_find_pipe(&pn->hlist, &dst, pipe_handle); + if (sknode) + return sk_receive_skb(sknode, skb, 1); + + /* Look for a pipe handle pending accept */ + sknode = pep_find_pipe(&pn->ackq, &dst, pipe_handle); + if (sknode) { + sock_put(sknode); + if (net_ratelimit()) + printk(KERN_WARNING"Phonet unconnected PEP ignored"); + err = NET_RX_DROP; + goto drop; + } + + switch (hdr->message_id) { + case PNS_PEP_CONNECT_REQ: + err = pep_connreq_rcv(sk, skb); + break; + + case PNS_PEP_DISCONNECT_REQ: + pep_reply(sk, skb, PN_PIPE_NO_ERROR, NULL, 0, GFP_ATOMIC); + break; + + case PNS_PEP_CTRL_REQ: + pep_ctrlreq_error(sk, skb, PN_PIPE_INVALID_HANDLE, GFP_ATOMIC); + break; + + case PNS_PEP_RESET_REQ: + case PNS_PEP_ENABLE_REQ: + case PNS_PEP_DISABLE_REQ: + /* invalid handle is not even allowed here! */ + default: + err = NET_RX_DROP; + } +drop: + kfree_skb(skb); + return err; +} + +/* associated socket ceases to exist */ +static void pep_sock_close(struct sock *sk, long timeout) +{ + struct pep_sock *pn = pep_sk(sk); + int ifindex = 0; + + sk_common_release(sk); + + lock_sock(sk); + if (sk->sk_state == TCP_LISTEN) { + /* Destroy the listen queue */ + struct sock *sknode; + struct hlist_node *p, *n; + + sk_for_each_safe(sknode, p, n, &pn->ackq) + sk_del_node_init(sknode); + sk->sk_state = TCP_CLOSE; + } + ifindex = pn->ifindex; + pn->ifindex = 0; + release_sock(sk); + + if (ifindex) + gprs_detach(sk); +} + +static int pep_wait_connreq(struct sock *sk, int noblock) +{ + struct task_struct *tsk = current; + struct pep_sock *pn = pep_sk(sk); + long timeo = sock_rcvtimeo(sk, noblock); + + for (;;) { + DEFINE_WAIT(wait); + + if (sk->sk_state != TCP_LISTEN) + return -EINVAL; + if (!hlist_empty(&pn->ackq)) + break; + if (!timeo) + return -EWOULDBLOCK; + if (signal_pending(tsk)) + return sock_intr_errno(timeo); + + prepare_to_wait_exclusive(&sk->sk_socket->wait, &wait, + TASK_INTERRUPTIBLE); + release_sock(sk); + timeo = schedule_timeout(timeo); + lock_sock(sk); + finish_wait(&sk->sk_socket->wait, &wait); + } + + return 0; +} + +static struct sock *pep_sock_accept(struct sock *sk, int flags, int *errp) +{ + struct pep_sock *pn = pep_sk(sk); + struct sock *newsk = NULL; + struct sk_buff *oskb; + int err; + + lock_sock(sk); + err = pep_wait_connreq(sk, flags & O_NONBLOCK); + if (err) + goto out; + + newsk = __sk_head(&pn->ackq); + + oskb = skb_dequeue(&newsk->sk_receive_queue); + err = pep_accept_conn(newsk, oskb); + if (err) { + skb_queue_head(&newsk->sk_receive_queue, oskb); + newsk = NULL; + goto out; + } + + sock_hold(sk); + pep_sk(newsk)->listener = sk; + + sock_hold(newsk); + sk_del_node_init(newsk); + sk_acceptq_removed(sk); + sk_add_node(newsk, &pn->hlist); + __sock_put(newsk); + +out: + release_sock(sk); + *errp = err; + return newsk; +} + +static int pep_ioctl(struct sock *sk, int cmd, unsigned long arg) +{ + struct pep_sock *pn = pep_sk(sk); + int answ; + + switch (cmd) { + case SIOCINQ: + if (sk->sk_state == TCP_LISTEN) + return -EINVAL; + + lock_sock(sk); + if (sock_flag(sk, SOCK_URGINLINE) + && !skb_queue_empty(&pn->ctrlreq_queue)) + answ = skb_peek(&pn->ctrlreq_queue)->len; + else if (!skb_queue_empty(&sk->sk_receive_queue)) + answ = skb_peek(&sk->sk_receive_queue)->len; + else + answ = 0; + release_sock(sk); + return put_user(answ, (int __user *)arg); + } + + return -ENOIOCTLCMD; +} + +static int pep_init(struct sock *sk) +{ + struct pep_sock *pn = pep_sk(sk); + + INIT_HLIST_HEAD(&pn->ackq); + INIT_HLIST_HEAD(&pn->hlist); + skb_queue_head_init(&pn->ctrlreq_queue); + pn->pipe_handle = PN_PIPE_INVALID_HANDLE; + return 0; +} + +static int pep_setsockopt(struct sock *sk, int level, int optname, + char __user *optval, int optlen) +{ + struct pep_sock *pn = pep_sk(sk); + int val = 0, err = 0; + + if (level != SOL_PNPIPE) + return -ENOPROTOOPT; + if (optlen >= sizeof(int)) { + if (get_user(val, (int __user *) optval)) + return -EFAULT; + } + + lock_sock(sk); + switch (optname) { + case PNPIPE_ENCAP: + if (val && val != PNPIPE_ENCAP_IP) { + err = -EINVAL; + break; + } + if (!pn->ifindex == !val) + break; /* Nothing to do! */ + if (!capable(CAP_NET_ADMIN)) { + err = -EPERM; + break; + } + if (val) { + release_sock(sk); + err = gprs_attach(sk); + if (err > 0) { + pn->ifindex = err; + err = 0; + } + } else { + pn->ifindex = 0; + release_sock(sk); + gprs_detach(sk); + err = 0; + } + goto out_norel; + default: + err = -ENOPROTOOPT; + } + release_sock(sk); + +out_norel: + return err; +} + +static int pep_getsockopt(struct sock *sk, int level, int optname, + char __user *optval, int __user *optlen) +{ + struct pep_sock *pn = pep_sk(sk); + int len, val; + + if (level != SOL_PNPIPE) + return -ENOPROTOOPT; + if (get_user(len, optlen)) + return -EFAULT; + + switch (optname) { + case PNPIPE_ENCAP: + val = pn->ifindex ? PNPIPE_ENCAP_IP : PNPIPE_ENCAP_NONE; + break; + case PNPIPE_IFINDEX: + val = pn->ifindex; + break; + default: + return -ENOPROTOOPT; + } + + len = min_t(unsigned int, sizeof(int), len); + if (put_user(len, optlen)) + return -EFAULT; + if (put_user(val, (int __user *) optval)) + return -EFAULT; + return 0; +} + +static int pipe_skb_send(struct sock *sk, struct sk_buff *skb) +{ + struct pep_sock *pn = pep_sk(sk); + struct pnpipehdr *ph; + + skb_push(skb, 3); + skb_reset_transport_header(skb); + ph = pnp_hdr(skb); + ph->utid = 0; + ph->message_id = PNS_PIPE_DATA; + ph->pipe_handle = pn->pipe_handle; + if (pn_flow_safe(pn->tx_fc) && pn->tx_credits) + pn->tx_credits--; + + return pn_skb_send(sk, skb, &pipe_srv); +} + +static int pep_sendmsg(struct kiocb *iocb, struct sock *sk, + struct msghdr *msg, size_t len) +{ + struct pep_sock *pn = pep_sk(sk); + struct sk_buff *skb = NULL; + long timeo; + int flags = msg->msg_flags; + int err, done; + + if (msg->msg_flags & MSG_OOB || !(msg->msg_flags & MSG_EOR)) + return -EOPNOTSUPP; + + lock_sock(sk); + timeo = sock_sndtimeo(sk, flags & MSG_DONTWAIT); + if ((1 << sk->sk_state) & (TCPF_LISTEN|TCPF_CLOSE)) { + err = -ENOTCONN; + goto out; + } + if (sk->sk_state != TCP_ESTABLISHED) { + /* Wait until the pipe gets to enabled state */ +disabled: + err = sk_stream_wait_connect(sk, &timeo); + if (err) + goto out; + + if (sk->sk_state == TCP_CLOSE_WAIT) { + err = -ECONNRESET; + goto out; + } + } + BUG_ON(sk->sk_state != TCP_ESTABLISHED); + + /* Wait until flow control allows TX */ + done = pn->tx_credits > 0; + while (!done) { + DEFINE_WAIT(wait); + + if (!timeo) { + err = -EAGAIN; + goto out; + } + if (signal_pending(current)) { + err = sock_intr_errno(timeo); + goto out; + } + + prepare_to_wait(&sk->sk_socket->wait, &wait, + TASK_INTERRUPTIBLE); + done = sk_wait_event(sk, &timeo, pn->tx_credits > 0); + finish_wait(&sk->sk_socket->wait, &wait); + + if (sk->sk_state != TCP_ESTABLISHED) + goto disabled; + } + + if (!skb) { + skb = sock_alloc_send_skb(sk, MAX_PNPIPE_HEADER + len, + flags & MSG_DONTWAIT, &err); + if (skb == NULL) + goto out; + skb_reserve(skb, MAX_PHONET_HEADER + 3); + + if (sk->sk_state != TCP_ESTABLISHED || !pn->tx_credits) + goto disabled; /* sock_alloc_send_skb might sleep */ + } + + err = memcpy_fromiovec(skb_put(skb, len), msg->msg_iov, len); + if (err < 0) + goto out; + + err = pipe_skb_send(sk, skb); + if (err >= 0) + err = len; /* success! */ + skb = NULL; +out: + release_sock(sk); + kfree_skb(skb); + return err; +} + +int pep_writeable(struct sock *sk) +{ + struct pep_sock *pn = pep_sk(sk); + + return (sk->sk_state == TCP_ESTABLISHED) ? pn->tx_credits : 0; +} + +int pep_write(struct sock *sk, struct sk_buff *skb) +{ + struct sk_buff *rskb, *fs; + int flen = 0; + + rskb = alloc_skb(MAX_PNPIPE_HEADER, GFP_ATOMIC); + if (!rskb) { + kfree_skb(skb); + return -ENOMEM; + } + skb_shinfo(rskb)->frag_list = skb; + rskb->len += skb->len; + rskb->data_len += rskb->len; + rskb->truesize += rskb->len; + + /* Avoid nested fragments */ + for (fs = skb_shinfo(skb)->frag_list; fs; fs = fs->next) + flen += fs->len; + skb->next = skb_shinfo(skb)->frag_list; + skb_shinfo(skb)->frag_list = NULL; + skb->len -= flen; + skb->data_len -= flen; + skb->truesize -= flen; + + skb_reserve(rskb, MAX_PHONET_HEADER + 3); + return pipe_skb_send(sk, rskb); +} + +struct sk_buff *pep_read(struct sock *sk) +{ + struct sk_buff *skb = skb_dequeue(&sk->sk_receive_queue); + + if (sk->sk_state == TCP_ESTABLISHED) + pipe_grant_credits(sk); + return skb; +} + +static int pep_recvmsg(struct kiocb *iocb, struct sock *sk, + struct msghdr *msg, size_t len, int noblock, + int flags, int *addr_len) +{ + struct sk_buff *skb; + int err; + + if (unlikely(1 << sk->sk_state & (TCPF_LISTEN | TCPF_CLOSE))) + return -ENOTCONN; + + if ((flags & MSG_OOB) || sock_flag(sk, SOCK_URGINLINE)) { + /* Dequeue and acknowledge control request */ + struct pep_sock *pn = pep_sk(sk); + + skb = skb_dequeue(&pn->ctrlreq_queue); + if (skb) { + pep_ctrlreq_error(sk, skb, PN_PIPE_NO_ERROR, + GFP_KERNEL); + msg->msg_flags |= MSG_OOB; + goto copy; + } + if (flags & MSG_OOB) + return -EINVAL; + } + + skb = skb_recv_datagram(sk, flags, noblock, &err); + lock_sock(sk); + if (skb == NULL) { + if (err == -ENOTCONN && sk->sk_state == TCP_CLOSE_WAIT) + err = -ECONNRESET; + release_sock(sk); + return err; + } + + if (sk->sk_state == TCP_ESTABLISHED) + pipe_grant_credits(sk); + release_sock(sk); +copy: + msg->msg_flags |= MSG_EOR; + if (skb->len > len) + msg->msg_flags |= MSG_TRUNC; + else + len = skb->len; + + err = skb_copy_datagram_iovec(skb, 0, msg->msg_iov, len); + if (!err) + err = (flags & MSG_TRUNC) ? skb->len : len; + + skb_free_datagram(sk, skb); + return err; +} + +static void pep_sock_unhash(struct sock *sk) +{ + struct pep_sock *pn = pep_sk(sk); + struct sock *skparent = NULL; + + lock_sock(sk); + if ((1 << sk->sk_state) & ~(TCPF_CLOSE|TCPF_LISTEN)) { + skparent = pn->listener; + sk_del_node_init(sk); + release_sock(sk); + + sk = skparent; + pn = pep_sk(skparent); + lock_sock(sk); + } + /* Unhash a listening sock only when it is closed + * and all of its active connected pipes are closed. */ + if (hlist_empty(&pn->hlist)) + pn_sock_unhash(&pn->pn_sk.sk); + release_sock(sk); + + if (skparent) + sock_put(skparent); +} + +static struct proto pep_proto = { + .close = pep_sock_close, + .accept = pep_sock_accept, + .ioctl = pep_ioctl, + .init = pep_init, + .setsockopt = pep_setsockopt, + .getsockopt = pep_getsockopt, + .sendmsg = pep_sendmsg, + .recvmsg = pep_recvmsg, + .backlog_rcv = pep_do_rcv, + .hash = pn_sock_hash, + .unhash = pep_sock_unhash, + .get_port = pn_sock_get_port, + .obj_size = sizeof(struct pep_sock), + .owner = THIS_MODULE, + .name = "PNPIPE", +}; + +static struct phonet_protocol pep_pn_proto = { + .ops = &phonet_stream_ops, + .prot = &pep_proto, + .sock_type = SOCK_SEQPACKET, +}; + +static int __init pep_register(void) +{ + return phonet_proto_register(PN_PROTO_PIPE, &pep_pn_proto); +} + +static void __exit pep_unregister(void) +{ + phonet_proto_unregister(PN_PROTO_PIPE, &pep_pn_proto); +} + +module_init(pep_register); +module_exit(pep_unregister); +MODULE_AUTHOR("Remi Denis-Courmont, Nokia"); +MODULE_DESCRIPTION("Phonet pipe protocol"); +MODULE_LICENSE("GPL"); +MODULE_ALIAS_NET_PF_PROTO(PF_PHONET, PN_PROTO_PIPE); diff --git a/net/phonet/pn_dev.c b/net/phonet/pn_dev.c new file mode 100644 index 000000000000..53be9fc82aaa --- /dev/null +++ b/net/phonet/pn_dev.c @@ -0,0 +1,208 @@ +/* + * File: pn_dev.c + * + * Phonet network device + * + * Copyright (C) 2008 Nokia Corporation. + * + * Contact: Remi Denis-Courmont <remi.denis-courmont@nokia.com> + * Original author: Sakari Ailus <sakari.ailus@nokia.com> + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * version 2 as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA + * 02110-1301 USA + */ + +#include <linux/kernel.h> +#include <linux/net.h> +#include <linux/netdevice.h> +#include <linux/phonet.h> +#include <net/sock.h> +#include <net/phonet/pn_dev.h> + +/* when accessing, remember to lock with spin_lock(&pndevs.lock); */ +struct phonet_device_list pndevs = { + .list = LIST_HEAD_INIT(pndevs.list), + .lock = __SPIN_LOCK_UNLOCKED(pndevs.lock), +}; + +/* Allocate new Phonet device. */ +static struct phonet_device *__phonet_device_alloc(struct net_device *dev) +{ + struct phonet_device *pnd = kmalloc(sizeof(*pnd), GFP_ATOMIC); + if (pnd == NULL) + return NULL; + pnd->netdev = dev; + bitmap_zero(pnd->addrs, 64); + + list_add(&pnd->list, &pndevs.list); + return pnd; +} + +static struct phonet_device *__phonet_get(struct net_device *dev) +{ + struct phonet_device *pnd; + + list_for_each_entry(pnd, &pndevs.list, list) { + if (pnd->netdev == dev) + return pnd; + } + return NULL; +} + +static void __phonet_device_free(struct phonet_device *pnd) +{ + list_del(&pnd->list); + kfree(pnd); +} + +struct net_device *phonet_device_get(struct net *net) +{ + struct phonet_device *pnd; + struct net_device *dev; + + spin_lock_bh(&pndevs.lock); + list_for_each_entry(pnd, &pndevs.list, list) { + dev = pnd->netdev; + BUG_ON(!dev); + + if (dev_net(dev) == net && + (dev->reg_state == NETREG_REGISTERED) && + ((pnd->netdev->flags & IFF_UP)) == IFF_UP) + break; + dev = NULL; + } + if (dev) + dev_hold(dev); + spin_unlock_bh(&pndevs.lock); + return dev; +} + +int phonet_address_add(struct net_device *dev, u8 addr) +{ + struct phonet_device *pnd; + int err = 0; + + spin_lock_bh(&pndevs.lock); + /* Find or create Phonet-specific device data */ + pnd = __phonet_get(dev); + if (pnd == NULL) + pnd = __phonet_device_alloc(dev); + if (unlikely(pnd == NULL)) + err = -ENOMEM; + else if (test_and_set_bit(addr >> 2, pnd->addrs)) + err = -EEXIST; + spin_unlock_bh(&pndevs.lock); + return err; +} + +int phonet_address_del(struct net_device *dev, u8 addr) +{ + struct phonet_device *pnd; + int err = 0; + + spin_lock_bh(&pndevs.lock); + pnd = __phonet_get(dev); + if (!pnd || !test_and_clear_bit(addr >> 2, pnd->addrs)) + err = -EADDRNOTAVAIL; + if (bitmap_empty(pnd->addrs, 64)) + __phonet_device_free(pnd); + spin_unlock_bh(&pndevs.lock); + return err; +} + +/* Gets a source address toward a destination, through a interface. */ +u8 phonet_address_get(struct net_device *dev, u8 addr) +{ + struct phonet_device *pnd; + + spin_lock_bh(&pndevs.lock); + pnd = __phonet_get(dev); + if (pnd) { + BUG_ON(bitmap_empty(pnd->addrs, 64)); + + /* Use same source address as destination, if possible */ + if (!test_bit(addr >> 2, pnd->addrs)) + addr = find_first_bit(pnd->addrs, 64) << 2; + } else + addr = PN_NO_ADDR; + spin_unlock_bh(&pndevs.lock); + return addr; +} + +int phonet_address_lookup(u8 addr) +{ + struct phonet_device *pnd; + + spin_lock_bh(&pndevs.lock); + list_for_each_entry(pnd, &pndevs.list, list) { + /* Don't allow unregistering devices! */ + if ((pnd->netdev->reg_state != NETREG_REGISTERED) || + ((pnd->netdev->flags & IFF_UP)) != IFF_UP) + continue; + + if (test_bit(addr >> 2, pnd->addrs)) { + spin_unlock_bh(&pndevs.lock); + return 0; + } + } + spin_unlock_bh(&pndevs.lock); + return -EADDRNOTAVAIL; +} + +/* notify Phonet of device events */ +static int phonet_device_notify(struct notifier_block *me, unsigned long what, + void *arg) +{ + struct net_device *dev = arg; + + if (what == NETDEV_UNREGISTER) { + struct phonet_device *pnd; + + /* Destroy phonet-specific device data */ + spin_lock_bh(&pndevs.lock); + pnd = __phonet_get(dev); + if (pnd) + __phonet_device_free(pnd); + spin_unlock_bh(&pndevs.lock); + } + return 0; + +} + +static struct notifier_block phonet_device_notifier = { + .notifier_call = phonet_device_notify, + .priority = 0, +}; + +/* Initialize Phonet devices list */ +void phonet_device_init(void) +{ + register_netdevice_notifier(&phonet_device_notifier); +} + +void phonet_device_exit(void) +{ + struct phonet_device *pnd, *n; + + rtnl_unregister_all(PF_PHONET); + rtnl_lock(); + spin_lock_bh(&pndevs.lock); + + list_for_each_entry_safe(pnd, n, &pndevs.list, list) + __phonet_device_free(pnd); + + spin_unlock_bh(&pndevs.lock); + rtnl_unlock(); + unregister_netdevice_notifier(&phonet_device_notifier); +} diff --git a/net/phonet/pn_netlink.c b/net/phonet/pn_netlink.c new file mode 100644 index 000000000000..b1770d66bc8d --- /dev/null +++ b/net/phonet/pn_netlink.c @@ -0,0 +1,165 @@ +/* + * File: pn_netlink.c + * + * Phonet netlink interface + * + * Copyright (C) 2008 Nokia Corporation. + * + * Contact: Remi Denis-Courmont <remi.denis-courmont@nokia.com> + * Original author: Sakari Ailus <sakari.ailus@nokia.com> + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * version 2 as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA + * 02110-1301 USA + */ + +#include <linux/kernel.h> +#include <linux/netlink.h> +#include <linux/phonet.h> +#include <net/sock.h> +#include <net/phonet/pn_dev.h> + +static int fill_addr(struct sk_buff *skb, struct net_device *dev, u8 addr, + u32 pid, u32 seq, int event); + +static void rtmsg_notify(int event, struct net_device *dev, u8 addr) +{ + struct sk_buff *skb; + int err = -ENOBUFS; + + skb = nlmsg_new(NLMSG_ALIGN(sizeof(struct ifaddrmsg)) + + nla_total_size(1), GFP_KERNEL); + if (skb == NULL) + goto errout; + err = fill_addr(skb, dev, addr, 0, 0, event); + if (err < 0) { + WARN_ON(err == -EMSGSIZE); + kfree_skb(skb); + goto errout; + } + err = rtnl_notify(skb, dev_net(dev), 0, + RTNLGRP_PHONET_IFADDR, NULL, GFP_KERNEL); +errout: + if (err < 0) + rtnl_set_sk_err(dev_net(dev), RTNLGRP_PHONET_IFADDR, err); +} + +static const struct nla_policy ifa_phonet_policy[IFA_MAX+1] = { + [IFA_LOCAL] = { .type = NLA_U8 }, +}; + +static int addr_doit(struct sk_buff *skb, struct nlmsghdr *nlh, void *attr) +{ + struct net *net = sock_net(skb->sk); + struct nlattr *tb[IFA_MAX+1]; + struct net_device *dev; + struct ifaddrmsg *ifm; + int err; + u8 pnaddr; + + if (!capable(CAP_SYS_ADMIN)) + return -EPERM; + + ASSERT_RTNL(); + + err = nlmsg_parse(nlh, sizeof(*ifm), tb, IFA_MAX, ifa_phonet_policy); + if (err < 0) + return err; + + ifm = nlmsg_data(nlh); + if (tb[IFA_LOCAL] == NULL) + return -EINVAL; + pnaddr = nla_get_u8(tb[IFA_LOCAL]); + if (pnaddr & 3) + /* Phonet addresses only have 6 high-order bits */ + return -EINVAL; + + dev = __dev_get_by_index(net, ifm->ifa_index); + if (dev == NULL) + return -ENODEV; + + if (nlh->nlmsg_type == RTM_NEWADDR) + err = phonet_address_add(dev, pnaddr); + else + err = phonet_address_del(dev, pnaddr); + if (!err) + rtmsg_notify(nlh->nlmsg_type, dev, pnaddr); + return err; +} + +static int fill_addr(struct sk_buff *skb, struct net_device *dev, u8 addr, + u32 pid, u32 seq, int event) +{ + struct ifaddrmsg *ifm; + struct nlmsghdr *nlh; + + nlh = nlmsg_put(skb, pid, seq, event, sizeof(*ifm), 0); + if (nlh == NULL) + return -EMSGSIZE; + + ifm = nlmsg_data(nlh); + ifm->ifa_family = AF_PHONET; + ifm->ifa_prefixlen = 0; + ifm->ifa_flags = IFA_F_PERMANENT; + ifm->ifa_scope = RT_SCOPE_LINK; + ifm->ifa_index = dev->ifindex; + NLA_PUT_U8(skb, IFA_LOCAL, addr); + return nlmsg_end(skb, nlh); + +nla_put_failure: + nlmsg_cancel(skb, nlh); + return -EMSGSIZE; +} + +static int getaddr_dumpit(struct sk_buff *skb, struct netlink_callback *cb) +{ + struct phonet_device *pnd; + int dev_idx = 0, dev_start_idx = cb->args[0]; + int addr_idx = 0, addr_start_idx = cb->args[1]; + + spin_lock_bh(&pndevs.lock); + list_for_each_entry(pnd, &pndevs.list, list) { + u8 addr; + + if (dev_idx > dev_start_idx) + addr_start_idx = 0; + if (dev_idx++ < dev_start_idx) + continue; + + addr_idx = 0; + for (addr = find_first_bit(pnd->addrs, 64); addr < 64; + addr = find_next_bit(pnd->addrs, 64, 1+addr)) { + if (addr_idx++ < addr_start_idx) + continue; + + if (fill_addr(skb, pnd->netdev, addr << 2, + NETLINK_CB(cb->skb).pid, + cb->nlh->nlmsg_seq, RTM_NEWADDR)) + goto out; + } + } + +out: + spin_unlock_bh(&pndevs.lock); + cb->args[0] = dev_idx; + cb->args[1] = addr_idx; + + return skb->len; +} + +void __init phonet_netlink_register(void) +{ + rtnl_register(PF_PHONET, RTM_NEWADDR, addr_doit, NULL); + rtnl_register(PF_PHONET, RTM_DELADDR, addr_doit, NULL); + rtnl_register(PF_PHONET, RTM_GETADDR, NULL, getaddr_dumpit); +} diff --git a/net/phonet/socket.c b/net/phonet/socket.c new file mode 100644 index 000000000000..d81740187fb4 --- /dev/null +++ b/net/phonet/socket.c @@ -0,0 +1,411 @@ +/* + * File: socket.c + * + * Phonet sockets + * + * Copyright (C) 2008 Nokia Corporation. + * + * Contact: Remi Denis-Courmont <remi.denis-courmont@nokia.com> + * Original author: Sakari Ailus <sakari.ailus@nokia.com> + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * version 2 as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA + * 02110-1301 USA + */ + +#include <linux/kernel.h> +#include <linux/net.h> +#include <linux/poll.h> +#include <net/sock.h> +#include <net/tcp_states.h> + +#include <linux/phonet.h> +#include <net/phonet/phonet.h> +#include <net/phonet/pep.h> +#include <net/phonet/pn_dev.h> + +static int pn_socket_release(struct socket *sock) +{ + struct sock *sk = sock->sk; + + if (sk) { + sock->sk = NULL; + sk->sk_prot->close(sk, 0); + } + return 0; +} + +static struct { + struct hlist_head hlist; + spinlock_t lock; +} pnsocks = { + .hlist = HLIST_HEAD_INIT, + .lock = __SPIN_LOCK_UNLOCKED(pnsocks.lock), +}; + +/* + * Find address based on socket address, match only certain fields. + * Also grab sock if it was found. Remember to sock_put it later. + */ +struct sock *pn_find_sock_by_sa(const struct sockaddr_pn *spn) +{ + struct hlist_node *node; + struct sock *sknode; + struct sock *rval = NULL; + u16 obj = pn_sockaddr_get_object(spn); + u8 res = spn->spn_resource; + + spin_lock_bh(&pnsocks.lock); + + sk_for_each(sknode, node, &pnsocks.hlist) { + struct pn_sock *pn = pn_sk(sknode); + BUG_ON(!pn->sobject); /* unbound socket */ + + if (pn_port(obj)) { + /* Look up socket by port */ + if (pn_port(pn->sobject) != pn_port(obj)) + continue; + } else { + /* If port is zero, look up by resource */ + if (pn->resource != res) + continue; + } + if (pn_addr(pn->sobject) + && pn_addr(pn->sobject) != pn_addr(obj)) + continue; + + rval = sknode; + sock_hold(sknode); + break; + } + + spin_unlock_bh(&pnsocks.lock); + + return rval; + +} + +void pn_sock_hash(struct sock *sk) +{ + spin_lock_bh(&pnsocks.lock); + sk_add_node(sk, &pnsocks.hlist); + spin_unlock_bh(&pnsocks.lock); +} +EXPORT_SYMBOL(pn_sock_hash); + +void pn_sock_unhash(struct sock *sk) +{ + spin_lock_bh(&pnsocks.lock); + sk_del_node_init(sk); + spin_unlock_bh(&pnsocks.lock); +} +EXPORT_SYMBOL(pn_sock_unhash); + +static int pn_socket_bind(struct socket *sock, struct sockaddr *addr, int len) +{ + struct sock *sk = sock->sk; + struct pn_sock *pn = pn_sk(sk); + struct sockaddr_pn *spn = (struct sockaddr_pn *)addr; + int err; + u16 handle; + u8 saddr; + + if (sk->sk_prot->bind) + return sk->sk_prot->bind(sk, addr, len); + + if (len < sizeof(struct sockaddr_pn)) + return -EINVAL; + if (spn->spn_family != AF_PHONET) + return -EAFNOSUPPORT; + + handle = pn_sockaddr_get_object((struct sockaddr_pn *)addr); + saddr = pn_addr(handle); + if (saddr && phonet_address_lookup(saddr)) + return -EADDRNOTAVAIL; + + lock_sock(sk); + if (sk->sk_state != TCP_CLOSE || pn_port(pn->sobject)) { + err = -EINVAL; /* attempt to rebind */ + goto out; + } + err = sk->sk_prot->get_port(sk, pn_port(handle)); + if (err) + goto out; + + /* get_port() sets the port, bind() sets the address if applicable */ + pn->sobject = pn_object(saddr, pn_port(pn->sobject)); + pn->resource = spn->spn_resource; + + /* Enable RX on the socket */ + sk->sk_prot->hash(sk); +out: + release_sock(sk); + return err; +} + +static int pn_socket_autobind(struct socket *sock) +{ + struct sockaddr_pn sa; + int err; + + memset(&sa, 0, sizeof(sa)); + sa.spn_family = AF_PHONET; + err = pn_socket_bind(sock, (struct sockaddr *)&sa, + sizeof(struct sockaddr_pn)); + if (err != -EINVAL) + return err; + BUG_ON(!pn_port(pn_sk(sock->sk)->sobject)); + return 0; /* socket was already bound */ +} + +static int pn_socket_accept(struct socket *sock, struct socket *newsock, + int flags) +{ + struct sock *sk = sock->sk; + struct sock *newsk; + int err; + + newsk = sk->sk_prot->accept(sk, flags, &err); + if (!newsk) + return err; + + lock_sock(newsk); + sock_graft(newsk, newsock); + newsock->state = SS_CONNECTED; + release_sock(newsk); + return 0; +} + +static int pn_socket_getname(struct socket *sock, struct sockaddr *addr, + int *sockaddr_len, int peer) +{ + struct sock *sk = sock->sk; + struct pn_sock *pn = pn_sk(sk); + + memset(addr, 0, sizeof(struct sockaddr_pn)); + addr->sa_family = AF_PHONET; + if (!peer) /* Race with bind() here is userland's problem. */ + pn_sockaddr_set_object((struct sockaddr_pn *)addr, + pn->sobject); + + *sockaddr_len = sizeof(struct sockaddr_pn); + return 0; +} + +static unsigned int pn_socket_poll(struct file *file, struct socket *sock, + poll_table *wait) +{ + struct sock *sk = sock->sk; + struct pep_sock *pn = pep_sk(sk); + unsigned int mask = 0; + + poll_wait(file, &sock->wait, wait); + + switch (sk->sk_state) { + case TCP_LISTEN: + return hlist_empty(&pn->ackq) ? 0 : POLLIN; + case TCP_CLOSE: + return POLLERR; + } + + if (!skb_queue_empty(&sk->sk_receive_queue)) + mask |= POLLIN | POLLRDNORM; + if (!skb_queue_empty(&pn->ctrlreq_queue)) + mask |= POLLPRI; + if (!mask && sk->sk_state == TCP_CLOSE_WAIT) + return POLLHUP; + + if (sk->sk_state == TCP_ESTABLISHED && pn->tx_credits) + mask |= POLLOUT | POLLWRNORM | POLLWRBAND; + + return mask; +} + +static int pn_socket_ioctl(struct socket *sock, unsigned int cmd, + unsigned long arg) +{ + struct sock *sk = sock->sk; + struct pn_sock *pn = pn_sk(sk); + + if (cmd == SIOCPNGETOBJECT) { + struct net_device *dev; + u16 handle; + u8 saddr; + + if (get_user(handle, (__u16 __user *)arg)) + return -EFAULT; + + lock_sock(sk); + if (sk->sk_bound_dev_if) + dev = dev_get_by_index(sock_net(sk), + sk->sk_bound_dev_if); + else + dev = phonet_device_get(sock_net(sk)); + if (dev && (dev->flags & IFF_UP)) + saddr = phonet_address_get(dev, pn_addr(handle)); + else + saddr = PN_NO_ADDR; + release_sock(sk); + + if (dev) + dev_put(dev); + if (saddr == PN_NO_ADDR) + return -EHOSTUNREACH; + + handle = pn_object(saddr, pn_port(pn->sobject)); + return put_user(handle, (__u16 __user *)arg); + } + + return sk->sk_prot->ioctl(sk, cmd, arg); +} + +static int pn_socket_listen(struct socket *sock, int backlog) +{ + struct sock *sk = sock->sk; + int err = 0; + + if (sock->state != SS_UNCONNECTED) + return -EINVAL; + if (pn_socket_autobind(sock)) + return -ENOBUFS; + + lock_sock(sk); + if (sk->sk_state != TCP_CLOSE) { + err = -EINVAL; + goto out; + } + + sk->sk_state = TCP_LISTEN; + sk->sk_ack_backlog = 0; + sk->sk_max_ack_backlog = backlog; +out: + release_sock(sk); + return err; +} + +static int pn_socket_sendmsg(struct kiocb *iocb, struct socket *sock, + struct msghdr *m, size_t total_len) +{ + struct sock *sk = sock->sk; + + if (pn_socket_autobind(sock)) + return -EAGAIN; + + return sk->sk_prot->sendmsg(iocb, sk, m, total_len); +} + +const struct proto_ops phonet_dgram_ops = { + .family = AF_PHONET, + .owner = THIS_MODULE, + .release = pn_socket_release, + .bind = pn_socket_bind, + .connect = sock_no_connect, + .socketpair = sock_no_socketpair, + .accept = sock_no_accept, + .getname = pn_socket_getname, + .poll = datagram_poll, + .ioctl = pn_socket_ioctl, + .listen = sock_no_listen, + .shutdown = sock_no_shutdown, + .setsockopt = sock_no_setsockopt, + .getsockopt = sock_no_getsockopt, +#ifdef CONFIG_COMPAT + .compat_setsockopt = sock_no_setsockopt, + .compat_getsockopt = sock_no_getsockopt, +#endif + .sendmsg = pn_socket_sendmsg, + .recvmsg = sock_common_recvmsg, + .mmap = sock_no_mmap, + .sendpage = sock_no_sendpage, +}; + +const struct proto_ops phonet_stream_ops = { + .family = AF_PHONET, + .owner = THIS_MODULE, + .release = pn_socket_release, + .bind = pn_socket_bind, + .connect = sock_no_connect, + .socketpair = sock_no_socketpair, + .accept = pn_socket_accept, + .getname = pn_socket_getname, + .poll = pn_socket_poll, + .ioctl = pn_socket_ioctl, + .listen = pn_socket_listen, + .shutdown = sock_no_shutdown, + .setsockopt = sock_common_setsockopt, + .getsockopt = sock_common_getsockopt, +#ifdef CONFIG_COMPAT + .compat_setsockopt = compat_sock_common_setsockopt, + .compat_getsockopt = compat_sock_common_getsockopt, +#endif + .sendmsg = pn_socket_sendmsg, + .recvmsg = sock_common_recvmsg, + .mmap = sock_no_mmap, + .sendpage = sock_no_sendpage, +}; +EXPORT_SYMBOL(phonet_stream_ops); + +static DEFINE_MUTEX(port_mutex); + +/* allocate port for a socket */ +int pn_sock_get_port(struct sock *sk, unsigned short sport) +{ + static int port_cur; + struct pn_sock *pn = pn_sk(sk); + struct sockaddr_pn try_sa; + struct sock *tmpsk; + + memset(&try_sa, 0, sizeof(struct sockaddr_pn)); + try_sa.spn_family = AF_PHONET; + + mutex_lock(&port_mutex); + + if (!sport) { + /* search free port */ + int port, pmin, pmax; + + phonet_get_local_port_range(&pmin, &pmax); + for (port = pmin; port <= pmax; port++) { + port_cur++; + if (port_cur < pmin || port_cur > pmax) + port_cur = pmin; + + pn_sockaddr_set_port(&try_sa, port_cur); + tmpsk = pn_find_sock_by_sa(&try_sa); + if (tmpsk == NULL) { + sport = port_cur; + goto found; + } else + sock_put(tmpsk); + } + } else { + /* try to find specific port */ + pn_sockaddr_set_port(&try_sa, sport); + tmpsk = pn_find_sock_by_sa(&try_sa); + if (tmpsk == NULL) + /* No sock there! We can use that port... */ + goto found; + else + sock_put(tmpsk); + } + mutex_unlock(&port_mutex); + + /* the port must be in use already */ + return -EADDRINUSE; + +found: + mutex_unlock(&port_mutex); + pn->sobject = pn_object(pn_addr(pn->sobject), sport); + return 0; +} +EXPORT_SYMBOL(pn_sock_get_port); diff --git a/net/phonet/sysctl.c b/net/phonet/sysctl.c new file mode 100644 index 000000000000..600a4309b8c8 --- /dev/null +++ b/net/phonet/sysctl.c @@ -0,0 +1,113 @@ +/* + * File: sysctl.c + * + * Phonet /proc/sys/net/phonet interface implementation + * + * Copyright (C) 2008 Nokia Corporation. + * + * Contact: Remi Denis-Courmont <remi.denis-courmont@nokia.com> + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * version 2 as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA + * 02110-1301 USA + */ + +#include <linux/seqlock.h> +#include <linux/sysctl.h> +#include <linux/errno.h> +#include <linux/init.h> + +#define DYNAMIC_PORT_MIN 0x40 +#define DYNAMIC_PORT_MAX 0x7f + +static DEFINE_SEQLOCK(local_port_range_lock); +static int local_port_range_min[2] = {0, 0}; +static int local_port_range_max[2] = {1023, 1023}; +static int local_port_range[2] = {DYNAMIC_PORT_MIN, DYNAMIC_PORT_MAX}; +static struct ctl_table_header *phonet_table_hrd; + +static void set_local_port_range(int range[2]) +{ + write_seqlock(&local_port_range_lock); + local_port_range[0] = range[0]; + local_port_range[1] = range[1]; + write_sequnlock(&local_port_range_lock); +} + +void phonet_get_local_port_range(int *min, int *max) +{ + unsigned seq; + do { + seq = read_seqbegin(&local_port_range_lock); + if (min) + *min = local_port_range[0]; + if (max) + *max = local_port_range[1]; + } while (read_seqretry(&local_port_range_lock, seq)); +} + +static int proc_local_port_range(ctl_table *table, int write, struct file *filp, + void __user *buffer, + size_t *lenp, loff_t *ppos) +{ + int ret; + int range[2] = {local_port_range[0], local_port_range[1]}; + ctl_table tmp = { + .data = &range, + .maxlen = sizeof(range), + .mode = table->mode, + .extra1 = &local_port_range_min, + .extra2 = &local_port_range_max, + }; + + ret = proc_dointvec_minmax(&tmp, write, filp, buffer, lenp, ppos); + + if (write && ret == 0) { + if (range[1] < range[0]) + ret = -EINVAL; + else + set_local_port_range(range); + } + + return ret; +} + +static struct ctl_table phonet_table[] = { + { + .ctl_name = CTL_UNNUMBERED, + .procname = "local_port_range", + .data = &local_port_range, + .maxlen = sizeof(local_port_range), + .mode = 0644, + .proc_handler = &proc_local_port_range, + .strategy = NULL, + }, + { .ctl_name = 0 } +}; + +struct ctl_path phonet_ctl_path[] = { + { .procname = "net", .ctl_name = CTL_NET, }, + { .procname = "phonet", .ctl_name = CTL_UNNUMBERED, }, + { }, +}; + +int __init phonet_sysctl_init(void) +{ + phonet_table_hrd = register_sysctl_paths(phonet_ctl_path, phonet_table); + return phonet_table_hrd == NULL ? -ENOMEM : 0; +} + +void phonet_sysctl_exit(void) +{ + unregister_sysctl_table(phonet_table_hrd); +} diff --git a/net/rfkill/rfkill-input.c b/net/rfkill/rfkill-input.c index e4b051dbed61..21124ec0a73d 100644 --- a/net/rfkill/rfkill-input.c +++ b/net/rfkill/rfkill-input.c @@ -16,6 +16,7 @@ #include <linux/workqueue.h> #include <linux/init.h> #include <linux/rfkill.h> +#include <linux/sched.h> #include "rfkill-input.h" @@ -30,39 +31,62 @@ struct rfkill_task { spinlock_t lock; /* for accessing last and desired state */ unsigned long last; /* last schedule */ enum rfkill_state desired_state; /* on/off */ - enum rfkill_state current_state; /* on/off */ }; static void rfkill_task_handler(struct work_struct *work) { struct rfkill_task *task = container_of(work, struct rfkill_task, work); - enum rfkill_state state; mutex_lock(&task->mutex); - /* - * Use temp variable to fetch desired state to keep it - * consistent even if rfkill_schedule_toggle() runs in - * another thread or interrupts us. - */ - state = task->desired_state; + rfkill_switch_all(task->type, task->desired_state); - if (state != task->current_state) { - rfkill_switch_all(task->type, state); - task->current_state = state; + mutex_unlock(&task->mutex); +} + +static void rfkill_task_epo_handler(struct work_struct *work) +{ + rfkill_epo(); +} + +static DECLARE_WORK(epo_work, rfkill_task_epo_handler); + +static void rfkill_schedule_epo(void) +{ + schedule_work(&epo_work); +} + +static void rfkill_schedule_set(struct rfkill_task *task, + enum rfkill_state desired_state) +{ + unsigned long flags; + + if (unlikely(work_pending(&epo_work))) + return; + + spin_lock_irqsave(&task->lock, flags); + + if (time_after(jiffies, task->last + msecs_to_jiffies(200))) { + task->desired_state = desired_state; + task->last = jiffies; + schedule_work(&task->work); } - mutex_unlock(&task->mutex); + spin_unlock_irqrestore(&task->lock, flags); } static void rfkill_schedule_toggle(struct rfkill_task *task) { unsigned long flags; + if (unlikely(work_pending(&epo_work))) + return; + spin_lock_irqsave(&task->lock, flags); if (time_after(jiffies, task->last + msecs_to_jiffies(200))) { - task->desired_state = !task->desired_state; + task->desired_state = + rfkill_state_complement(task->desired_state); task->last = jiffies; schedule_work(&task->work); } @@ -70,26 +94,45 @@ static void rfkill_schedule_toggle(struct rfkill_task *task) spin_unlock_irqrestore(&task->lock, flags); } -#define DEFINE_RFKILL_TASK(n, t) \ - struct rfkill_task n = { \ - .work = __WORK_INITIALIZER(n.work, \ - rfkill_task_handler), \ - .type = t, \ - .mutex = __MUTEX_INITIALIZER(n.mutex), \ - .lock = __SPIN_LOCK_UNLOCKED(n.lock), \ - .desired_state = RFKILL_STATE_ON, \ - .current_state = RFKILL_STATE_ON, \ +#define DEFINE_RFKILL_TASK(n, t) \ + struct rfkill_task n = { \ + .work = __WORK_INITIALIZER(n.work, \ + rfkill_task_handler), \ + .type = t, \ + .mutex = __MUTEX_INITIALIZER(n.mutex), \ + .lock = __SPIN_LOCK_UNLOCKED(n.lock), \ + .desired_state = RFKILL_STATE_UNBLOCKED, \ } static DEFINE_RFKILL_TASK(rfkill_wlan, RFKILL_TYPE_WLAN); static DEFINE_RFKILL_TASK(rfkill_bt, RFKILL_TYPE_BLUETOOTH); static DEFINE_RFKILL_TASK(rfkill_uwb, RFKILL_TYPE_UWB); static DEFINE_RFKILL_TASK(rfkill_wimax, RFKILL_TYPE_WIMAX); +static DEFINE_RFKILL_TASK(rfkill_wwan, RFKILL_TYPE_WWAN); + +static void rfkill_schedule_evsw_rfkillall(int state) +{ + /* EVERY radio type. state != 0 means radios ON */ + /* handle EPO (emergency power off) through shortcut */ + if (state) { + rfkill_schedule_set(&rfkill_wwan, + RFKILL_STATE_UNBLOCKED); + rfkill_schedule_set(&rfkill_wimax, + RFKILL_STATE_UNBLOCKED); + rfkill_schedule_set(&rfkill_uwb, + RFKILL_STATE_UNBLOCKED); + rfkill_schedule_set(&rfkill_bt, + RFKILL_STATE_UNBLOCKED); + rfkill_schedule_set(&rfkill_wlan, + RFKILL_STATE_UNBLOCKED); + } else + rfkill_schedule_epo(); +} static void rfkill_event(struct input_handle *handle, unsigned int type, - unsigned int code, int down) + unsigned int code, int data) { - if (type == EV_KEY && down == 1) { + if (type == EV_KEY && data == 1) { switch (code) { case KEY_WLAN: rfkill_schedule_toggle(&rfkill_wlan); @@ -106,6 +149,14 @@ static void rfkill_event(struct input_handle *handle, unsigned int type, default: break; } + } else if (type == EV_SW) { + switch (code) { + case SW_RFKILL_ALL: + rfkill_schedule_evsw_rfkillall(data); + break; + default: + break; + } } } @@ -123,6 +174,7 @@ static int rfkill_connect(struct input_handler *handler, struct input_dev *dev, handle->handler = handler; handle->name = "rfkill"; + /* causes rfkill_start() to be called */ error = input_register_handle(handle); if (error) goto err_free_handle; @@ -140,6 +192,23 @@ static int rfkill_connect(struct input_handler *handler, struct input_dev *dev, return error; } +static void rfkill_start(struct input_handle *handle) +{ + /* Take event_lock to guard against configuration changes, we + * should be able to deal with concurrency with rfkill_event() + * just fine (which event_lock will also avoid). */ + spin_lock_irq(&handle->dev->event_lock); + + if (test_bit(EV_SW, handle->dev->evbit)) { + if (test_bit(SW_RFKILL_ALL, handle->dev->swbit)) + rfkill_schedule_evsw_rfkillall(test_bit(SW_RFKILL_ALL, + handle->dev->sw)); + /* add resync for further EV_SW events here */ + } + + spin_unlock_irq(&handle->dev->event_lock); +} + static void rfkill_disconnect(struct input_handle *handle) { input_close_device(handle); @@ -168,6 +237,11 @@ static const struct input_device_id rfkill_ids[] = { .evbit = { BIT_MASK(EV_KEY) }, .keybit = { [BIT_WORD(KEY_WIMAX)] = BIT_MASK(KEY_WIMAX) }, }, + { + .flags = INPUT_DEVICE_ID_MATCH_EVBIT | INPUT_DEVICE_ID_MATCH_SWBIT, + .evbit = { BIT(EV_SW) }, + .swbit = { [BIT_WORD(SW_RFKILL_ALL)] = BIT_MASK(SW_RFKILL_ALL) }, + }, { } }; @@ -175,6 +249,7 @@ static struct input_handler rfkill_handler = { .event = rfkill_event, .connect = rfkill_connect, .disconnect = rfkill_disconnect, + .start = rfkill_start, .name = "rfkill", .id_table = rfkill_ids, }; diff --git a/net/rfkill/rfkill-input.h b/net/rfkill/rfkill-input.h index 4dae5006fc77..bbfa646157c6 100644 --- a/net/rfkill/rfkill-input.h +++ b/net/rfkill/rfkill-input.h @@ -12,5 +12,7 @@ #define __RFKILL_INPUT_H void rfkill_switch_all(enum rfkill_type type, enum rfkill_state state); +void rfkill_epo(void); +void rfkill_restore_states(void); #endif /* __RFKILL_INPUT_H */ diff --git a/net/rfkill/rfkill.c b/net/rfkill/rfkill.c index 4e10a95de832..f949a482b007 100644 --- a/net/rfkill/rfkill.c +++ b/net/rfkill/rfkill.c @@ -37,9 +37,65 @@ MODULE_DESCRIPTION("RF switch support"); MODULE_LICENSE("GPL"); static LIST_HEAD(rfkill_list); /* list of registered rf switches */ -static DEFINE_MUTEX(rfkill_mutex); +static DEFINE_MUTEX(rfkill_global_mutex); -static enum rfkill_state rfkill_states[RFKILL_TYPE_MAX]; +static unsigned int rfkill_default_state = RFKILL_STATE_UNBLOCKED; +module_param_named(default_state, rfkill_default_state, uint, 0444); +MODULE_PARM_DESC(default_state, + "Default initial state for all radio types, 0 = radio off"); + +struct rfkill_gsw_state { + enum rfkill_state current_state; + enum rfkill_state default_state; +}; + +static struct rfkill_gsw_state rfkill_global_states[RFKILL_TYPE_MAX]; +static unsigned long rfkill_states_lockdflt[BITS_TO_LONGS(RFKILL_TYPE_MAX)]; + +static BLOCKING_NOTIFIER_HEAD(rfkill_notifier_list); + + +/** + * register_rfkill_notifier - Add notifier to rfkill notifier chain + * @nb: pointer to the new entry to add to the chain + * + * See blocking_notifier_chain_register() for return value and further + * observations. + * + * Adds a notifier to the rfkill notifier chain. The chain will be + * called with a pointer to the relevant rfkill structure as a parameter, + * refer to include/linux/rfkill.h for the possible events. + * + * Notifiers added to this chain are to always return NOTIFY_DONE. This + * chain is a blocking notifier chain: notifiers can sleep. + * + * Calls to this chain may have been done through a workqueue. One must + * assume unordered asynchronous behaviour, there is no way to know if + * actions related to the event that generated the notification have been + * carried out already. + */ +int register_rfkill_notifier(struct notifier_block *nb) +{ + BUG_ON(!nb); + return blocking_notifier_chain_register(&rfkill_notifier_list, nb); +} +EXPORT_SYMBOL_GPL(register_rfkill_notifier); + +/** + * unregister_rfkill_notifier - remove notifier from rfkill notifier chain + * @nb: pointer to the entry to remove from the chain + * + * See blocking_notifier_chain_unregister() for return value and further + * observations. + * + * Removes a notifier from the rfkill notifier chain. + */ +int unregister_rfkill_notifier(struct notifier_block *nb) +{ + BUG_ON(!nb); + return blocking_notifier_chain_unregister(&rfkill_notifier_list, nb); +} +EXPORT_SYMBOL_GPL(unregister_rfkill_notifier); static void rfkill_led_trigger(struct rfkill *rfkill, @@ -50,55 +106,264 @@ static void rfkill_led_trigger(struct rfkill *rfkill, if (!led->name) return; - if (state == RFKILL_STATE_OFF) + if (state != RFKILL_STATE_UNBLOCKED) led_trigger_event(led, LED_OFF); else led_trigger_event(led, LED_FULL); #endif /* CONFIG_RFKILL_LEDS */ } +#ifdef CONFIG_RFKILL_LEDS +static void rfkill_led_trigger_activate(struct led_classdev *led) +{ + struct rfkill *rfkill = container_of(led->trigger, + struct rfkill, led_trigger); + + rfkill_led_trigger(rfkill, rfkill->state); +} +#endif /* CONFIG_RFKILL_LEDS */ + +static void notify_rfkill_state_change(struct rfkill *rfkill) +{ + rfkill_led_trigger(rfkill, rfkill->state); + blocking_notifier_call_chain(&rfkill_notifier_list, + RFKILL_STATE_CHANGED, + rfkill); +} + +static void update_rfkill_state(struct rfkill *rfkill) +{ + enum rfkill_state newstate, oldstate; + + if (rfkill->get_state) { + mutex_lock(&rfkill->mutex); + if (!rfkill->get_state(rfkill->data, &newstate)) { + oldstate = rfkill->state; + rfkill->state = newstate; + if (oldstate != newstate) + notify_rfkill_state_change(rfkill); + } + mutex_unlock(&rfkill->mutex); + } +} + +/** + * rfkill_toggle_radio - wrapper for toggle_radio hook + * @rfkill: the rfkill struct to use + * @force: calls toggle_radio even if cache says it is not needed, + * and also makes sure notifications of the state will be + * sent even if it didn't change + * @state: the new state to call toggle_radio() with + * + * Calls rfkill->toggle_radio, enforcing the API for toggle_radio + * calls and handling all the red tape such as issuing notifications + * if the call is successful. + * + * Suspended devices are not touched at all, and -EAGAIN is returned. + * + * Note that the @force parameter cannot override a (possibly cached) + * state of RFKILL_STATE_HARD_BLOCKED. Any device making use of + * RFKILL_STATE_HARD_BLOCKED implements either get_state() or + * rfkill_force_state(), so the cache either is bypassed or valid. + * + * Note that we do call toggle_radio for RFKILL_STATE_SOFT_BLOCKED + * even if the radio is in RFKILL_STATE_HARD_BLOCKED state, so as to + * give the driver a hint that it should double-BLOCK the transmitter. + * + * Caller must have acquired rfkill->mutex. + */ static int rfkill_toggle_radio(struct rfkill *rfkill, - enum rfkill_state state) + enum rfkill_state state, + int force) { int retval = 0; + enum rfkill_state oldstate, newstate; + + if (unlikely(rfkill->dev.power.power_state.event & PM_EVENT_SLEEP)) + return -EBUSY; + + oldstate = rfkill->state; + + if (rfkill->get_state && !force && + !rfkill->get_state(rfkill->data, &newstate)) + rfkill->state = newstate; + + switch (state) { + case RFKILL_STATE_HARD_BLOCKED: + /* typically happens when refreshing hardware state, + * such as on resume */ + state = RFKILL_STATE_SOFT_BLOCKED; + break; + case RFKILL_STATE_UNBLOCKED: + /* force can't override this, only rfkill_force_state() can */ + if (rfkill->state == RFKILL_STATE_HARD_BLOCKED) + return -EPERM; + break; + case RFKILL_STATE_SOFT_BLOCKED: + /* nothing to do, we want to give drivers the hint to double + * BLOCK even a transmitter that is already in state + * RFKILL_STATE_HARD_BLOCKED */ + break; + default: + WARN(1, KERN_WARNING + "rfkill: illegal state %d passed as parameter " + "to rfkill_toggle_radio\n", state); + return -EINVAL; + } - if (state != rfkill->state) { + if (force || state != rfkill->state) { retval = rfkill->toggle_radio(rfkill->data, state); - if (!retval) { + /* never allow a HARD->SOFT downgrade! */ + if (!retval && rfkill->state != RFKILL_STATE_HARD_BLOCKED) rfkill->state = state; - rfkill_led_trigger(rfkill, state); - } } + if (force || rfkill->state != oldstate) + notify_rfkill_state_change(rfkill); + return retval; } /** - * rfkill_switch_all - Toggle state of all switches of given type - * @type: type of interfaces to be affeceted + * __rfkill_switch_all - Toggle state of all switches of given type + * @type: type of interfaces to be affected * @state: the new state * - * This function toggles state of all switches of given type unless - * a specific switch is claimed by userspace in which case it is - * left alone. + * This function toggles the state of all switches of given type, + * unless a specific switch is claimed by userspace (in which case, + * that switch is left alone) or suspended. + * + * Caller must have acquired rfkill_global_mutex. */ +static void __rfkill_switch_all(const enum rfkill_type type, + const enum rfkill_state state) +{ + struct rfkill *rfkill; + if (WARN((state >= RFKILL_STATE_MAX || type >= RFKILL_TYPE_MAX), + KERN_WARNING + "rfkill: illegal state %d or type %d " + "passed as parameter to __rfkill_switch_all\n", + state, type)) + return; + + rfkill_global_states[type].current_state = state; + list_for_each_entry(rfkill, &rfkill_list, node) { + if ((!rfkill->user_claim) && (rfkill->type == type)) { + mutex_lock(&rfkill->mutex); + rfkill_toggle_radio(rfkill, state, 0); + mutex_unlock(&rfkill->mutex); + } + } +} + +/** + * rfkill_switch_all - Toggle state of all switches of given type + * @type: type of interfaces to be affected + * @state: the new state + * + * Acquires rfkill_global_mutex and calls __rfkill_switch_all(@type, @state). + * Please refer to __rfkill_switch_all() for details. + */ void rfkill_switch_all(enum rfkill_type type, enum rfkill_state state) { - struct rfkill *rfkill; + mutex_lock(&rfkill_global_mutex); + __rfkill_switch_all(type, state); + mutex_unlock(&rfkill_global_mutex); +} +EXPORT_SYMBOL(rfkill_switch_all); - mutex_lock(&rfkill_mutex); +/** + * rfkill_epo - emergency power off all transmitters + * + * This kicks all non-suspended rfkill devices to RFKILL_STATE_SOFT_BLOCKED, + * ignoring everything in its path but rfkill_global_mutex and rfkill->mutex. + * + * The global state before the EPO is saved and can be restored later + * using rfkill_restore_states(). + */ +void rfkill_epo(void) +{ + struct rfkill *rfkill; + int i; - rfkill_states[type] = state; + mutex_lock(&rfkill_global_mutex); list_for_each_entry(rfkill, &rfkill_list, node) { - if ((!rfkill->user_claim) && (rfkill->type == type)) - rfkill_toggle_radio(rfkill, state); + mutex_lock(&rfkill->mutex); + rfkill_toggle_radio(rfkill, RFKILL_STATE_SOFT_BLOCKED, 1); + mutex_unlock(&rfkill->mutex); + } + for (i = 0; i < RFKILL_TYPE_MAX; i++) { + rfkill_global_states[i].default_state = + rfkill_global_states[i].current_state; + rfkill_global_states[i].current_state = + RFKILL_STATE_SOFT_BLOCKED; } + mutex_unlock(&rfkill_global_mutex); +} +EXPORT_SYMBOL_GPL(rfkill_epo); + +/** + * rfkill_restore_states - restore global states + * + * Restore (and sync switches to) the global state from the + * states in rfkill_default_states. This can undo the effects of + * a call to rfkill_epo(). + */ +void rfkill_restore_states(void) +{ + int i; - mutex_unlock(&rfkill_mutex); + mutex_lock(&rfkill_global_mutex); + + for (i = 0; i < RFKILL_TYPE_MAX; i++) + __rfkill_switch_all(i, rfkill_global_states[i].default_state); + mutex_unlock(&rfkill_global_mutex); } -EXPORT_SYMBOL(rfkill_switch_all); +EXPORT_SYMBOL_GPL(rfkill_restore_states); + +/** + * rfkill_force_state - Force the internal rfkill radio state + * @rfkill: pointer to the rfkill class to modify. + * @state: the current radio state the class should be forced to. + * + * This function updates the internal state of the radio cached + * by the rfkill class. It should be used when the driver gets + * a notification by the firmware/hardware of the current *real* + * state of the radio rfkill switch. + * + * Devices which are subject to external changes on their rfkill + * state (such as those caused by a hardware rfkill line) MUST + * have their driver arrange to call rfkill_force_state() as soon + * as possible after such a change. + * + * This function may not be called from an atomic context. + */ +int rfkill_force_state(struct rfkill *rfkill, enum rfkill_state state) +{ + enum rfkill_state oldstate; + + BUG_ON(!rfkill); + if (WARN((state >= RFKILL_STATE_MAX), + KERN_WARNING + "rfkill: illegal state %d passed as parameter " + "to rfkill_force_state\n", state)) + return -EINVAL; + + mutex_lock(&rfkill->mutex); + + oldstate = rfkill->state; + rfkill->state = state; + + if (state != oldstate) + notify_rfkill_state_change(rfkill); + + mutex_unlock(&rfkill->mutex); + + return 0; +} +EXPORT_SYMBOL(rfkill_force_state); static ssize_t rfkill_name_show(struct device *dev, struct device_attribute *attr, @@ -109,31 +374,31 @@ static ssize_t rfkill_name_show(struct device *dev, return sprintf(buf, "%s\n", rfkill->name); } -static ssize_t rfkill_type_show(struct device *dev, - struct device_attribute *attr, - char *buf) +static const char *rfkill_get_type_str(enum rfkill_type type) { - struct rfkill *rfkill = to_rfkill(dev); - const char *type; - - switch (rfkill->type) { + switch (type) { case RFKILL_TYPE_WLAN: - type = "wlan"; - break; + return "wlan"; case RFKILL_TYPE_BLUETOOTH: - type = "bluetooth"; - break; + return "bluetooth"; case RFKILL_TYPE_UWB: - type = "ultrawideband"; - break; + return "ultrawideband"; case RFKILL_TYPE_WIMAX: - type = "wimax"; - break; + return "wimax"; + case RFKILL_TYPE_WWAN: + return "wwan"; default: BUG(); } +} - return sprintf(buf, "%s\n", type); +static ssize_t rfkill_type_show(struct device *dev, + struct device_attribute *attr, + char *buf) +{ + struct rfkill *rfkill = to_rfkill(dev); + + return sprintf(buf, "%s\n", rfkill_get_type_str(rfkill->type)); } static ssize_t rfkill_state_show(struct device *dev, @@ -142,6 +407,7 @@ static ssize_t rfkill_state_show(struct device *dev, { struct rfkill *rfkill = to_rfkill(dev); + update_rfkill_state(rfkill); return sprintf(buf, "%d\n", rfkill->state); } @@ -150,16 +416,24 @@ static ssize_t rfkill_state_store(struct device *dev, const char *buf, size_t count) { struct rfkill *rfkill = to_rfkill(dev); - unsigned int state = simple_strtoul(buf, NULL, 0); + unsigned long state; int error; if (!capable(CAP_NET_ADMIN)) return -EPERM; + error = strict_strtoul(buf, 0, &state); + if (error) + return error; + + /* RFKILL_STATE_HARD_BLOCKED is illegal here... */ + if (state != RFKILL_STATE_UNBLOCKED && + state != RFKILL_STATE_SOFT_BLOCKED) + return -EINVAL; + if (mutex_lock_interruptible(&rfkill->mutex)) return -ERESTARTSYS; - error = rfkill_toggle_radio(rfkill, - state ? RFKILL_STATE_ON : RFKILL_STATE_OFF); + error = rfkill_toggle_radio(rfkill, state, 0); mutex_unlock(&rfkill->mutex); return error ? error : count; @@ -171,7 +445,7 @@ static ssize_t rfkill_claim_show(struct device *dev, { struct rfkill *rfkill = to_rfkill(dev); - return sprintf(buf, "%d", rfkill->user_claim); + return sprintf(buf, "%d\n", rfkill->user_claim); } static ssize_t rfkill_claim_store(struct device *dev, @@ -179,33 +453,41 @@ static ssize_t rfkill_claim_store(struct device *dev, const char *buf, size_t count) { struct rfkill *rfkill = to_rfkill(dev); - bool claim = !!simple_strtoul(buf, NULL, 0); + unsigned long claim_tmp; + bool claim; int error; if (!capable(CAP_NET_ADMIN)) return -EPERM; + if (rfkill->user_claim_unsupported) + return -EOPNOTSUPP; + + error = strict_strtoul(buf, 0, &claim_tmp); + if (error) + return error; + claim = !!claim_tmp; + /* * Take the global lock to make sure the kernel is not in * the middle of rfkill_switch_all */ - error = mutex_lock_interruptible(&rfkill_mutex); + error = mutex_lock_interruptible(&rfkill_global_mutex); if (error) return error; - if (rfkill->user_claim_unsupported) { - error = -EOPNOTSUPP; - goto out_unlock; - } if (rfkill->user_claim != claim) { - if (!claim) + if (!claim) { + mutex_lock(&rfkill->mutex); rfkill_toggle_radio(rfkill, - rfkill_states[rfkill->type]); + rfkill_global_states[rfkill->type].current_state, + 0); + mutex_unlock(&rfkill->mutex); + } rfkill->user_claim = claim; } -out_unlock: - mutex_unlock(&rfkill_mutex); + mutex_unlock(&rfkill_global_mutex); return error ? error : count; } @@ -229,21 +511,9 @@ static void rfkill_release(struct device *dev) #ifdef CONFIG_PM static int rfkill_suspend(struct device *dev, pm_message_t state) { - struct rfkill *rfkill = to_rfkill(dev); - - if (dev->power.power_state.event != state.event) { - if (state.event & PM_EVENT_SLEEP) { - mutex_lock(&rfkill->mutex); - - if (rfkill->state == RFKILL_STATE_ON) - rfkill->toggle_radio(rfkill->data, - RFKILL_STATE_OFF); - - mutex_unlock(&rfkill->mutex); - } - + /* mark class device as suspended */ + if (dev->power.power_state.event != state.event) dev->power.power_state = state; - } return 0; } @@ -255,13 +525,14 @@ static int rfkill_resume(struct device *dev) if (dev->power.power_state.event != PM_EVENT_ON) { mutex_lock(&rfkill->mutex); - if (rfkill->state == RFKILL_STATE_ON) - rfkill->toggle_radio(rfkill->data, RFKILL_STATE_ON); + dev->power.power_state.event = PM_EVENT_ON; + + /* restore radio state AND notify everybody */ + rfkill_toggle_radio(rfkill, rfkill->state, 1); mutex_unlock(&rfkill->mutex); } - dev->power.power_state = PMSG_ON; return 0; } #else @@ -269,35 +540,111 @@ static int rfkill_resume(struct device *dev) #define rfkill_resume NULL #endif +static int rfkill_blocking_uevent_notifier(struct notifier_block *nb, + unsigned long eventid, + void *data) +{ + struct rfkill *rfkill = (struct rfkill *)data; + + switch (eventid) { + case RFKILL_STATE_CHANGED: + kobject_uevent(&rfkill->dev.kobj, KOBJ_CHANGE); + break; + default: + break; + } + + return NOTIFY_DONE; +} + +static struct notifier_block rfkill_blocking_uevent_nb = { + .notifier_call = rfkill_blocking_uevent_notifier, + .priority = 0, +}; + +static int rfkill_dev_uevent(struct device *dev, struct kobj_uevent_env *env) +{ + struct rfkill *rfkill = to_rfkill(dev); + int error; + + error = add_uevent_var(env, "RFKILL_NAME=%s", rfkill->name); + if (error) + return error; + error = add_uevent_var(env, "RFKILL_TYPE=%s", + rfkill_get_type_str(rfkill->type)); + if (error) + return error; + error = add_uevent_var(env, "RFKILL_STATE=%d", rfkill->state); + return error; +} + static struct class rfkill_class = { .name = "rfkill", .dev_release = rfkill_release, .dev_attrs = rfkill_dev_attrs, .suspend = rfkill_suspend, .resume = rfkill_resume, + .dev_uevent = rfkill_dev_uevent, }; +static int rfkill_check_duplicity(const struct rfkill *rfkill) +{ + struct rfkill *p; + unsigned long seen[BITS_TO_LONGS(RFKILL_TYPE_MAX)]; + + memset(seen, 0, sizeof(seen)); + + list_for_each_entry(p, &rfkill_list, node) { + if (WARN((p == rfkill), KERN_WARNING + "rfkill: illegal attempt to register " + "an already registered rfkill struct\n")) + return -EEXIST; + set_bit(p->type, seen); + } + + /* 0: first switch of its kind */ + return test_bit(rfkill->type, seen); +} + static int rfkill_add_switch(struct rfkill *rfkill) { int error; - mutex_lock(&rfkill_mutex); + mutex_lock(&rfkill_global_mutex); - error = rfkill_toggle_radio(rfkill, rfkill_states[rfkill->type]); - if (!error) - list_add_tail(&rfkill->node, &rfkill_list); + error = rfkill_check_duplicity(rfkill); + if (error < 0) + goto unlock_out; - mutex_unlock(&rfkill_mutex); + if (!error) { + /* lock default after first use */ + set_bit(rfkill->type, rfkill_states_lockdflt); + rfkill_global_states[rfkill->type].current_state = + rfkill_global_states[rfkill->type].default_state; + } + + rfkill_toggle_radio(rfkill, + rfkill_global_states[rfkill->type].current_state, + 0); + + list_add_tail(&rfkill->node, &rfkill_list); + + error = 0; +unlock_out: + mutex_unlock(&rfkill_global_mutex); return error; } static void rfkill_remove_switch(struct rfkill *rfkill) { - mutex_lock(&rfkill_mutex); + mutex_lock(&rfkill_global_mutex); list_del_init(&rfkill->node); - rfkill_toggle_radio(rfkill, RFKILL_STATE_OFF); - mutex_unlock(&rfkill_mutex); + mutex_unlock(&rfkill_global_mutex); + + mutex_lock(&rfkill->mutex); + rfkill_toggle_radio(rfkill, RFKILL_STATE_SOFT_BLOCKED, 1); + mutex_unlock(&rfkill->mutex); } /** @@ -306,17 +653,25 @@ static void rfkill_remove_switch(struct rfkill *rfkill) * @type: type of the switch (RFKILL_TYPE_*) * * This function should be called by the network driver when it needs - * rfkill structure. Once the structure is allocated the driver shoud - * finish its initialization by setting name, private data, enable_radio + * rfkill structure. Once the structure is allocated the driver should + * finish its initialization by setting the name, private data, enable_radio * and disable_radio methods and then register it with rfkill_register(). + * * NOTE: If registration fails the structure shoudl be freed by calling * rfkill_free() otherwise rfkill_unregister() should be used. */ -struct rfkill *rfkill_allocate(struct device *parent, enum rfkill_type type) +struct rfkill * __must_check rfkill_allocate(struct device *parent, + enum rfkill_type type) { struct rfkill *rfkill; struct device *dev; + if (WARN((type >= RFKILL_TYPE_MAX), + KERN_WARNING + "rfkill: illegal type %d passed as parameter " + "to rfkill_allocate\n", type)) + return NULL; + rfkill = kzalloc(sizeof(struct rfkill), GFP_KERNEL); if (!rfkill) return NULL; @@ -340,7 +695,7 @@ EXPORT_SYMBOL(rfkill_allocate); * rfkill_free - Mark rfkill structure for deletion * @rfkill: rfkill structure to be destroyed * - * Decrements reference count of rfkill structure so it is destroyed. + * Decrements reference count of the rfkill structure so it is destroyed. * Note that rfkill_free() should _not_ be called after rfkill_unregister(). */ void rfkill_free(struct rfkill *rfkill) @@ -355,7 +710,10 @@ static void rfkill_led_trigger_register(struct rfkill *rfkill) #ifdef CONFIG_RFKILL_LEDS int error; - rfkill->led_trigger.name = rfkill->dev.bus_id; + if (!rfkill->led_trigger.name) + rfkill->led_trigger.name = rfkill->dev.bus_id; + if (!rfkill->led_trigger.activate) + rfkill->led_trigger.activate = rfkill_led_trigger_activate; error = led_trigger_register(&rfkill->led_trigger); if (error) rfkill->led_trigger.name = NULL; @@ -365,8 +723,10 @@ static void rfkill_led_trigger_register(struct rfkill *rfkill) static void rfkill_led_trigger_unregister(struct rfkill *rfkill) { #ifdef CONFIG_RFKILL_LEDS - if (rfkill->led_trigger.name) + if (rfkill->led_trigger.name) { led_trigger_unregister(&rfkill->led_trigger); + rfkill->led_trigger.name = NULL; + } #endif } @@ -378,15 +738,18 @@ static void rfkill_led_trigger_unregister(struct rfkill *rfkill) * structure needs to be registered. Immediately from registration the * switch driver should be able to service calls to toggle_radio. */ -int rfkill_register(struct rfkill *rfkill) +int __must_check rfkill_register(struct rfkill *rfkill) { static atomic_t rfkill_no = ATOMIC_INIT(0); struct device *dev = &rfkill->dev; int error; - if (!rfkill->toggle_radio) - return -EINVAL; - if (rfkill->type >= RFKILL_TYPE_MAX) + if (WARN((!rfkill || !rfkill->toggle_radio || + rfkill->type >= RFKILL_TYPE_MAX || + rfkill->state >= RFKILL_STATE_MAX), + KERN_WARNING + "rfkill: attempt to register a " + "badly initialized rfkill struct\n")) return -EINVAL; snprintf(dev->bus_id, sizeof(dev->bus_id), @@ -402,8 +765,8 @@ int rfkill_register(struct rfkill *rfkill) error = device_add(dev); if (error) { - rfkill_led_trigger_unregister(rfkill); rfkill_remove_switch(rfkill); + rfkill_led_trigger_unregister(rfkill); return error; } @@ -412,7 +775,7 @@ int rfkill_register(struct rfkill *rfkill) EXPORT_SYMBOL(rfkill_register); /** - * rfkill_unregister - Uegister a rfkill structure. + * rfkill_unregister - Unregister a rfkill structure. * @rfkill: rfkill structure to be unregistered * * This function should be called by the network driver during device @@ -421,6 +784,7 @@ EXPORT_SYMBOL(rfkill_register); */ void rfkill_unregister(struct rfkill *rfkill) { + BUG_ON(!rfkill); device_del(&rfkill->dev); rfkill_remove_switch(rfkill); rfkill_led_trigger_unregister(rfkill); @@ -428,6 +792,56 @@ void rfkill_unregister(struct rfkill *rfkill) } EXPORT_SYMBOL(rfkill_unregister); +/** + * rfkill_set_default - set initial value for a switch type + * @type - the type of switch to set the default state of + * @state - the new default state for that group of switches + * + * Sets the initial state rfkill should use for a given type. + * The following initial states are allowed: RFKILL_STATE_SOFT_BLOCKED + * and RFKILL_STATE_UNBLOCKED. + * + * This function is meant to be used by platform drivers for platforms + * that can save switch state across power down/reboot. + * + * The default state for each switch type can be changed exactly once. + * After a switch of that type is registered, the default state cannot + * be changed anymore. This guards against multiple drivers it the + * same platform trying to set the initial switch default state, which + * is not allowed. + * + * Returns -EPERM if the state has already been set once or is in use, + * so drivers likely want to either ignore or at most printk(KERN_NOTICE) + * if this function returns -EPERM. + * + * Returns 0 if the new default state was set, or an error if it + * could not be set. + */ +int rfkill_set_default(enum rfkill_type type, enum rfkill_state state) +{ + int error; + + if (WARN((type >= RFKILL_TYPE_MAX || + (state != RFKILL_STATE_SOFT_BLOCKED && + state != RFKILL_STATE_UNBLOCKED)), + KERN_WARNING + "rfkill: illegal state %d or type %d passed as " + "parameter to rfkill_set_default\n", state, type)) + return -EINVAL; + + mutex_lock(&rfkill_global_mutex); + + if (!test_and_set_bit(type, rfkill_states_lockdflt)) { + rfkill_global_states[type].default_state = state; + error = 0; + } else + error = -EPERM; + + mutex_unlock(&rfkill_global_mutex); + return error; +} +EXPORT_SYMBOL_GPL(rfkill_set_default); + /* * Rfkill module initialization/deinitialization. */ @@ -436,8 +850,13 @@ static int __init rfkill_init(void) int error; int i; - for (i = 0; i < ARRAY_SIZE(rfkill_states); i++) - rfkill_states[i] = RFKILL_STATE_ON; + /* RFKILL_STATE_HARD_BLOCKED is illegal here... */ + if (rfkill_default_state != RFKILL_STATE_SOFT_BLOCKED && + rfkill_default_state != RFKILL_STATE_UNBLOCKED) + return -EINVAL; + + for (i = 0; i < RFKILL_TYPE_MAX; i++) + rfkill_global_states[i].default_state = rfkill_default_state; error = class_register(&rfkill_class); if (error) { @@ -445,11 +864,14 @@ static int __init rfkill_init(void) return error; } + register_rfkill_notifier(&rfkill_blocking_uevent_nb); + return 0; } static void __exit rfkill_exit(void) { + unregister_rfkill_notifier(&rfkill_blocking_uevent_nb); class_unregister(&rfkill_class); } diff --git a/net/rose/af_rose.c b/net/rose/af_rose.c index 1ebf65294405..a7f1ce11bc22 100644 --- a/net/rose/af_rose.c +++ b/net/rose/af_rose.c @@ -74,6 +74,20 @@ ax25_address rose_callsign; * separate class since they always nest. */ static struct lock_class_key rose_netdev_xmit_lock_key; +static struct lock_class_key rose_netdev_addr_lock_key; + +static void rose_set_lockdep_one(struct net_device *dev, + struct netdev_queue *txq, + void *_unused) +{ + lockdep_set_class(&txq->_xmit_lock, &rose_netdev_xmit_lock_key); +} + +static void rose_set_lockdep_key(struct net_device *dev) +{ + lockdep_set_class(&dev->addr_list_lock, &rose_netdev_addr_lock_key); + netdev_for_each_tx_queue(dev, rose_set_lockdep_one, NULL); +} /* * Convert a ROSE address into text. @@ -197,7 +211,7 @@ static int rose_device_event(struct notifier_block *this, unsigned long event, { struct net_device *dev = (struct net_device *)ptr; - if (dev_net(dev) != &init_net) + if (!net_eq(dev_net(dev), &init_net)) return NOTIFY_DONE; if (event != NETDEV_DOWN) @@ -566,13 +580,11 @@ static struct sock *rose_make_new(struct sock *osk) #endif sk->sk_type = osk->sk_type; - sk->sk_socket = osk->sk_socket; sk->sk_priority = osk->sk_priority; sk->sk_protocol = osk->sk_protocol; sk->sk_rcvbuf = osk->sk_rcvbuf; sk->sk_sndbuf = osk->sk_sndbuf; sk->sk_state = TCP_ESTABLISHED; - sk->sk_sleep = osk->sk_sleep; sock_copy_flags(sk, osk); init_timer(&rose->timer); @@ -759,7 +771,7 @@ static int rose_connect(struct socket *sock, struct sockaddr *uaddr, int addr_le sock->state = SS_UNCONNECTED; rose->neighbour = rose_get_neigh(&addr->srose_addr, &cause, - &diagnostic); + &diagnostic, 0); if (!rose->neighbour) { err = -ENETUNREACH; goto out_release; @@ -855,7 +867,7 @@ rose_try_next_neigh: if (sk->sk_state != TCP_ESTABLISHED) { /* Try next neighbour */ - rose->neighbour = rose_get_neigh(&addr->srose_addr, &cause, &diagnostic); + rose->neighbour = rose_get_neigh(&addr->srose_addr, &cause, &diagnostic, 0); if (rose->neighbour) goto rose_try_next_neigh; @@ -924,14 +936,12 @@ static int rose_accept(struct socket *sock, struct socket *newsock, int flags) goto out_release; newsk = skb->sk; - newsk->sk_socket = newsock; - newsk->sk_sleep = &newsock->wait; + sock_graft(newsk, newsock); /* Now attach up the new socket */ skb->sk = NULL; kfree_skb(skb); sk->sk_ack_backlog--; - newsock->sk = newsk; out_release: release_sock(sk); @@ -1580,7 +1590,7 @@ static int __init rose_proto_init(void) free_netdev(dev); goto fail; } - lockdep_set_class(&dev->_xmit_lock, &rose_netdev_xmit_lock_key); + rose_set_lockdep_key(dev); dev_rose[i] = dev; } diff --git a/net/rose/rose_route.c b/net/rose/rose_route.c index bd593871c81e..a81066a1010a 100644 --- a/net/rose/rose_route.c +++ b/net/rose/rose_route.c @@ -662,27 +662,34 @@ struct rose_route *rose_route_free_lci(unsigned int lci, struct rose_neigh *neig } /* - * Find a neighbour given a ROSE address. + * Find a neighbour or a route given a ROSE address. */ struct rose_neigh *rose_get_neigh(rose_address *addr, unsigned char *cause, - unsigned char *diagnostic) + unsigned char *diagnostic, int new) { struct rose_neigh *res = NULL; struct rose_node *node; int failed = 0; int i; - spin_lock_bh(&rose_node_list_lock); + if (!new) spin_lock_bh(&rose_node_list_lock); for (node = rose_node_list; node != NULL; node = node->next) { if (rosecmpm(addr, &node->address, node->mask) == 0) { for (i = 0; i < node->count; i++) { - if (!rose_ftimer_running(node->neighbour[i])) { - res = node->neighbour[i]; - goto out; - } else - failed = 1; + if (new) { + if (node->neighbour[i]->restarted) { + res = node->neighbour[i]; + goto out; + } + } + else { + if (!rose_ftimer_running(node->neighbour[i])) { + res = node->neighbour[i]; + goto out; + } else + failed = 1; + } } - break; } } @@ -695,7 +702,7 @@ struct rose_neigh *rose_get_neigh(rose_address *addr, unsigned char *cause, } out: - spin_unlock_bh(&rose_node_list_lock); + if (!new) spin_unlock_bh(&rose_node_list_lock); return res; } @@ -1018,7 +1025,7 @@ int rose_route_frame(struct sk_buff *skb, ax25_cb *ax25) rose_route = rose_route->next; } - if ((new_neigh = rose_get_neigh(dest_addr, &cause, &diagnostic)) == NULL) { + if ((new_neigh = rose_get_neigh(dest_addr, &cause, &diagnostic, 1)) == NULL) { rose_transmit_clear_request(rose_neigh, lci, cause, diagnostic); goto out; } diff --git a/net/rxrpc/af_rxrpc.c b/net/rxrpc/af_rxrpc.c index 4b2682feeedc..32e489118beb 100644 --- a/net/rxrpc/af_rxrpc.c +++ b/net/rxrpc/af_rxrpc.c @@ -660,9 +660,9 @@ static void rxrpc_sock_destructor(struct sock *sk) rxrpc_purge_queue(&sk->sk_receive_queue); - BUG_TRAP(!atomic_read(&sk->sk_wmem_alloc)); - BUG_TRAP(sk_unhashed(sk)); - BUG_TRAP(!sk->sk_socket); + WARN_ON(atomic_read(&sk->sk_wmem_alloc)); + WARN_ON(!sk_unhashed(sk)); + WARN_ON(sk->sk_socket); if (!sock_flag(sk, SOCK_DEAD)) { printk("Attempt to release alive rxrpc socket: %p\n", sk); diff --git a/net/rxrpc/ar-accept.c b/net/rxrpc/ar-accept.c index bdfb77417794..77228f28fa36 100644 --- a/net/rxrpc/ar-accept.c +++ b/net/rxrpc/ar-accept.c @@ -100,7 +100,7 @@ static int rxrpc_accept_incoming_call(struct rxrpc_local *local, trans = rxrpc_get_transport(local, peer, GFP_NOIO); rxrpc_put_peer(peer); - if (!trans) { + if (IS_ERR(trans)) { _debug("no trans"); ret = -EBUSY; goto error; diff --git a/net/rxrpc/ar-input.c b/net/rxrpc/ar-input.c index f8a699e92962..f98c8027e5c1 100644 --- a/net/rxrpc/ar-input.c +++ b/net/rxrpc/ar-input.c @@ -21,6 +21,7 @@ #include <net/af_rxrpc.h> #include <net/ip.h> #include <net/udp.h> +#include <net/net_namespace.h> #include "ar-internal.h" unsigned long rxrpc_ack_timeout = 1; @@ -708,12 +709,12 @@ void rxrpc_data_ready(struct sock *sk, int count) if (skb_checksum_complete(skb)) { rxrpc_free_skb(skb); rxrpc_put_local(local); - UDP_INC_STATS_BH(UDP_MIB_INERRORS, 0); + UDP_INC_STATS_BH(&init_net, UDP_MIB_INERRORS, 0); _leave(" [CSUM failed]"); return; } - UDP_INC_STATS_BH(UDP_MIB_INDATAGRAMS, 0); + UDP_INC_STATS_BH(&init_net, UDP_MIB_INDATAGRAMS, 0); /* the socket buffer we have is owned by UDP, with UDP's data all over * it, but we really want our own */ diff --git a/net/sched/Kconfig b/net/sched/Kconfig index 82adfe6447d7..6767e54155db 100644 --- a/net/sched/Kconfig +++ b/net/sched/Kconfig @@ -106,16 +106,14 @@ config NET_SCH_PRIO To compile this code as a module, choose M here: the module will be called sch_prio. -config NET_SCH_RR - tristate "Multi Band Round Robin Queuing (RR)" - select NET_SCH_PRIO +config NET_SCH_MULTIQ + tristate "Hardware Multiqueue-aware Multi Band Queuing (MULTIQ)" ---help--- - Say Y here if you want to use an n-band round robin packet - scheduler. + Say Y here if you want to use an n-band queue packet scheduler + to support devices that have multiple hardware transmit queues. - The module uses sch_prio for its framework and is aliased as - sch_rr, so it will load sch_prio, although it is referred - to using sch_rr. + To compile this code as a module, choose M here: the + module will be called sch_multiq. config NET_SCH_RED tristate "Random Early Detection (RED)" @@ -487,6 +485,17 @@ config NET_ACT_SIMP To compile this code as a module, choose M here: the module will be called simple. +config NET_ACT_SKBEDIT + tristate "SKB Editing" + depends on NET_CLS_ACT + ---help--- + Say Y here to change skb priority or queue_mapping settings. + + If unsure, say N. + + To compile this code as a module, choose M here: the + module will be called skbedit. + config NET_CLS_IND bool "Incoming device classification" depends on NET_CLS_U32 || NET_CLS_FW diff --git a/net/sched/Makefile b/net/sched/Makefile index 1d2b0f7df848..e60c9925b269 100644 --- a/net/sched/Makefile +++ b/net/sched/Makefile @@ -14,6 +14,7 @@ obj-$(CONFIG_NET_ACT_IPT) += act_ipt.o obj-$(CONFIG_NET_ACT_NAT) += act_nat.o obj-$(CONFIG_NET_ACT_PEDIT) += act_pedit.o obj-$(CONFIG_NET_ACT_SIMP) += act_simple.o +obj-$(CONFIG_NET_ACT_SKBEDIT) += act_skbedit.o obj-$(CONFIG_NET_SCH_FIFO) += sch_fifo.o obj-$(CONFIG_NET_SCH_CBQ) += sch_cbq.o obj-$(CONFIG_NET_SCH_HTB) += sch_htb.o @@ -26,6 +27,7 @@ obj-$(CONFIG_NET_SCH_SFQ) += sch_sfq.o obj-$(CONFIG_NET_SCH_TBF) += sch_tbf.o obj-$(CONFIG_NET_SCH_TEQL) += sch_teql.o obj-$(CONFIG_NET_SCH_PRIO) += sch_prio.o +obj-$(CONFIG_NET_SCH_MULTIQ) += sch_multiq.o obj-$(CONFIG_NET_SCH_ATM) += sch_atm.o obj-$(CONFIG_NET_SCH_NETEM) += sch_netem.o obj-$(CONFIG_NET_CLS_U32) += cls_u32.o diff --git a/net/sched/act_api.c b/net/sched/act_api.c index 74e662cbb2c5..9974b3f04f05 100644 --- a/net/sched/act_api.c +++ b/net/sched/act_api.c @@ -41,7 +41,7 @@ void tcf_hash_destroy(struct tcf_common *p, struct tcf_hashinfo *hinfo) return; } } - BUG_TRAP(0); + WARN_ON(1); } EXPORT_SYMBOL(tcf_hash_destroy); @@ -205,10 +205,9 @@ struct tcf_common *tcf_hash_check(u32 index, struct tc_action *a, int bind, { struct tcf_common *p = NULL; if (index && (p = tcf_hash_lookup(index, hinfo)) != NULL) { - if (bind) { + if (bind) p->tcfc_bindcnt++; - p->tcfc_refcnt++; - } + p->tcfc_refcnt++; a->priv = p; } return p; @@ -752,7 +751,7 @@ static int tca_action_flush(struct nlattr *nla, struct nlmsghdr *n, u32 pid) struct nlattr *tb[TCA_ACT_MAX+1]; struct nlattr *kind; struct tc_action *a = create_a(0); - int err = -EINVAL; + int err = -ENOMEM; if (a == NULL) { printk("tca_action_flush: couldnt create tc_action\n"); @@ -763,7 +762,7 @@ static int tca_action_flush(struct nlattr *nla, struct nlmsghdr *n, u32 pid) if (!skb) { printk("tca_action_flush: failed skb alloc\n"); kfree(a); - return -ENOBUFS; + return err; } b = skb_tail_pointer(skb); @@ -791,6 +790,8 @@ static int tca_action_flush(struct nlattr *nla, struct nlmsghdr *n, u32 pid) err = a->ops->walk(skb, &dcb, RTM_DELACTION, a); if (err < 0) goto nla_put_failure; + if (err == 0) + goto noflush_out; nla_nest_end(skb, nest); @@ -808,6 +809,7 @@ nla_put_failure: nlmsg_failure: module_put(a->ops->owner); err_out: +noflush_out: kfree_skb(skb); kfree(a); return err; @@ -825,8 +827,10 @@ tca_action_gd(struct nlattr *nla, struct nlmsghdr *n, u32 pid, int event) return ret; if (event == RTM_DELACTION && n->nlmsg_flags&NLM_F_ROOT) { - if (tb[0] != NULL && tb[1] == NULL) - return tca_action_flush(tb[0], n, pid); + if (tb[1] != NULL) + return tca_action_flush(tb[1], n, pid); + else + return -EINVAL; } for (i = 1; i <= TCA_ACT_MAX_PRIO && tb[i]; i++) { diff --git a/net/sched/act_gact.c b/net/sched/act_gact.c index 422872c4f14b..ac04289da5d7 100644 --- a/net/sched/act_gact.c +++ b/net/sched/act_gact.c @@ -139,7 +139,7 @@ static int tcf_gact(struct sk_buff *skb, struct tc_action *a, struct tcf_result #else action = gact->tcf_action; #endif - gact->tcf_bstats.bytes += skb->len; + gact->tcf_bstats.bytes += qdisc_pkt_len(skb); gact->tcf_bstats.packets++; if (action == TC_ACT_SHOT) gact->tcf_qstats.drops++; diff --git a/net/sched/act_ipt.c b/net/sched/act_ipt.c index da696fd3e341..0453d79ebf57 100644 --- a/net/sched/act_ipt.c +++ b/net/sched/act_ipt.c @@ -40,6 +40,7 @@ static struct tcf_hashinfo ipt_hash_info = { static int ipt_init_target(struct ipt_entry_target *t, char *table, unsigned int hook) { + struct xt_tgchk_param par; struct xt_target *target; int ret = 0; @@ -49,29 +50,30 @@ static int ipt_init_target(struct ipt_entry_target *t, char *table, unsigned int return -ENOENT; t->u.kernel.target = target; - - ret = xt_check_target(target, AF_INET, t->u.target_size - sizeof(*t), - table, hook, 0, 0); - if (ret) { + par.table = table; + par.entryinfo = NULL; + par.target = target; + par.targinfo = t->data; + par.hook_mask = hook; + par.family = NFPROTO_IPV4; + + ret = xt_check_target(&par, t->u.target_size - sizeof(*t), 0, false); + if (ret < 0) { module_put(t->u.kernel.target->me); return ret; } - if (t->u.kernel.target->checkentry - && !t->u.kernel.target->checkentry(table, NULL, - t->u.kernel.target, t->data, - hook)) { - module_put(t->u.kernel.target->me); - ret = -EINVAL; - } - - return ret; + return 0; } static void ipt_destroy_target(struct ipt_entry_target *t) { - if (t->u.kernel.target->destroy) - t->u.kernel.target->destroy(t->u.kernel.target, t->data); - module_put(t->u.kernel.target->me); + struct xt_tgdtor_param par = { + .target = t->u.kernel.target, + .targinfo = t->data, + }; + if (par.target->destroy != NULL) + par.target->destroy(&par); + module_put(par.target->me); } static int tcf_ipt_release(struct tcf_ipt *ipt, int bind) @@ -196,6 +198,7 @@ static int tcf_ipt(struct sk_buff *skb, struct tc_action *a, { int ret = 0, result = 0; struct tcf_ipt *ipt = a->priv; + struct xt_target_param par; if (skb_cloned(skb)) { if (pskb_expand_head(skb, 0, 0, GFP_ATOMIC)) @@ -205,16 +208,19 @@ static int tcf_ipt(struct sk_buff *skb, struct tc_action *a, spin_lock(&ipt->tcf_lock); ipt->tcf_tm.lastuse = jiffies; - ipt->tcf_bstats.bytes += skb->len; + ipt->tcf_bstats.bytes += qdisc_pkt_len(skb); ipt->tcf_bstats.packets++; /* yes, we have to worry about both in and out dev worry later - danger - this API seems to have changed from earlier kernels */ - ret = ipt->tcfi_t->u.kernel.target->target(skb, skb->dev, NULL, - ipt->tcfi_hook, - ipt->tcfi_t->u.kernel.target, - ipt->tcfi_t->data); + par.in = skb->dev; + par.out = NULL; + par.hooknum = ipt->tcfi_hook; + par.target = ipt->tcfi_t->u.kernel.target; + par.targinfo = ipt->tcfi_t->data; + ret = par.target->target(skb, &par); + switch (ret) { case NF_ACCEPT: result = TC_ACT_OK; diff --git a/net/sched/act_mirred.c b/net/sched/act_mirred.c index 1aff005d95cd..70341c020b6d 100644 --- a/net/sched/act_mirred.c +++ b/net/sched/act_mirred.c @@ -164,7 +164,7 @@ bad_mirred: if (skb2 != NULL) kfree_skb(skb2); m->tcf_qstats.overlimits++; - m->tcf_bstats.bytes += skb->len; + m->tcf_bstats.bytes += qdisc_pkt_len(skb); m->tcf_bstats.packets++; spin_unlock(&m->tcf_lock); /* should we be asking for packet to be dropped? @@ -184,7 +184,7 @@ bad_mirred: goto bad_mirred; } - m->tcf_bstats.bytes += skb2->len; + m->tcf_bstats.bytes += qdisc_pkt_len(skb2); m->tcf_bstats.packets++; if (!(at & AT_EGRESS)) if (m->tcfm_ok_push) diff --git a/net/sched/act_nat.c b/net/sched/act_nat.c index 0a3c8339767a..7b39ed485bca 100644 --- a/net/sched/act_nat.c +++ b/net/sched/act_nat.c @@ -124,7 +124,7 @@ static int tcf_nat(struct sk_buff *skb, struct tc_action *a, egress = p->flags & TCA_NAT_FLAG_EGRESS; action = p->tcf_action; - p->tcf_bstats.bytes += skb->len; + p->tcf_bstats.bytes += qdisc_pkt_len(skb); p->tcf_bstats.packets++; spin_unlock(&p->tcf_lock); diff --git a/net/sched/act_pedit.c b/net/sched/act_pedit.c index 3cc4cb9e500e..d5f4e3404864 100644 --- a/net/sched/act_pedit.c +++ b/net/sched/act_pedit.c @@ -182,7 +182,7 @@ static int tcf_pedit(struct sk_buff *skb, struct tc_action *a, bad: p->tcf_qstats.overlimits++; done: - p->tcf_bstats.bytes += skb->len; + p->tcf_bstats.bytes += qdisc_pkt_len(skb); p->tcf_bstats.packets++; spin_unlock(&p->tcf_lock); return p->tcf_action; diff --git a/net/sched/act_police.c b/net/sched/act_police.c index 0898120bbcc0..38015b493947 100644 --- a/net/sched/act_police.c +++ b/net/sched/act_police.c @@ -116,7 +116,7 @@ static void tcf_police_destroy(struct tcf_police *p) return; } } - BUG_TRAP(0); + WARN_ON(1); } static const struct nla_policy police_policy[TCA_POLICE_MAX + 1] = { @@ -272,7 +272,7 @@ static int tcf_act_police(struct sk_buff *skb, struct tc_action *a, spin_lock(&police->tcf_lock); - police->tcf_bstats.bytes += skb->len; + police->tcf_bstats.bytes += qdisc_pkt_len(skb); police->tcf_bstats.packets++; if (police->tcfp_ewma_rate && @@ -282,7 +282,7 @@ static int tcf_act_police(struct sk_buff *skb, struct tc_action *a, return police->tcf_action; } - if (skb->len <= police->tcfp_mtu) { + if (qdisc_pkt_len(skb) <= police->tcfp_mtu) { if (police->tcfp_R_tab == NULL) { spin_unlock(&police->tcf_lock); return police->tcfp_result; @@ -295,12 +295,12 @@ static int tcf_act_police(struct sk_buff *skb, struct tc_action *a, ptoks = toks + police->tcfp_ptoks; if (ptoks > (long)L2T_P(police, police->tcfp_mtu)) ptoks = (long)L2T_P(police, police->tcfp_mtu); - ptoks -= L2T_P(police, skb->len); + ptoks -= L2T_P(police, qdisc_pkt_len(skb)); } toks += police->tcfp_toks; if (toks > (long)police->tcfp_burst) toks = police->tcfp_burst; - toks -= L2T(police, skb->len); + toks -= L2T(police, qdisc_pkt_len(skb)); if ((toks|ptoks) >= 0) { police->tcfp_t_c = now; police->tcfp_toks = toks; diff --git a/net/sched/act_simple.c b/net/sched/act_simple.c index 1d421d059caf..e7851ce92cfe 100644 --- a/net/sched/act_simple.c +++ b/net/sched/act_simple.c @@ -41,7 +41,7 @@ static int tcf_simp(struct sk_buff *skb, struct tc_action *a, struct tcf_result spin_lock(&d->tcf_lock); d->tcf_tm.lastuse = jiffies; - d->tcf_bstats.bytes += skb->len; + d->tcf_bstats.bytes += qdisc_pkt_len(skb); d->tcf_bstats.packets++; /* print policy string followed by _ then packet count diff --git a/net/sched/act_skbedit.c b/net/sched/act_skbedit.c new file mode 100644 index 000000000000..fe9777e77f35 --- /dev/null +++ b/net/sched/act_skbedit.c @@ -0,0 +1,203 @@ +/* + * Copyright (c) 2008, Intel Corporation. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License along with + * this program; if not, write to the Free Software Foundation, Inc., 59 Temple + * Place - Suite 330, Boston, MA 02111-1307 USA. + * + * Author: Alexander Duyck <alexander.h.duyck@intel.com> + */ + +#include <linux/module.h> +#include <linux/init.h> +#include <linux/kernel.h> +#include <linux/skbuff.h> +#include <linux/rtnetlink.h> +#include <net/netlink.h> +#include <net/pkt_sched.h> + +#include <linux/tc_act/tc_skbedit.h> +#include <net/tc_act/tc_skbedit.h> + +#define SKBEDIT_TAB_MASK 15 +static struct tcf_common *tcf_skbedit_ht[SKBEDIT_TAB_MASK + 1]; +static u32 skbedit_idx_gen; +static DEFINE_RWLOCK(skbedit_lock); + +static struct tcf_hashinfo skbedit_hash_info = { + .htab = tcf_skbedit_ht, + .hmask = SKBEDIT_TAB_MASK, + .lock = &skbedit_lock, +}; + +static int tcf_skbedit(struct sk_buff *skb, struct tc_action *a, + struct tcf_result *res) +{ + struct tcf_skbedit *d = a->priv; + + spin_lock(&d->tcf_lock); + d->tcf_tm.lastuse = jiffies; + d->tcf_bstats.bytes += qdisc_pkt_len(skb); + d->tcf_bstats.packets++; + + if (d->flags & SKBEDIT_F_PRIORITY) + skb->priority = d->priority; + if (d->flags & SKBEDIT_F_QUEUE_MAPPING && + skb->dev->real_num_tx_queues > d->queue_mapping) + skb_set_queue_mapping(skb, d->queue_mapping); + + spin_unlock(&d->tcf_lock); + return d->tcf_action; +} + +static const struct nla_policy skbedit_policy[TCA_SKBEDIT_MAX + 1] = { + [TCA_SKBEDIT_PARMS] = { .len = sizeof(struct tc_skbedit) }, + [TCA_SKBEDIT_PRIORITY] = { .len = sizeof(u32) }, + [TCA_SKBEDIT_QUEUE_MAPPING] = { .len = sizeof(u16) }, +}; + +static int tcf_skbedit_init(struct nlattr *nla, struct nlattr *est, + struct tc_action *a, int ovr, int bind) +{ + struct nlattr *tb[TCA_SKBEDIT_MAX + 1]; + struct tc_skbedit *parm; + struct tcf_skbedit *d; + struct tcf_common *pc; + u32 flags = 0, *priority = NULL; + u16 *queue_mapping = NULL; + int ret = 0, err; + + if (nla == NULL) + return -EINVAL; + + err = nla_parse_nested(tb, TCA_SKBEDIT_MAX, nla, skbedit_policy); + if (err < 0) + return err; + + if (tb[TCA_SKBEDIT_PARMS] == NULL) + return -EINVAL; + + if (tb[TCA_SKBEDIT_PRIORITY] != NULL) { + flags |= SKBEDIT_F_PRIORITY; + priority = nla_data(tb[TCA_SKBEDIT_PRIORITY]); + } + + if (tb[TCA_SKBEDIT_QUEUE_MAPPING] != NULL) { + flags |= SKBEDIT_F_QUEUE_MAPPING; + queue_mapping = nla_data(tb[TCA_SKBEDIT_QUEUE_MAPPING]); + } + if (!flags) + return -EINVAL; + + parm = nla_data(tb[TCA_SKBEDIT_PARMS]); + + pc = tcf_hash_check(parm->index, a, bind, &skbedit_hash_info); + if (!pc) { + pc = tcf_hash_create(parm->index, est, a, sizeof(*d), bind, + &skbedit_idx_gen, &skbedit_hash_info); + if (unlikely(!pc)) + return -ENOMEM; + + d = to_skbedit(pc); + ret = ACT_P_CREATED; + } else { + d = to_skbedit(pc); + if (!ovr) { + tcf_hash_release(pc, bind, &skbedit_hash_info); + return -EEXIST; + } + } + + spin_lock_bh(&d->tcf_lock); + + d->flags = flags; + if (flags & SKBEDIT_F_PRIORITY) + d->priority = *priority; + if (flags & SKBEDIT_F_QUEUE_MAPPING) + d->queue_mapping = *queue_mapping; + d->tcf_action = parm->action; + + spin_unlock_bh(&d->tcf_lock); + + if (ret == ACT_P_CREATED) + tcf_hash_insert(pc, &skbedit_hash_info); + return ret; +} + +static inline int tcf_skbedit_cleanup(struct tc_action *a, int bind) +{ + struct tcf_skbedit *d = a->priv; + + if (d) + return tcf_hash_release(&d->common, bind, &skbedit_hash_info); + return 0; +} + +static inline int tcf_skbedit_dump(struct sk_buff *skb, struct tc_action *a, + int bind, int ref) +{ + unsigned char *b = skb_tail_pointer(skb); + struct tcf_skbedit *d = a->priv; + struct tc_skbedit opt; + struct tcf_t t; + + opt.index = d->tcf_index; + opt.refcnt = d->tcf_refcnt - ref; + opt.bindcnt = d->tcf_bindcnt - bind; + opt.action = d->tcf_action; + NLA_PUT(skb, TCA_SKBEDIT_PARMS, sizeof(opt), &opt); + if (d->flags & SKBEDIT_F_PRIORITY) + NLA_PUT(skb, TCA_SKBEDIT_PRIORITY, sizeof(d->priority), + &d->priority); + if (d->flags & SKBEDIT_F_QUEUE_MAPPING) + NLA_PUT(skb, TCA_SKBEDIT_QUEUE_MAPPING, + sizeof(d->queue_mapping), &d->queue_mapping); + t.install = jiffies_to_clock_t(jiffies - d->tcf_tm.install); + t.lastuse = jiffies_to_clock_t(jiffies - d->tcf_tm.lastuse); + t.expires = jiffies_to_clock_t(d->tcf_tm.expires); + NLA_PUT(skb, TCA_SKBEDIT_TM, sizeof(t), &t); + return skb->len; + +nla_put_failure: + nlmsg_trim(skb, b); + return -1; +} + +static struct tc_action_ops act_skbedit_ops = { + .kind = "skbedit", + .hinfo = &skbedit_hash_info, + .type = TCA_ACT_SKBEDIT, + .capab = TCA_CAP_NONE, + .owner = THIS_MODULE, + .act = tcf_skbedit, + .dump = tcf_skbedit_dump, + .cleanup = tcf_skbedit_cleanup, + .init = tcf_skbedit_init, + .walk = tcf_generic_walker, +}; + +MODULE_AUTHOR("Alexander Duyck, <alexander.h.duyck@intel.com>"); +MODULE_DESCRIPTION("SKB Editing"); +MODULE_LICENSE("GPL"); + +static int __init skbedit_init_module(void) +{ + return tcf_register_action(&act_skbedit_ops); +} + +static void __exit skbedit_cleanup_module(void) +{ + tcf_unregister_action(&act_skbedit_ops); +} + +module_init(skbedit_init_module); +module_exit(skbedit_cleanup_module); diff --git a/net/sched/cls_api.c b/net/sched/cls_api.c index 9360fc81e8c7..8eb79e92e94c 100644 --- a/net/sched/cls_api.c +++ b/net/sched/cls_api.c @@ -120,6 +120,7 @@ static int tc_ctl_tfilter(struct sk_buff *skb, struct nlmsghdr *n, void *arg) { struct net *net = sock_net(skb->sk); struct nlattr *tca[TCA_MAX + 1]; + spinlock_t *root_lock; struct tcmsg *t; u32 protocol; u32 prio; @@ -166,7 +167,8 @@ replay: /* Find qdisc */ if (!parent) { - q = dev->qdisc_sleeping; + struct netdev_queue *dev_queue = netdev_get_tx_queue(dev, 0); + q = dev_queue->qdisc_sleeping; parent = q->handle; } else { q = qdisc_lookup(dev, TC_H_MAJ(t->tcm_parent)); @@ -203,6 +205,8 @@ replay: } } + root_lock = qdisc_root_sleeping_lock(q); + if (tp == NULL) { /* Proto-tcf does not exist, create new one */ @@ -262,10 +266,10 @@ replay: goto errout; } - qdisc_lock_tree(dev); + spin_lock_bh(root_lock); tp->next = *back; *back = tp; - qdisc_unlock_tree(dev); + spin_unlock_bh(root_lock); } else if (tca[TCA_KIND] && nla_strcmp(tca[TCA_KIND], tp->ops->kind)) goto errout; @@ -274,9 +278,9 @@ replay: if (fh == 0) { if (n->nlmsg_type == RTM_DELTFILTER && t->tcm_handle == 0) { - qdisc_lock_tree(dev); + spin_lock_bh(root_lock); *back = tp->next; - qdisc_unlock_tree(dev); + spin_unlock_bh(root_lock); tfilter_notify(skb, n, tp, fh, RTM_DELTFILTER); tcf_destroy(tp); @@ -334,7 +338,7 @@ static int tcf_fill_node(struct sk_buff *skb, struct tcf_proto *tp, tcm->tcm_family = AF_UNSPEC; tcm->tcm__pad1 = 0; tcm->tcm__pad1 = 0; - tcm->tcm_ifindex = tp->q->dev->ifindex; + tcm->tcm_ifindex = qdisc_dev(tp->q)->ifindex; tcm->tcm_parent = tp->classid; tcm->tcm_info = TC_H_MAKE(tp->prio, tp->protocol); NLA_PUT_STRING(skb, TCA_KIND, tp->ops->kind); @@ -390,6 +394,7 @@ static int tcf_node_dump(struct tcf_proto *tp, unsigned long n, static int tc_dump_tfilter(struct sk_buff *skb, struct netlink_callback *cb) { struct net *net = sock_net(skb->sk); + struct netdev_queue *dev_queue; int t; int s_t; struct net_device *dev; @@ -408,8 +413,9 @@ static int tc_dump_tfilter(struct sk_buff *skb, struct netlink_callback *cb) if ((dev = dev_get_by_index(&init_net, tcm->tcm_ifindex)) == NULL) return skb->len; + dev_queue = netdev_get_tx_queue(dev, 0); if (!tcm->tcm_parent) - q = dev->qdisc_sleeping; + q = dev_queue->qdisc_sleeping; else q = qdisc_lookup(dev, TC_H_MAJ(tcm->tcm_parent)); if (!q) diff --git a/net/sched/cls_flow.c b/net/sched/cls_flow.c index 971b867e0484..0ebaff637e31 100644 --- a/net/sched/cls_flow.c +++ b/net/sched/cls_flow.c @@ -36,6 +36,8 @@ struct flow_filter { struct list_head list; struct tcf_exts exts; struct tcf_ematch_tree ematches; + struct timer_list perturb_timer; + u32 perturb_period; u32 handle; u32 nkeys; @@ -47,11 +49,9 @@ struct flow_filter { u32 addend; u32 divisor; u32 baseclass; + u32 hashrnd; }; -static u32 flow_hashrnd __read_mostly; -static int flow_hashrnd_initted __read_mostly; - static const struct tcf_ext_map flow_ext_map = { .action = TCA_FLOW_ACT, .police = TCA_FLOW_POLICE, @@ -67,9 +67,9 @@ static inline u32 addr_fold(void *addr) static u32 flow_get_src(const struct sk_buff *skb) { switch (skb->protocol) { - case __constant_htons(ETH_P_IP): + case htons(ETH_P_IP): return ntohl(ip_hdr(skb)->saddr); - case __constant_htons(ETH_P_IPV6): + case htons(ETH_P_IPV6): return ntohl(ipv6_hdr(skb)->saddr.s6_addr32[3]); default: return addr_fold(skb->sk); @@ -79,9 +79,9 @@ static u32 flow_get_src(const struct sk_buff *skb) static u32 flow_get_dst(const struct sk_buff *skb) { switch (skb->protocol) { - case __constant_htons(ETH_P_IP): + case htons(ETH_P_IP): return ntohl(ip_hdr(skb)->daddr); - case __constant_htons(ETH_P_IPV6): + case htons(ETH_P_IPV6): return ntohl(ipv6_hdr(skb)->daddr.s6_addr32[3]); default: return addr_fold(skb->dst) ^ (__force u16)skb->protocol; @@ -91,9 +91,9 @@ static u32 flow_get_dst(const struct sk_buff *skb) static u32 flow_get_proto(const struct sk_buff *skb) { switch (skb->protocol) { - case __constant_htons(ETH_P_IP): + case htons(ETH_P_IP): return ip_hdr(skb)->protocol; - case __constant_htons(ETH_P_IPV6): + case htons(ETH_P_IPV6): return ipv6_hdr(skb)->nexthdr; default: return 0; @@ -120,7 +120,7 @@ static u32 flow_get_proto_src(const struct sk_buff *skb) u32 res = 0; switch (skb->protocol) { - case __constant_htons(ETH_P_IP): { + case htons(ETH_P_IP): { struct iphdr *iph = ip_hdr(skb); if (!(iph->frag_off&htons(IP_MF|IP_OFFSET)) && @@ -128,7 +128,7 @@ static u32 flow_get_proto_src(const struct sk_buff *skb) res = ntohs(*(__be16 *)((void *)iph + iph->ihl * 4)); break; } - case __constant_htons(ETH_P_IPV6): { + case htons(ETH_P_IPV6): { struct ipv6hdr *iph = ipv6_hdr(skb); if (has_ports(iph->nexthdr)) @@ -147,7 +147,7 @@ static u32 flow_get_proto_dst(const struct sk_buff *skb) u32 res = 0; switch (skb->protocol) { - case __constant_htons(ETH_P_IP): { + case htons(ETH_P_IP): { struct iphdr *iph = ip_hdr(skb); if (!(iph->frag_off&htons(IP_MF|IP_OFFSET)) && @@ -155,7 +155,7 @@ static u32 flow_get_proto_dst(const struct sk_buff *skb) res = ntohs(*(__be16 *)((void *)iph + iph->ihl * 4 + 2)); break; } - case __constant_htons(ETH_P_IPV6): { + case htons(ETH_P_IPV6): { struct ipv6hdr *iph = ipv6_hdr(skb); if (has_ports(iph->nexthdr)) @@ -213,9 +213,9 @@ static u32 flow_get_nfct(const struct sk_buff *skb) static u32 flow_get_nfct_src(const struct sk_buff *skb) { switch (skb->protocol) { - case __constant_htons(ETH_P_IP): + case htons(ETH_P_IP): return ntohl(CTTUPLE(skb, src.u3.ip)); - case __constant_htons(ETH_P_IPV6): + case htons(ETH_P_IPV6): return ntohl(CTTUPLE(skb, src.u3.ip6[3])); } fallback: @@ -225,9 +225,9 @@ fallback: static u32 flow_get_nfct_dst(const struct sk_buff *skb) { switch (skb->protocol) { - case __constant_htons(ETH_P_IP): + case htons(ETH_P_IP): return ntohl(CTTUPLE(skb, dst.u3.ip)); - case __constant_htons(ETH_P_IPV6): + case htons(ETH_P_IPV6): return ntohl(CTTUPLE(skb, dst.u3.ip6[3])); } fallback: @@ -348,7 +348,7 @@ static int flow_classify(struct sk_buff *skb, struct tcf_proto *tp, } if (f->mode == FLOW_MODE_HASH) - classid = jhash2(keys, f->nkeys, flow_hashrnd); + classid = jhash2(keys, f->nkeys, f->hashrnd); else { classid = keys[0]; classid = (classid & f->mask) ^ f->xor; @@ -369,6 +369,15 @@ static int flow_classify(struct sk_buff *skb, struct tcf_proto *tp, return -1; } +static void flow_perturbation(unsigned long arg) +{ + struct flow_filter *f = (struct flow_filter *)arg; + + get_random_bytes(&f->hashrnd, 4); + if (f->perturb_period) + mod_timer(&f->perturb_timer, jiffies + f->perturb_period); +} + static const struct nla_policy flow_policy[TCA_FLOW_MAX + 1] = { [TCA_FLOW_KEYS] = { .type = NLA_U32 }, [TCA_FLOW_MODE] = { .type = NLA_U32 }, @@ -381,6 +390,7 @@ static const struct nla_policy flow_policy[TCA_FLOW_MAX + 1] = { [TCA_FLOW_ACT] = { .type = NLA_NESTED }, [TCA_FLOW_POLICE] = { .type = NLA_NESTED }, [TCA_FLOW_EMATCHES] = { .type = NLA_NESTED }, + [TCA_FLOW_PERTURB] = { .type = NLA_U32 }, }; static int flow_change(struct tcf_proto *tp, unsigned long base, @@ -394,6 +404,7 @@ static int flow_change(struct tcf_proto *tp, unsigned long base, struct tcf_exts e; struct tcf_ematch_tree t; unsigned int nkeys = 0; + unsigned int perturb_period = 0; u32 baseclass = 0; u32 keymask = 0; u32 mode; @@ -442,6 +453,14 @@ static int flow_change(struct tcf_proto *tp, unsigned long base, mode = nla_get_u32(tb[TCA_FLOW_MODE]); if (mode != FLOW_MODE_HASH && nkeys > 1) goto err2; + + if (mode == FLOW_MODE_HASH) + perturb_period = f->perturb_period; + if (tb[TCA_FLOW_PERTURB]) { + if (mode != FLOW_MODE_HASH) + goto err2; + perturb_period = nla_get_u32(tb[TCA_FLOW_PERTURB]) * HZ; + } } else { err = -EINVAL; if (!handle) @@ -455,6 +474,12 @@ static int flow_change(struct tcf_proto *tp, unsigned long base, if (mode != FLOW_MODE_HASH && nkeys > 1) goto err2; + if (tb[TCA_FLOW_PERTURB]) { + if (mode != FLOW_MODE_HASH) + goto err2; + perturb_period = nla_get_u32(tb[TCA_FLOW_PERTURB]) * HZ; + } + if (TC_H_MAJ(baseclass) == 0) baseclass = TC_H_MAKE(tp->q->handle, baseclass); if (TC_H_MIN(baseclass) == 0) @@ -467,6 +492,11 @@ static int flow_change(struct tcf_proto *tp, unsigned long base, f->handle = handle; f->mask = ~0U; + + get_random_bytes(&f->hashrnd, 4); + f->perturb_timer.function = flow_perturbation; + f->perturb_timer.data = (unsigned long)f; + init_timer_deferrable(&f->perturb_timer); } tcf_exts_change(tp, &f->exts, &e); @@ -495,6 +525,11 @@ static int flow_change(struct tcf_proto *tp, unsigned long base, if (baseclass) f->baseclass = baseclass; + f->perturb_period = perturb_period; + del_timer(&f->perturb_timer); + if (perturb_period) + mod_timer(&f->perturb_timer, jiffies + perturb_period); + if (*arg == 0) list_add_tail(&f->list, &head->filters); @@ -512,6 +547,7 @@ err1: static void flow_destroy_filter(struct tcf_proto *tp, struct flow_filter *f) { + del_timer_sync(&f->perturb_timer); tcf_exts_destroy(tp, &f->exts); tcf_em_tree_destroy(tp, &f->ematches); kfree(f); @@ -532,11 +568,6 @@ static int flow_init(struct tcf_proto *tp) { struct flow_head *head; - if (!flow_hashrnd_initted) { - get_random_bytes(&flow_hashrnd, 4); - flow_hashrnd_initted = 1; - } - head = kzalloc(sizeof(*head), GFP_KERNEL); if (head == NULL) return -ENOBUFS; @@ -605,6 +636,9 @@ static int flow_dump(struct tcf_proto *tp, unsigned long fh, if (f->baseclass) NLA_PUT_U32(skb, TCA_FLOW_BASECLASS, f->baseclass); + if (f->perturb_period) + NLA_PUT_U32(skb, TCA_FLOW_PERTURB, f->perturb_period / HZ); + if (tcf_exts_dump(skb, &f->exts, &flow_ext_map) < 0) goto nla_put_failure; #ifdef CONFIG_NET_EMATCH diff --git a/net/sched/cls_route.c b/net/sched/cls_route.c index 784dcb870b98..e3d8455eebc2 100644 --- a/net/sched/cls_route.c +++ b/net/sched/cls_route.c @@ -73,11 +73,13 @@ static __inline__ int route4_fastmap_hash(u32 id, int iif) } static inline -void route4_reset_fastmap(struct net_device *dev, struct route4_head *head, u32 id) +void route4_reset_fastmap(struct Qdisc *q, struct route4_head *head, u32 id) { - qdisc_lock_tree(dev); + spinlock_t *root_lock = qdisc_root_sleeping_lock(q); + + spin_lock_bh(root_lock); memset(head->fastmap, 0, sizeof(head->fastmap)); - qdisc_unlock_tree(dev); + spin_unlock_bh(root_lock); } static inline void @@ -302,7 +304,7 @@ static int route4_delete(struct tcf_proto *tp, unsigned long arg) *fp = f->next; tcf_tree_unlock(tp); - route4_reset_fastmap(tp->q->dev, head, f->id); + route4_reset_fastmap(tp->q, head, f->id); route4_delete_filter(tp, f); /* Strip tree */ @@ -500,7 +502,7 @@ reinsert: } tcf_tree_unlock(tp); - route4_reset_fastmap(tp->q->dev, head, f->id); + route4_reset_fastmap(tp->q, head, f->id); *arg = (unsigned long)f; return 0; diff --git a/net/sched/cls_u32.c b/net/sched/cls_u32.c index 4d755444c449..246f9065ce34 100644 --- a/net/sched/cls_u32.c +++ b/net/sched/cls_u32.c @@ -75,7 +75,6 @@ struct tc_u_hnode struct tc_u_common { - struct tc_u_common *next; struct tc_u_hnode *hlist; struct Qdisc *q; int refcnt; @@ -87,8 +86,6 @@ static const struct tcf_ext_map u32_ext_map = { .police = TCA_U32_POLICE }; -static struct tc_u_common *u32_list; - static __inline__ unsigned u32_hash_fold(__be32 key, struct tc_u32_sel *sel, u8 fshift) { unsigned h = ntohl(key & sel->hmask)>>fshift; @@ -287,9 +284,7 @@ static int u32_init(struct tcf_proto *tp) struct tc_u_hnode *root_ht; struct tc_u_common *tp_c; - for (tp_c = u32_list; tp_c; tp_c = tp_c->next) - if (tp_c->q == tp->q) - break; + tp_c = tp->q->u32_node; root_ht = kzalloc(sizeof(*root_ht), GFP_KERNEL); if (root_ht == NULL) @@ -307,8 +302,7 @@ static int u32_init(struct tcf_proto *tp) return -ENOBUFS; } tp_c->q = tp->q; - tp_c->next = u32_list; - u32_list = tp_c; + tp->q->u32_node = tp_c; } tp_c->refcnt++; @@ -351,7 +345,7 @@ static int u32_delete_key(struct tcf_proto *tp, struct tc_u_knode* key) } } } - BUG_TRAP(0); + WARN_ON(1); return 0; } @@ -374,7 +368,7 @@ static int u32_destroy_hnode(struct tcf_proto *tp, struct tc_u_hnode *ht) struct tc_u_common *tp_c = tp->data; struct tc_u_hnode **hn; - BUG_TRAP(!ht->refcnt); + WARN_ON(ht->refcnt); u32_clear_hnode(tp, ht); @@ -386,7 +380,7 @@ static int u32_destroy_hnode(struct tcf_proto *tp, struct tc_u_hnode *ht) } } - BUG_TRAP(0); + WARN_ON(1); return -ENOENT; } @@ -395,21 +389,15 @@ static void u32_destroy(struct tcf_proto *tp) struct tc_u_common *tp_c = tp->data; struct tc_u_hnode *root_ht = xchg(&tp->root, NULL); - BUG_TRAP(root_ht != NULL); + WARN_ON(root_ht == NULL); if (root_ht && --root_ht->refcnt == 0) u32_destroy_hnode(tp, root_ht); if (--tp_c->refcnt == 0) { struct tc_u_hnode *ht; - struct tc_u_common **tp_cp; - for (tp_cp = &u32_list; *tp_cp; tp_cp = &(*tp_cp)->next) { - if (*tp_cp == tp_c) { - *tp_cp = tp_c->next; - break; - } - } + tp->q->u32_node = NULL; for (ht = tp_c->hlist; ht; ht = ht->next) { ht->refcnt--; @@ -419,7 +407,7 @@ static void u32_destroy(struct tcf_proto *tp) while ((ht = tp_c->hlist) != NULL) { tp_c->hlist = ht->next; - BUG_TRAP(ht->refcnt == 0); + WARN_ON(ht->refcnt != 0); kfree(ht); } diff --git a/net/sched/em_cmp.c b/net/sched/em_cmp.c index cc49c932641d..bc450397487a 100644 --- a/net/sched/em_cmp.c +++ b/net/sched/em_cmp.c @@ -14,6 +14,7 @@ #include <linux/kernel.h> #include <linux/skbuff.h> #include <linux/tc_ematch/tc_em_cmp.h> +#include <asm/unaligned.h> #include <net/pkt_cls.h> static inline int cmp_needs_transformation(struct tcf_em_cmp *cmp) @@ -37,8 +38,7 @@ static int em_cmp_match(struct sk_buff *skb, struct tcf_ematch *em, break; case TCF_EM_ALIGN_U16: - val = *ptr << 8; - val |= *(ptr+1); + val = get_unaligned_be16(ptr); if (cmp_needs_transformation(cmp)) val = be16_to_cpu(val); @@ -47,10 +47,7 @@ static int em_cmp_match(struct sk_buff *skb, struct tcf_ematch *em, case TCF_EM_ALIGN_U32: /* Worth checking boundries? The branching seems * to get worse. Visit again. */ - val = *ptr << 24; - val |= *(ptr+1) << 16; - val |= *(ptr+2) << 8; - val |= *(ptr+3); + val = get_unaligned_be32(ptr); if (cmp_needs_transformation(cmp)) val = be32_to_cpu(val); diff --git a/net/sched/sch_api.c b/net/sched/sch_api.c index c40773cdbe45..1122c952aa99 100644 --- a/net/sched/sch_api.c +++ b/net/sched/sch_api.c @@ -27,6 +27,7 @@ #include <linux/kmod.h> #include <linux/list.h> #include <linux/hrtimer.h> +#include <linux/lockdep.h> #include <net/net_namespace.h> #include <net/sock.h> @@ -99,7 +100,7 @@ static int tclass_notify(struct sk_buff *oskb, struct nlmsghdr *n, ---requeue requeues once dequeued packet. It is used for non-standard or - just buggy devices, which can defer output even if dev->tbusy=0. + just buggy devices, which can defer output even if netif_queue_stopped()=0. ---reset @@ -183,17 +184,70 @@ EXPORT_SYMBOL(unregister_qdisc); (root qdisc, all its children, children of children etc.) */ -struct Qdisc *qdisc_lookup(struct net_device *dev, u32 handle) +struct Qdisc *qdisc_match_from_root(struct Qdisc *root, u32 handle) { struct Qdisc *q; - list_for_each_entry(q, &dev->qdisc_list, list) { + if (!(root->flags & TCQ_F_BUILTIN) && + root->handle == handle) + return root; + + list_for_each_entry(q, &root->list, list) { if (q->handle == handle) return q; } return NULL; } +/* + * This lock is needed until some qdiscs stop calling qdisc_tree_decrease_qlen() + * without rtnl_lock(); currently hfsc_dequeue(), netem_dequeue(), tbf_dequeue() + */ +static DEFINE_SPINLOCK(qdisc_list_lock); + +static void qdisc_list_add(struct Qdisc *q) +{ + if ((q->parent != TC_H_ROOT) && !(q->flags & TCQ_F_INGRESS)) { + spin_lock_bh(&qdisc_list_lock); + list_add_tail(&q->list, &qdisc_root_sleeping(q)->list); + spin_unlock_bh(&qdisc_list_lock); + } +} + +void qdisc_list_del(struct Qdisc *q) +{ + if ((q->parent != TC_H_ROOT) && !(q->flags & TCQ_F_INGRESS)) { + spin_lock_bh(&qdisc_list_lock); + list_del(&q->list); + spin_unlock_bh(&qdisc_list_lock); + } +} +EXPORT_SYMBOL(qdisc_list_del); + +struct Qdisc *qdisc_lookup(struct net_device *dev, u32 handle) +{ + unsigned int i; + struct Qdisc *q; + + spin_lock_bh(&qdisc_list_lock); + + for (i = 0; i < dev->num_tx_queues; i++) { + struct netdev_queue *txq = netdev_get_tx_queue(dev, i); + struct Qdisc *txq_root = txq->qdisc_sleeping; + + q = qdisc_match_from_root(txq_root, handle); + if (q) + goto unlock; + } + + q = qdisc_match_from_root(dev->rx_queue.qdisc_sleeping, handle); + +unlock: + spin_unlock_bh(&qdisc_list_lock); + + return q; +} + static struct Qdisc *qdisc_leaf(struct Qdisc *p, u32 classid) { unsigned long cl; @@ -277,15 +331,137 @@ void qdisc_put_rtab(struct qdisc_rate_table *tab) } EXPORT_SYMBOL(qdisc_put_rtab); +static LIST_HEAD(qdisc_stab_list); +static DEFINE_SPINLOCK(qdisc_stab_lock); + +static const struct nla_policy stab_policy[TCA_STAB_MAX + 1] = { + [TCA_STAB_BASE] = { .len = sizeof(struct tc_sizespec) }, + [TCA_STAB_DATA] = { .type = NLA_BINARY }, +}; + +static struct qdisc_size_table *qdisc_get_stab(struct nlattr *opt) +{ + struct nlattr *tb[TCA_STAB_MAX + 1]; + struct qdisc_size_table *stab; + struct tc_sizespec *s; + unsigned int tsize = 0; + u16 *tab = NULL; + int err; + + err = nla_parse_nested(tb, TCA_STAB_MAX, opt, stab_policy); + if (err < 0) + return ERR_PTR(err); + if (!tb[TCA_STAB_BASE]) + return ERR_PTR(-EINVAL); + + s = nla_data(tb[TCA_STAB_BASE]); + + if (s->tsize > 0) { + if (!tb[TCA_STAB_DATA]) + return ERR_PTR(-EINVAL); + tab = nla_data(tb[TCA_STAB_DATA]); + tsize = nla_len(tb[TCA_STAB_DATA]) / sizeof(u16); + } + + if (!s || tsize != s->tsize || (!tab && tsize > 0)) + return ERR_PTR(-EINVAL); + + spin_lock(&qdisc_stab_lock); + + list_for_each_entry(stab, &qdisc_stab_list, list) { + if (memcmp(&stab->szopts, s, sizeof(*s))) + continue; + if (tsize > 0 && memcmp(stab->data, tab, tsize * sizeof(u16))) + continue; + stab->refcnt++; + spin_unlock(&qdisc_stab_lock); + return stab; + } + + spin_unlock(&qdisc_stab_lock); + + stab = kmalloc(sizeof(*stab) + tsize * sizeof(u16), GFP_KERNEL); + if (!stab) + return ERR_PTR(-ENOMEM); + + stab->refcnt = 1; + stab->szopts = *s; + if (tsize > 0) + memcpy(stab->data, tab, tsize * sizeof(u16)); + + spin_lock(&qdisc_stab_lock); + list_add_tail(&stab->list, &qdisc_stab_list); + spin_unlock(&qdisc_stab_lock); + + return stab; +} + +void qdisc_put_stab(struct qdisc_size_table *tab) +{ + if (!tab) + return; + + spin_lock(&qdisc_stab_lock); + + if (--tab->refcnt == 0) { + list_del(&tab->list); + kfree(tab); + } + + spin_unlock(&qdisc_stab_lock); +} +EXPORT_SYMBOL(qdisc_put_stab); + +static int qdisc_dump_stab(struct sk_buff *skb, struct qdisc_size_table *stab) +{ + struct nlattr *nest; + + nest = nla_nest_start(skb, TCA_STAB); + NLA_PUT(skb, TCA_STAB_BASE, sizeof(stab->szopts), &stab->szopts); + nla_nest_end(skb, nest); + + return skb->len; + +nla_put_failure: + return -1; +} + +void qdisc_calculate_pkt_len(struct sk_buff *skb, struct qdisc_size_table *stab) +{ + int pkt_len, slot; + + pkt_len = skb->len + stab->szopts.overhead; + if (unlikely(!stab->szopts.tsize)) + goto out; + + slot = pkt_len + stab->szopts.cell_align; + if (unlikely(slot < 0)) + slot = 0; + + slot >>= stab->szopts.cell_log; + if (likely(slot < stab->szopts.tsize)) + pkt_len = stab->data[slot]; + else + pkt_len = stab->data[stab->szopts.tsize - 1] * + (slot / stab->szopts.tsize) + + stab->data[slot % stab->szopts.tsize]; + + pkt_len <<= stab->szopts.size_log; +out: + if (unlikely(pkt_len < 1)) + pkt_len = 1; + qdisc_skb_cb(skb)->pkt_len = pkt_len; +} +EXPORT_SYMBOL(qdisc_calculate_pkt_len); + static enum hrtimer_restart qdisc_watchdog(struct hrtimer *timer) { struct qdisc_watchdog *wd = container_of(timer, struct qdisc_watchdog, timer); - struct net_device *dev = wd->qdisc->dev; wd->qdisc->flags &= ~TCQ_F_THROTTLED; smp_wmb(); - netif_schedule(dev); + __netif_schedule(qdisc_root(wd->qdisc)); return HRTIMER_NORESTART; } @@ -302,6 +478,10 @@ void qdisc_watchdog_schedule(struct qdisc_watchdog *wd, psched_time_t expires) { ktime_t time; + if (test_bit(__QDISC_STATE_DEACTIVATED, + &qdisc_root_sleeping(wd->qdisc)->state)) + return; + wd->qdisc->flags |= TCQ_F_THROTTLED; time = ktime_set(0, 0); time = ktime_add_ns(time, PSCHED_US2NS(expires)); @@ -316,6 +496,110 @@ void qdisc_watchdog_cancel(struct qdisc_watchdog *wd) } EXPORT_SYMBOL(qdisc_watchdog_cancel); +static struct hlist_head *qdisc_class_hash_alloc(unsigned int n) +{ + unsigned int size = n * sizeof(struct hlist_head), i; + struct hlist_head *h; + + if (size <= PAGE_SIZE) + h = kmalloc(size, GFP_KERNEL); + else + h = (struct hlist_head *) + __get_free_pages(GFP_KERNEL, get_order(size)); + + if (h != NULL) { + for (i = 0; i < n; i++) + INIT_HLIST_HEAD(&h[i]); + } + return h; +} + +static void qdisc_class_hash_free(struct hlist_head *h, unsigned int n) +{ + unsigned int size = n * sizeof(struct hlist_head); + + if (size <= PAGE_SIZE) + kfree(h); + else + free_pages((unsigned long)h, get_order(size)); +} + +void qdisc_class_hash_grow(struct Qdisc *sch, struct Qdisc_class_hash *clhash) +{ + struct Qdisc_class_common *cl; + struct hlist_node *n, *next; + struct hlist_head *nhash, *ohash; + unsigned int nsize, nmask, osize; + unsigned int i, h; + + /* Rehash when load factor exceeds 0.75 */ + if (clhash->hashelems * 4 <= clhash->hashsize * 3) + return; + nsize = clhash->hashsize * 2; + nmask = nsize - 1; + nhash = qdisc_class_hash_alloc(nsize); + if (nhash == NULL) + return; + + ohash = clhash->hash; + osize = clhash->hashsize; + + sch_tree_lock(sch); + for (i = 0; i < osize; i++) { + hlist_for_each_entry_safe(cl, n, next, &ohash[i], hnode) { + h = qdisc_class_hash(cl->classid, nmask); + hlist_add_head(&cl->hnode, &nhash[h]); + } + } + clhash->hash = nhash; + clhash->hashsize = nsize; + clhash->hashmask = nmask; + sch_tree_unlock(sch); + + qdisc_class_hash_free(ohash, osize); +} +EXPORT_SYMBOL(qdisc_class_hash_grow); + +int qdisc_class_hash_init(struct Qdisc_class_hash *clhash) +{ + unsigned int size = 4; + + clhash->hash = qdisc_class_hash_alloc(size); + if (clhash->hash == NULL) + return -ENOMEM; + clhash->hashsize = size; + clhash->hashmask = size - 1; + clhash->hashelems = 0; + return 0; +} +EXPORT_SYMBOL(qdisc_class_hash_init); + +void qdisc_class_hash_destroy(struct Qdisc_class_hash *clhash) +{ + qdisc_class_hash_free(clhash->hash, clhash->hashsize); +} +EXPORT_SYMBOL(qdisc_class_hash_destroy); + +void qdisc_class_hash_insert(struct Qdisc_class_hash *clhash, + struct Qdisc_class_common *cl) +{ + unsigned int h; + + INIT_HLIST_NODE(&cl->hnode); + h = qdisc_class_hash(cl->classid, clhash->hashmask); + hlist_add_head(&cl->hnode, &clhash->hash[h]); + clhash->hashelems++; +} +EXPORT_SYMBOL(qdisc_class_hash_insert); + +void qdisc_class_hash_remove(struct Qdisc_class_hash *clhash, + struct Qdisc_class_common *cl) +{ + hlist_del(&cl->hnode); + clhash->hashelems--; +} +EXPORT_SYMBOL(qdisc_class_hash_remove); + /* Allocate an unique handle from space managed by kernel */ static u32 qdisc_alloc_handle(struct net_device *dev) @@ -332,47 +616,28 @@ static u32 qdisc_alloc_handle(struct net_device *dev) return i>0 ? autohandle : 0; } -/* Attach toplevel qdisc to device dev */ +/* Attach toplevel qdisc to device queue. */ -static struct Qdisc * -dev_graft_qdisc(struct net_device *dev, struct Qdisc *qdisc) +static struct Qdisc *dev_graft_qdisc(struct netdev_queue *dev_queue, + struct Qdisc *qdisc) { - struct Qdisc *oqdisc; - - if (dev->flags & IFF_UP) - dev_deactivate(dev); - - qdisc_lock_tree(dev); - if (qdisc && qdisc->flags&TCQ_F_INGRESS) { - oqdisc = dev->qdisc_ingress; - /* Prune old scheduler */ - if (oqdisc && atomic_read(&oqdisc->refcnt) <= 1) { - /* delete */ - qdisc_reset(oqdisc); - dev->qdisc_ingress = NULL; - } else { /* new */ - dev->qdisc_ingress = qdisc; - } - - } else { - - oqdisc = dev->qdisc_sleeping; + struct Qdisc *oqdisc = dev_queue->qdisc_sleeping; + spinlock_t *root_lock; - /* Prune old scheduler */ - if (oqdisc && atomic_read(&oqdisc->refcnt) <= 1) - qdisc_reset(oqdisc); + root_lock = qdisc_lock(oqdisc); + spin_lock_bh(root_lock); - /* ... and graft new one */ - if (qdisc == NULL) - qdisc = &noop_qdisc; - dev->qdisc_sleeping = qdisc; - dev->qdisc = &noop_qdisc; - } + /* Prune old scheduler */ + if (oqdisc && atomic_read(&oqdisc->refcnt) <= 1) + qdisc_reset(oqdisc); - qdisc_unlock_tree(dev); + /* ... and graft new one */ + if (qdisc == NULL) + qdisc = &noop_qdisc; + dev_queue->qdisc_sleeping = qdisc; + rcu_assign_pointer(dev_queue->qdisc, &noop_qdisc); - if (dev->flags & IFF_UP) - dev_activate(dev); + spin_unlock_bh(root_lock); return oqdisc; } @@ -389,7 +654,7 @@ void qdisc_tree_decrease_qlen(struct Qdisc *sch, unsigned int n) if (TC_H_MAJ(parentid) == TC_H_MAJ(TC_H_INGRESS)) return; - sch = qdisc_lookup(sch->dev, TC_H_MAJ(parentid)); + sch = qdisc_lookup(qdisc_dev(sch), TC_H_MAJ(parentid)); if (sch == NULL) { WARN_ON(parentid != TC_H_ROOT); return; @@ -405,26 +670,61 @@ void qdisc_tree_decrease_qlen(struct Qdisc *sch, unsigned int n) } EXPORT_SYMBOL(qdisc_tree_decrease_qlen); -/* Graft qdisc "new" to class "classid" of qdisc "parent" or - to device "dev". +static void notify_and_destroy(struct sk_buff *skb, struct nlmsghdr *n, u32 clid, + struct Qdisc *old, struct Qdisc *new) +{ + if (new || old) + qdisc_notify(skb, n, clid, old, new); + + if (old) + qdisc_destroy(old); +} - Old qdisc is not destroyed but returned in *old. +/* Graft qdisc "new" to class "classid" of qdisc "parent" or + * to device "dev". + * + * When appropriate send a netlink notification using 'skb' + * and "n". + * + * On success, destroy old qdisc. */ static int qdisc_graft(struct net_device *dev, struct Qdisc *parent, - u32 classid, - struct Qdisc *new, struct Qdisc **old) + struct sk_buff *skb, struct nlmsghdr *n, u32 classid, + struct Qdisc *new, struct Qdisc *old) { + struct Qdisc *q = old; int err = 0; - struct Qdisc *q = *old; - if (parent == NULL) { - if (q && q->flags&TCQ_F_INGRESS) { - *old = dev_graft_qdisc(dev, q); - } else { - *old = dev_graft_qdisc(dev, new); + unsigned int i, num_q, ingress; + + ingress = 0; + num_q = dev->num_tx_queues; + if ((q && q->flags & TCQ_F_INGRESS) || + (new && new->flags & TCQ_F_INGRESS)) { + num_q = 1; + ingress = 1; } + + if (dev->flags & IFF_UP) + dev_deactivate(dev); + + for (i = 0; i < num_q; i++) { + struct netdev_queue *dev_queue = &dev->rx_queue; + + if (!ingress) + dev_queue = netdev_get_tx_queue(dev, i); + + old = dev_graft_qdisc(dev_queue, new); + if (new && i > 0) + atomic_inc(&new->refcnt); + + notify_and_destroy(skb, n, classid, old, new); + } + + if (dev->flags & IFF_UP) + dev_activate(dev); } else { const struct Qdisc_class_ops *cops = parent->ops->cl_ops; @@ -433,14 +733,20 @@ static int qdisc_graft(struct net_device *dev, struct Qdisc *parent, if (cops) { unsigned long cl = cops->get(parent, classid); if (cl) { - err = cops->graft(parent, cl, new, old); + err = cops->graft(parent, cl, new, &old); cops->put(parent, cl); } } + if (!err) + notify_and_destroy(skb, n, classid, old, new); } return err; } +/* lockdep annotation is needed for ingress; egress gets it only for name */ +static struct lock_class_key qdisc_tx_lock; +static struct lock_class_key qdisc_rx_lock; + /* Allocate and initialize new qdisc. @@ -448,13 +754,14 @@ static int qdisc_graft(struct net_device *dev, struct Qdisc *parent, */ static struct Qdisc * -qdisc_create(struct net_device *dev, u32 parent, u32 handle, - struct nlattr **tca, int *errp) +qdisc_create(struct net_device *dev, struct netdev_queue *dev_queue, + u32 parent, u32 handle, struct nlattr **tca, int *errp) { int err; struct nlattr *kind = tca[TCA_KIND]; struct Qdisc *sch; struct Qdisc_ops *ops; + struct qdisc_size_table *stab; ops = qdisc_lookup_ops(kind); #ifdef CONFIG_KMOD @@ -489,7 +796,7 @@ qdisc_create(struct net_device *dev, u32 parent, u32 handle, if (ops == NULL) goto err_out; - sch = qdisc_alloc(dev, ops); + sch = qdisc_alloc(dev_queue, ops); if (IS_ERR(sch)) { err = PTR_ERR(sch); goto err_out2; @@ -499,25 +806,40 @@ qdisc_create(struct net_device *dev, u32 parent, u32 handle, if (handle == TC_H_INGRESS) { sch->flags |= TCQ_F_INGRESS; - sch->stats_lock = &dev->ingress_lock; handle = TC_H_MAKE(TC_H_INGRESS, 0); + lockdep_set_class(qdisc_lock(sch), &qdisc_rx_lock); } else { - sch->stats_lock = &dev->queue_lock; if (handle == 0) { handle = qdisc_alloc_handle(dev); err = -ENOMEM; if (handle == 0) goto err_out3; } + lockdep_set_class(qdisc_lock(sch), &qdisc_tx_lock); } sch->handle = handle; if (!ops->init || (err = ops->init(sch, tca[TCA_OPTIONS])) == 0) { + if (tca[TCA_STAB]) { + stab = qdisc_get_stab(tca[TCA_STAB]); + if (IS_ERR(stab)) { + err = PTR_ERR(stab); + goto err_out3; + } + sch->stab = stab; + } if (tca[TCA_RATE]) { + spinlock_t *root_lock; + + if ((sch->parent != TC_H_ROOT) && + !(sch->flags & TCQ_F_INGRESS)) + root_lock = qdisc_root_sleeping_lock(sch); + else + root_lock = qdisc_lock(sch); + err = gen_new_estimator(&sch->bstats, &sch->rate_est, - sch->stats_lock, - tca[TCA_RATE]); + root_lock, tca[TCA_RATE]); if (err) { /* * Any broken qdiscs that would require @@ -529,13 +851,13 @@ qdisc_create(struct net_device *dev, u32 parent, u32 handle, goto err_out3; } } - qdisc_lock_tree(dev); - list_add_tail(&sch->list, &dev->qdisc_list); - qdisc_unlock_tree(dev); + + qdisc_list_add(sch); return sch; } err_out3: + qdisc_put_stab(sch->stab); dev_put(dev); kfree((char *) sch - sch->padded); err_out2: @@ -547,18 +869,30 @@ err_out: static int qdisc_change(struct Qdisc *sch, struct nlattr **tca) { - if (tca[TCA_OPTIONS]) { - int err; + struct qdisc_size_table *stab = NULL; + int err = 0; + if (tca[TCA_OPTIONS]) { if (sch->ops->change == NULL) return -EINVAL; err = sch->ops->change(sch, tca[TCA_OPTIONS]); if (err) return err; } + + if (tca[TCA_STAB]) { + stab = qdisc_get_stab(tca[TCA_STAB]); + if (IS_ERR(stab)) + return PTR_ERR(stab); + } + + qdisc_put_stab(sch->stab); + sch->stab = stab; + if (tca[TCA_RATE]) gen_replace_estimator(&sch->bstats, &sch->rate_est, - sch->stats_lock, tca[TCA_RATE]); + qdisc_root_sleeping_lock(sch), + tca[TCA_RATE]); return 0; } @@ -634,10 +968,12 @@ static int tc_get_qdisc(struct sk_buff *skb, struct nlmsghdr *n, void *arg) return -ENOENT; q = qdisc_leaf(p, clid); } else { /* ingress */ - q = dev->qdisc_ingress; + q = dev->rx_queue.qdisc_sleeping; } } else { - q = dev->qdisc_sleeping; + struct netdev_queue *dev_queue; + dev_queue = netdev_get_tx_queue(dev, 0); + q = dev_queue->qdisc_sleeping; } if (!q) return -ENOENT; @@ -657,14 +993,8 @@ static int tc_get_qdisc(struct sk_buff *skb, struct nlmsghdr *n, void *arg) return -EINVAL; if (q->handle == 0) return -ENOENT; - if ((err = qdisc_graft(dev, p, clid, NULL, &q)) != 0) + if ((err = qdisc_graft(dev, p, skb, n, clid, NULL, q)) != 0) return err; - if (q) { - qdisc_notify(skb, n, clid, q, NULL); - qdisc_lock_tree(dev); - qdisc_destroy(q); - qdisc_unlock_tree(dev); - } } else { qdisc_notify(skb, n, clid, NULL, q); } @@ -708,10 +1038,12 @@ replay: return -ENOENT; q = qdisc_leaf(p, clid); } else { /*ingress */ - q = dev->qdisc_ingress; + q = dev->rx_queue.qdisc_sleeping; } } else { - q = dev->qdisc_sleeping; + struct netdev_queue *dev_queue; + dev_queue = netdev_get_tx_queue(dev, 0); + q = dev_queue->qdisc_sleeping; } /* It may be default qdisc, ignore it */ @@ -788,10 +1120,12 @@ create_n_graft: if (!(n->nlmsg_flags&NLM_F_CREATE)) return -ENOENT; if (clid == TC_H_INGRESS) - q = qdisc_create(dev, tcm->tcm_parent, tcm->tcm_parent, + q = qdisc_create(dev, &dev->rx_queue, + tcm->tcm_parent, tcm->tcm_parent, tca, &err); else - q = qdisc_create(dev, tcm->tcm_parent, tcm->tcm_handle, + q = qdisc_create(dev, netdev_get_tx_queue(dev, 0), + tcm->tcm_parent, tcm->tcm_handle, tca, &err); if (q == NULL) { if (err == -EAGAIN) @@ -800,24 +1134,13 @@ create_n_graft: } graft: - if (1) { - struct Qdisc *old_q = NULL; - err = qdisc_graft(dev, p, clid, q, &old_q); - if (err) { - if (q) { - qdisc_lock_tree(dev); - qdisc_destroy(q); - qdisc_unlock_tree(dev); - } - return err; - } - qdisc_notify(skb, n, clid, old_q, q); - if (old_q) { - qdisc_lock_tree(dev); - qdisc_destroy(old_q); - qdisc_unlock_tree(dev); - } + err = qdisc_graft(dev, p, skb, n, clid, q, NULL); + if (err) { + if (q) + qdisc_destroy(q); + return err; } + return 0; } @@ -834,7 +1157,7 @@ static int tc_fill_qdisc(struct sk_buff *skb, struct Qdisc *q, u32 clid, tcm->tcm_family = AF_UNSPEC; tcm->tcm__pad1 = 0; tcm->tcm__pad2 = 0; - tcm->tcm_ifindex = q->dev->ifindex; + tcm->tcm_ifindex = qdisc_dev(q)->ifindex; tcm->tcm_parent = clid; tcm->tcm_handle = q->handle; tcm->tcm_info = atomic_read(&q->refcnt); @@ -843,8 +1166,11 @@ static int tc_fill_qdisc(struct sk_buff *skb, struct Qdisc *q, u32 clid, goto nla_put_failure; q->qstats.qlen = q->q.qlen; - if (gnet_stats_start_copy_compat(skb, TCA_STATS2, TCA_STATS, - TCA_XSTATS, q->stats_lock, &d) < 0) + if (q->stab && qdisc_dump_stab(skb, q->stab) < 0) + goto nla_put_failure; + + if (gnet_stats_start_copy_compat(skb, TCA_STATS2, TCA_STATS, TCA_XSTATS, + qdisc_root_sleeping_lock(q), &d) < 0) goto nla_put_failure; if (q->ops->dump_stats && q->ops->dump_stats(q, &d) < 0) @@ -894,13 +1220,57 @@ err_out: return -EINVAL; } +static bool tc_qdisc_dump_ignore(struct Qdisc *q) +{ + return (q->flags & TCQ_F_BUILTIN) ? true : false; +} + +static int tc_dump_qdisc_root(struct Qdisc *root, struct sk_buff *skb, + struct netlink_callback *cb, + int *q_idx_p, int s_q_idx) +{ + int ret = 0, q_idx = *q_idx_p; + struct Qdisc *q; + + if (!root) + return 0; + + q = root; + if (q_idx < s_q_idx) { + q_idx++; + } else { + if (!tc_qdisc_dump_ignore(q) && + tc_fill_qdisc(skb, q, q->parent, NETLINK_CB(cb->skb).pid, + cb->nlh->nlmsg_seq, NLM_F_MULTI, RTM_NEWQDISC) <= 0) + goto done; + q_idx++; + } + list_for_each_entry(q, &root->list, list) { + if (q_idx < s_q_idx) { + q_idx++; + continue; + } + if (!tc_qdisc_dump_ignore(q) && + tc_fill_qdisc(skb, q, q->parent, NETLINK_CB(cb->skb).pid, + cb->nlh->nlmsg_seq, NLM_F_MULTI, RTM_NEWQDISC) <= 0) + goto done; + q_idx++; + } + +out: + *q_idx_p = q_idx; + return ret; +done: + ret = -1; + goto out; +} + static int tc_dump_qdisc(struct sk_buff *skb, struct netlink_callback *cb) { struct net *net = sock_net(skb->sk); int idx, q_idx; int s_idx, s_q_idx; struct net_device *dev; - struct Qdisc *q; if (net != &init_net) return 0; @@ -910,21 +1280,22 @@ static int tc_dump_qdisc(struct sk_buff *skb, struct netlink_callback *cb) read_lock(&dev_base_lock); idx = 0; for_each_netdev(&init_net, dev) { + struct netdev_queue *dev_queue; + if (idx < s_idx) goto cont; if (idx > s_idx) s_q_idx = 0; q_idx = 0; - list_for_each_entry(q, &dev->qdisc_list, list) { - if (q_idx < s_q_idx) { - q_idx++; - continue; - } - if (tc_fill_qdisc(skb, q, q->parent, NETLINK_CB(cb->skb).pid, - cb->nlh->nlmsg_seq, NLM_F_MULTI, RTM_NEWQDISC) <= 0) - goto done; - q_idx++; - } + + dev_queue = netdev_get_tx_queue(dev, 0); + if (tc_dump_qdisc_root(dev_queue->qdisc_sleeping, skb, cb, &q_idx, s_q_idx) < 0) + goto done; + + dev_queue = &dev->rx_queue; + if (tc_dump_qdisc_root(dev_queue->qdisc_sleeping, skb, cb, &q_idx, s_q_idx) < 0) + goto done; + cont: idx++; } @@ -949,6 +1320,7 @@ done: static int tc_ctl_tclass(struct sk_buff *skb, struct nlmsghdr *n, void *arg) { struct net *net = sock_net(skb->sk); + struct netdev_queue *dev_queue; struct tcmsg *tcm = NLMSG_DATA(n); struct nlattr *tca[TCA_MAX + 1]; struct net_device *dev; @@ -986,6 +1358,7 @@ static int tc_ctl_tclass(struct sk_buff *skb, struct nlmsghdr *n, void *arg) /* Step 1. Determine qdisc handle X:0 */ + dev_queue = netdev_get_tx_queue(dev, 0); if (pid != TC_H_ROOT) { u32 qid1 = TC_H_MAJ(pid); @@ -996,7 +1369,7 @@ static int tc_ctl_tclass(struct sk_buff *skb, struct nlmsghdr *n, void *arg) } else if (qid1) { qid = qid1; } else if (qid == 0) - qid = dev->qdisc_sleeping->handle; + qid = dev_queue->qdisc_sleeping->handle; /* Now qid is genuine qdisc handle consistent both with parent and child. @@ -1007,7 +1380,7 @@ static int tc_ctl_tclass(struct sk_buff *skb, struct nlmsghdr *n, void *arg) pid = TC_H_MAKE(qid, pid); } else { if (qid == 0) - qid = dev->qdisc_sleeping->handle; + qid = dev_queue->qdisc_sleeping->handle; } /* OK. Locate qdisc */ @@ -1080,7 +1453,7 @@ static int tc_fill_tclass(struct sk_buff *skb, struct Qdisc *q, nlh = NLMSG_NEW(skb, pid, seq, event, sizeof(*tcm), flags); tcm = NLMSG_DATA(nlh); tcm->tcm_family = AF_UNSPEC; - tcm->tcm_ifindex = q->dev->ifindex; + tcm->tcm_ifindex = qdisc_dev(q)->ifindex; tcm->tcm_parent = q->handle; tcm->tcm_handle = q->handle; tcm->tcm_info = 0; @@ -1088,8 +1461,8 @@ static int tc_fill_tclass(struct sk_buff *skb, struct Qdisc *q, if (cl_ops->dump && cl_ops->dump(q, cl, skb, tcm) < 0) goto nla_put_failure; - if (gnet_stats_start_copy_compat(skb, TCA_STATS2, TCA_STATS, - TCA_XSTATS, q->stats_lock, &d) < 0) + if (gnet_stats_start_copy_compat(skb, TCA_STATS2, TCA_STATS, TCA_XSTATS, + qdisc_root_sleeping_lock(q), &d) < 0) goto nla_put_failure; if (cl_ops->dump_stats && cl_ops->dump_stats(q, cl, &d) < 0) @@ -1140,15 +1513,62 @@ static int qdisc_class_dump(struct Qdisc *q, unsigned long cl, struct qdisc_walk a->cb->nlh->nlmsg_seq, NLM_F_MULTI, RTM_NEWTCLASS); } +static int tc_dump_tclass_qdisc(struct Qdisc *q, struct sk_buff *skb, + struct tcmsg *tcm, struct netlink_callback *cb, + int *t_p, int s_t) +{ + struct qdisc_dump_args arg; + + if (tc_qdisc_dump_ignore(q) || + *t_p < s_t || !q->ops->cl_ops || + (tcm->tcm_parent && + TC_H_MAJ(tcm->tcm_parent) != q->handle)) { + (*t_p)++; + return 0; + } + if (*t_p > s_t) + memset(&cb->args[1], 0, sizeof(cb->args)-sizeof(cb->args[0])); + arg.w.fn = qdisc_class_dump; + arg.skb = skb; + arg.cb = cb; + arg.w.stop = 0; + arg.w.skip = cb->args[1]; + arg.w.count = 0; + q->ops->cl_ops->walk(q, &arg.w); + cb->args[1] = arg.w.count; + if (arg.w.stop) + return -1; + (*t_p)++; + return 0; +} + +static int tc_dump_tclass_root(struct Qdisc *root, struct sk_buff *skb, + struct tcmsg *tcm, struct netlink_callback *cb, + int *t_p, int s_t) +{ + struct Qdisc *q; + + if (!root) + return 0; + + if (tc_dump_tclass_qdisc(root, skb, tcm, cb, t_p, s_t) < 0) + return -1; + + list_for_each_entry(q, &root->list, list) { + if (tc_dump_tclass_qdisc(q, skb, tcm, cb, t_p, s_t) < 0) + return -1; + } + + return 0; +} + static int tc_dump_tclass(struct sk_buff *skb, struct netlink_callback *cb) { + struct tcmsg *tcm = (struct tcmsg*)NLMSG_DATA(cb->nlh); struct net *net = sock_net(skb->sk); - int t; - int s_t; + struct netdev_queue *dev_queue; struct net_device *dev; - struct Qdisc *q; - struct tcmsg *tcm = (struct tcmsg*)NLMSG_DATA(cb->nlh); - struct qdisc_dump_args arg; + int t, s_t; if (net != &init_net) return 0; @@ -1161,28 +1581,15 @@ static int tc_dump_tclass(struct sk_buff *skb, struct netlink_callback *cb) s_t = cb->args[0]; t = 0; - list_for_each_entry(q, &dev->qdisc_list, list) { - if (t < s_t || !q->ops->cl_ops || - (tcm->tcm_parent && - TC_H_MAJ(tcm->tcm_parent) != q->handle)) { - t++; - continue; - } - if (t > s_t) - memset(&cb->args[1], 0, sizeof(cb->args)-sizeof(cb->args[0])); - arg.w.fn = qdisc_class_dump; - arg.skb = skb; - arg.cb = cb; - arg.w.stop = 0; - arg.w.skip = cb->args[1]; - arg.w.count = 0; - q->ops->cl_ops->walk(q, &arg.w); - cb->args[1] = arg.w.count; - if (arg.w.stop) - break; - t++; - } + dev_queue = netdev_get_tx_queue(dev, 0); + if (tc_dump_tclass_root(dev_queue->qdisc_sleeping, skb, tcm, cb, &t, s_t) < 0) + goto done; + + dev_queue = &dev->rx_queue; + if (tc_dump_tclass_root(dev_queue->qdisc_sleeping, skb, tcm, cb, &t, s_t) < 0) + goto done; +done: cb->args[0] = t; dev_put(dev); @@ -1252,12 +1659,12 @@ void tcf_destroy(struct tcf_proto *tp) kfree(tp); } -void tcf_destroy_chain(struct tcf_proto *fl) +void tcf_destroy_chain(struct tcf_proto **fl) { struct tcf_proto *tp; - while ((tp = fl) != NULL) { - fl = tp->next; + while ((tp = *fl) != NULL) { + *fl = tp->next; tcf_destroy(tp); } } diff --git a/net/sched/sch_atm.c b/net/sched/sch_atm.c index 335273416384..43d37256c15e 100644 --- a/net/sched/sch_atm.c +++ b/net/sched/sch_atm.c @@ -160,9 +160,9 @@ static void atm_tc_put(struct Qdisc *sch, unsigned long cl) *prev = flow->next; pr_debug("atm_tc_put: qdisc %p\n", flow->q); qdisc_destroy(flow->q); - tcf_destroy_chain(flow->filter_list); + tcf_destroy_chain(&flow->filter_list); if (flow->sock) { - pr_debug("atm_tc_put: f_count %d\n", + pr_debug("atm_tc_put: f_count %ld\n", file_count(flow->sock->file)); flow->vcc->pop = flow->old_pop; sockfd_put(flow->sock); @@ -259,7 +259,7 @@ static int atm_tc_change(struct Qdisc *sch, u32 classid, u32 parent, sock = sockfd_lookup(fd, &error); if (!sock) return error; /* f_count++ */ - pr_debug("atm_tc_change: f_count %d\n", file_count(sock->file)); + pr_debug("atm_tc_change: f_count %ld\n", file_count(sock->file)); if (sock->ops->family != PF_ATMSVC && sock->ops->family != PF_ATMPVC) { error = -EPROTOTYPE; goto err_out; @@ -296,7 +296,8 @@ static int atm_tc_change(struct Qdisc *sch, u32 classid, u32 parent, goto err_out; } flow->filter_list = NULL; - flow->q = qdisc_create_dflt(sch->dev, &pfifo_qdisc_ops, classid); + flow->q = qdisc_create_dflt(qdisc_dev(sch), sch->dev_queue, + &pfifo_qdisc_ops, classid); if (!flow->q) flow->q = &noop_qdisc; pr_debug("atm_tc_change: qdisc %p\n", flow->q); @@ -414,7 +415,7 @@ static int atm_tc_enqueue(struct sk_buff *skb, struct Qdisc *sch) case TC_ACT_QUEUED: case TC_ACT_STOLEN: kfree_skb(skb); - return NET_XMIT_SUCCESS; + return NET_XMIT_SUCCESS | __NET_XMIT_STOLEN; case TC_ACT_SHOT: kfree_skb(skb); goto drop; @@ -428,17 +429,19 @@ static int atm_tc_enqueue(struct sk_buff *skb, struct Qdisc *sch) #endif } - ret = flow->q->enqueue(skb, flow->q); + ret = qdisc_enqueue(skb, flow->q); if (ret != 0) { drop: __maybe_unused - sch->qstats.drops++; - if (flow) - flow->qstats.drops++; + if (net_xmit_drop_count(ret)) { + sch->qstats.drops++; + if (flow) + flow->qstats.drops++; + } return ret; } - sch->bstats.bytes += skb->len; + sch->bstats.bytes += qdisc_pkt_len(skb); sch->bstats.packets++; - flow->bstats.bytes += skb->len; + flow->bstats.bytes += qdisc_pkt_len(skb); flow->bstats.packets++; /* * Okay, this may seem weird. We pretend we've dropped the packet if @@ -454,7 +457,7 @@ drop: __maybe_unused return 0; } tasklet_schedule(&p->task); - return NET_XMIT_BYPASS; + return NET_XMIT_SUCCESS | __NET_XMIT_BYPASS; } /* @@ -529,7 +532,7 @@ static int atm_tc_requeue(struct sk_buff *skb, struct Qdisc *sch) if (!ret) { sch->q.qlen++; sch->qstats.requeues++; - } else { + } else if (net_xmit_drop_count(ret)) { sch->qstats.drops++; p->link.qstats.drops++; } @@ -555,7 +558,8 @@ static int atm_tc_init(struct Qdisc *sch, struct nlattr *opt) pr_debug("atm_tc_init(sch %p,[qdisc %p],opt %p)\n", sch, p, opt); p->flows = &p->link; - p->link.q = qdisc_create_dflt(sch->dev, &pfifo_qdisc_ops, sch->handle); + p->link.q = qdisc_create_dflt(qdisc_dev(sch), sch->dev_queue, + &pfifo_qdisc_ops, sch->handle); if (!p->link.q) p->link.q = &noop_qdisc; pr_debug("atm_tc_init: link (%p) qdisc %p\n", &p->link, p->link.q); @@ -586,10 +590,11 @@ static void atm_tc_destroy(struct Qdisc *sch) struct atm_flow_data *flow; pr_debug("atm_tc_destroy(sch %p,[qdisc %p])\n", sch, p); + for (flow = p->flows; flow; flow = flow->next) + tcf_destroy_chain(&flow->filter_list); + /* races ? */ while ((flow = p->flows)) { - tcf_destroy_chain(flow->filter_list); - flow->filter_list = NULL; if (flow->ref > 1) printk(KERN_ERR "atm_destroy: %p->ref = %d\n", flow, flow->ref); diff --git a/net/sched/sch_cbq.c b/net/sched/sch_cbq.c index 09969c1fbc08..8b06fa900482 100644 --- a/net/sched/sch_cbq.c +++ b/net/sched/sch_cbq.c @@ -73,11 +73,10 @@ struct cbq_sched_data; struct cbq_class { - struct cbq_class *next; /* hash table link */ + struct Qdisc_class_common common; struct cbq_class *next_alive; /* next class with backlog in this priority band */ /* Parameters */ - u32 classid; unsigned char priority; /* class priority */ unsigned char priority2; /* priority to be used after overlimit */ unsigned char ewma_log; /* time constant for idle time calculation */ @@ -144,7 +143,7 @@ struct cbq_class struct cbq_sched_data { - struct cbq_class *classes[16]; /* Hash table of all classes */ + struct Qdisc_class_hash clhash; /* Hash table of all classes */ int nclasses[TC_CBQ_MAXPRIO+1]; unsigned quanta[TC_CBQ_MAXPRIO+1]; @@ -177,23 +176,15 @@ struct cbq_sched_data #define L2T(cl,len) qdisc_l2t((cl)->R_tab,len) - -static __inline__ unsigned cbq_hash(u32 h) -{ - h ^= h>>8; - h ^= h>>4; - return h&0xF; -} - static __inline__ struct cbq_class * cbq_class_lookup(struct cbq_sched_data *q, u32 classid) { - struct cbq_class *cl; + struct Qdisc_class_common *clc; - for (cl = q->classes[cbq_hash(classid)]; cl; cl = cl->next) - if (cl->classid == classid) - return cl; - return NULL; + clc = qdisc_class_find(&q->clhash, classid); + if (clc == NULL) + return NULL; + return container_of(clc, struct cbq_class, common); } #ifdef CONFIG_NET_CLS_ACT @@ -239,7 +230,7 @@ cbq_classify(struct sk_buff *skb, struct Qdisc *sch, int *qerr) (cl = cbq_class_lookup(q, prio)) != NULL) return cl; - *qerr = NET_XMIT_BYPASS; + *qerr = NET_XMIT_SUCCESS | __NET_XMIT_BYPASS; for (;;) { int result = 0; defmap = head->defaults; @@ -265,7 +256,7 @@ cbq_classify(struct sk_buff *skb, struct Qdisc *sch, int *qerr) switch (result) { case TC_ACT_QUEUED: case TC_ACT_STOLEN: - *qerr = NET_XMIT_SUCCESS; + *qerr = NET_XMIT_SUCCESS | __NET_XMIT_STOLEN; case TC_ACT_SHOT: return NULL; case TC_ACT_RECLASSIFY: @@ -379,7 +370,6 @@ static int cbq_enqueue(struct sk_buff *skb, struct Qdisc *sch) { struct cbq_sched_data *q = qdisc_priv(sch); - int len = skb->len; int uninitialized_var(ret); struct cbq_class *cl = cbq_classify(skb, sch, &ret); @@ -387,7 +377,7 @@ cbq_enqueue(struct sk_buff *skb, struct Qdisc *sch) q->rx_class = cl; #endif if (cl == NULL) { - if (ret == NET_XMIT_BYPASS) + if (ret & __NET_XMIT_BYPASS) sch->qstats.drops++; kfree_skb(skb); return ret; @@ -396,19 +386,22 @@ cbq_enqueue(struct sk_buff *skb, struct Qdisc *sch) #ifdef CONFIG_NET_CLS_ACT cl->q->__parent = sch; #endif - if ((ret = cl->q->enqueue(skb, cl->q)) == NET_XMIT_SUCCESS) { + ret = qdisc_enqueue(skb, cl->q); + if (ret == NET_XMIT_SUCCESS) { sch->q.qlen++; sch->bstats.packets++; - sch->bstats.bytes+=len; + sch->bstats.bytes += qdisc_pkt_len(skb); cbq_mark_toplevel(q, cl); if (!cl->next_alive) cbq_activate_class(cl); return ret; } - sch->qstats.drops++; - cbq_mark_toplevel(q, cl); - cl->qstats.drops++; + if (net_xmit_drop_count(ret)) { + sch->qstats.drops++; + cbq_mark_toplevel(q, cl); + cl->qstats.drops++; + } return ret; } @@ -439,8 +432,10 @@ cbq_requeue(struct sk_buff *skb, struct Qdisc *sch) cbq_activate_class(cl); return 0; } - sch->qstats.drops++; - cl->qstats.drops++; + if (net_xmit_drop_count(ret)) { + sch->qstats.drops++; + cl->qstats.drops++; + } return ret; } @@ -526,6 +521,10 @@ static void cbq_ovl_delay(struct cbq_class *cl) struct cbq_sched_data *q = qdisc_priv(cl->qdisc); psched_tdiff_t delay = cl->undertime - q->now; + if (test_bit(__QDISC_STATE_DEACTIVATED, + &qdisc_root_sleeping(cl->qdisc)->state)) + return; + if (!cl->delayed) { psched_time_t sched = q->now; ktime_t expires; @@ -659,14 +658,13 @@ static enum hrtimer_restart cbq_undelay(struct hrtimer *timer) } sch->flags &= ~TCQ_F_THROTTLED; - netif_schedule(sch->dev); + __netif_schedule(qdisc_root(sch)); return HRTIMER_NORESTART; } #ifdef CONFIG_NET_CLS_ACT static int cbq_reshape_fail(struct sk_buff *skb, struct Qdisc *child) { - int len = skb->len; struct Qdisc *sch = child->__parent; struct cbq_sched_data *q = qdisc_priv(sch); struct cbq_class *cl = q->rx_class; @@ -674,21 +672,24 @@ static int cbq_reshape_fail(struct sk_buff *skb, struct Qdisc *child) q->rx_class = NULL; if (cl && (cl = cbq_reclassify(skb, cl)) != NULL) { + int ret; cbq_mark_toplevel(q, cl); q->rx_class = cl; cl->q->__parent = sch; - if (cl->q->enqueue(skb, cl->q) == 0) { + ret = qdisc_enqueue(skb, cl->q); + if (ret == NET_XMIT_SUCCESS) { sch->q.qlen++; sch->bstats.packets++; - sch->bstats.bytes+=len; + sch->bstats.bytes += qdisc_pkt_len(skb); if (!cl->next_alive) cbq_activate_class(cl); return 0; } - sch->qstats.drops++; + if (net_xmit_drop_count(ret)) + sch->qstats.drops++; return 0; } @@ -889,7 +890,7 @@ cbq_dequeue_prio(struct Qdisc *sch, int prio) if (skb == NULL) goto skip_class; - cl->deficit -= skb->len; + cl->deficit -= qdisc_pkt_len(skb); q->tx_class = cl; q->tx_borrowed = borrow; if (borrow != cl) { @@ -897,11 +898,11 @@ cbq_dequeue_prio(struct Qdisc *sch, int prio) borrow->xstats.borrows++; cl->xstats.borrows++; #else - borrow->xstats.borrows += skb->len; - cl->xstats.borrows += skb->len; + borrow->xstats.borrows += qdisc_pkt_len(skb); + cl->xstats.borrows += qdisc_pkt_len(skb); #endif } - q->tx_len = skb->len; + q->tx_len = qdisc_pkt_len(skb); if (cl->deficit <= 0) { q->active[prio] = cl; @@ -1071,13 +1072,14 @@ static void cbq_adjust_levels(struct cbq_class *this) static void cbq_normalize_quanta(struct cbq_sched_data *q, int prio) { struct cbq_class *cl; - unsigned h; + struct hlist_node *n; + unsigned int h; if (q->quanta[prio] == 0) return; - for (h=0; h<16; h++) { - for (cl = q->classes[h]; cl; cl = cl->next) { + for (h = 0; h < q->clhash.hashsize; h++) { + hlist_for_each_entry(cl, n, &q->clhash.hash[h], common.hnode) { /* BUGGGG... Beware! This expression suffer of arithmetic overflows! */ @@ -1085,9 +1087,9 @@ static void cbq_normalize_quanta(struct cbq_sched_data *q, int prio) cl->quantum = (cl->weight*cl->allot*q->nclasses[prio])/ q->quanta[prio]; } - if (cl->quantum <= 0 || cl->quantum>32*cl->qdisc->dev->mtu) { - printk(KERN_WARNING "CBQ: class %08x has bad quantum==%ld, repaired.\n", cl->classid, cl->quantum); - cl->quantum = cl->qdisc->dev->mtu/2 + 1; + if (cl->quantum <= 0 || cl->quantum>32*qdisc_dev(cl->qdisc)->mtu) { + printk(KERN_WARNING "CBQ: class %08x has bad quantum==%ld, repaired.\n", cl->common.classid, cl->quantum); + cl->quantum = qdisc_dev(cl->qdisc)->mtu/2 + 1; } } } @@ -1114,10 +1116,12 @@ static void cbq_sync_defmap(struct cbq_class *cl) if (split->defaults[i]) continue; - for (h=0; h<16; h++) { + for (h = 0; h < q->clhash.hashsize; h++) { + struct hlist_node *n; struct cbq_class *c; - for (c = q->classes[h]; c; c = c->next) { + hlist_for_each_entry(c, n, &q->clhash.hash[h], + common.hnode) { if (c->split == split && c->level < level && c->defmap&(1<<i)) { split->defaults[i] = c; @@ -1135,12 +1139,12 @@ static void cbq_change_defmap(struct cbq_class *cl, u32 splitid, u32 def, u32 ma if (splitid == 0) { if ((split = cl->split) == NULL) return; - splitid = split->classid; + splitid = split->common.classid; } - if (split == NULL || split->classid != splitid) { + if (split == NULL || split->common.classid != splitid) { for (split = cl->tparent; split; split = split->tparent) - if (split->classid == splitid) + if (split->common.classid == splitid) break; } @@ -1163,13 +1167,7 @@ static void cbq_unlink_class(struct cbq_class *this) struct cbq_class *cl, **clp; struct cbq_sched_data *q = qdisc_priv(this->qdisc); - for (clp = &q->classes[cbq_hash(this->classid)]; (cl = *clp) != NULL; clp = &cl->next) { - if (cl == this) { - *clp = cl->next; - cl->next = NULL; - break; - } - } + qdisc_class_hash_remove(&q->clhash, &this->common); if (this->tparent) { clp=&this->sibling; @@ -1188,19 +1186,17 @@ static void cbq_unlink_class(struct cbq_class *this) this->tparent->children = NULL; } } else { - BUG_TRAP(this->sibling == this); + WARN_ON(this->sibling != this); } } static void cbq_link_class(struct cbq_class *this) { struct cbq_sched_data *q = qdisc_priv(this->qdisc); - unsigned h = cbq_hash(this->classid); struct cbq_class *parent = this->tparent; this->sibling = this; - this->next = q->classes[h]; - q->classes[h] = this; + qdisc_class_hash_insert(&q->clhash, &this->common); if (parent == NULL) return; @@ -1242,6 +1238,7 @@ cbq_reset(struct Qdisc* sch) { struct cbq_sched_data *q = qdisc_priv(sch); struct cbq_class *cl; + struct hlist_node *n; int prio; unsigned h; @@ -1258,8 +1255,8 @@ cbq_reset(struct Qdisc* sch) for (prio = 0; prio <= TC_CBQ_MAXPRIO; prio++) q->active[prio] = NULL; - for (h = 0; h < 16; h++) { - for (cl = q->classes[h]; cl; cl = cl->next) { + for (h = 0; h < q->clhash.hashsize; h++) { + hlist_for_each_entry(cl, n, &q->clhash.hash[h], common.hnode) { qdisc_reset(cl->q); cl->next_alive = NULL; @@ -1406,11 +1403,16 @@ static int cbq_init(struct Qdisc *sch, struct nlattr *opt) if ((q->link.R_tab = qdisc_get_rtab(r, tb[TCA_CBQ_RTAB])) == NULL) return -EINVAL; + err = qdisc_class_hash_init(&q->clhash); + if (err < 0) + goto put_rtab; + q->link.refcnt = 1; q->link.sibling = &q->link; - q->link.classid = sch->handle; + q->link.common.classid = sch->handle; q->link.qdisc = sch; - if (!(q->link.q = qdisc_create_dflt(sch->dev, &pfifo_qdisc_ops, + if (!(q->link.q = qdisc_create_dflt(qdisc_dev(sch), sch->dev_queue, + &pfifo_qdisc_ops, sch->handle))) q->link.q = &noop_qdisc; @@ -1419,7 +1421,7 @@ static int cbq_init(struct Qdisc *sch, struct nlattr *opt) q->link.cpriority = TC_CBQ_MAXPRIO-1; q->link.ovl_strategy = TC_CBQ_OVL_CLASSIC; q->link.overlimit = cbq_ovl_classic; - q->link.allot = psched_mtu(sch->dev); + q->link.allot = psched_mtu(qdisc_dev(sch)); q->link.quantum = q->link.allot; q->link.weight = q->link.R_tab->rate.rate; @@ -1441,6 +1443,10 @@ static int cbq_init(struct Qdisc *sch, struct nlattr *opt) cbq_addprio(q, &q->link); return 0; + +put_rtab: + qdisc_put_rtab(q->link.R_tab); + return err; } static __inline__ int cbq_dump_rate(struct sk_buff *skb, struct cbq_class *cl) @@ -1521,7 +1527,7 @@ static __inline__ int cbq_dump_fopt(struct sk_buff *skb, struct cbq_class *cl) struct tc_cbq_fopt opt; if (cl->split || cl->defmap) { - opt.split = cl->split ? cl->split->classid : 0; + opt.split = cl->split ? cl->split->common.classid : 0; opt.defmap = cl->defmap; opt.defchange = ~0; NLA_PUT(skb, TCA_CBQ_FOPT, sizeof(opt), &opt); @@ -1602,10 +1608,10 @@ cbq_dump_class(struct Qdisc *sch, unsigned long arg, struct nlattr *nest; if (cl->tparent) - tcm->tcm_parent = cl->tparent->classid; + tcm->tcm_parent = cl->tparent->common.classid; else tcm->tcm_parent = TC_H_ROOT; - tcm->tcm_handle = cl->classid; + tcm->tcm_handle = cl->common.classid; tcm->tcm_info = cl->q->handle; nest = nla_nest_start(skb, TCA_OPTIONS); @@ -1650,8 +1656,10 @@ static int cbq_graft(struct Qdisc *sch, unsigned long arg, struct Qdisc *new, if (cl) { if (new == NULL) { - if ((new = qdisc_create_dflt(sch->dev, &pfifo_qdisc_ops, - cl->classid)) == NULL) + new = qdisc_create_dflt(qdisc_dev(sch), sch->dev_queue, + &pfifo_qdisc_ops, + cl->common.classid); + if (new == NULL) return -ENOBUFS; } else { #ifdef CONFIG_NET_CLS_ACT @@ -1702,9 +1710,9 @@ static void cbq_destroy_class(struct Qdisc *sch, struct cbq_class *cl) { struct cbq_sched_data *q = qdisc_priv(sch); - BUG_TRAP(!cl->filters); + WARN_ON(cl->filters); - tcf_destroy_chain(cl->filter_list); + tcf_destroy_chain(&cl->filter_list); qdisc_destroy(cl->q); qdisc_put_rtab(cl->R_tab); gen_kill_estimator(&cl->bstats, &cl->rate_est); @@ -1716,6 +1724,7 @@ static void cbq_destroy(struct Qdisc* sch) { struct cbq_sched_data *q = qdisc_priv(sch); + struct hlist_node *n, *next; struct cbq_class *cl; unsigned h; @@ -1727,20 +1736,16 @@ cbq_destroy(struct Qdisc* sch) * classes from root to leafs which means that filters can still * be bound to classes which have been destroyed already. --TGR '04 */ - for (h = 0; h < 16; h++) { - for (cl = q->classes[h]; cl; cl = cl->next) { - tcf_destroy_chain(cl->filter_list); - cl->filter_list = NULL; - } + for (h = 0; h < q->clhash.hashsize; h++) { + hlist_for_each_entry(cl, n, &q->clhash.hash[h], common.hnode) + tcf_destroy_chain(&cl->filter_list); } - for (h = 0; h < 16; h++) { - struct cbq_class *next; - - for (cl = q->classes[h]; cl; cl = next) { - next = cl->next; + for (h = 0; h < q->clhash.hashsize; h++) { + hlist_for_each_entry_safe(cl, n, next, &q->clhash.hash[h], + common.hnode) cbq_destroy_class(sch, cl); - } } + qdisc_class_hash_destroy(&q->clhash); } static void cbq_put(struct Qdisc *sch, unsigned long arg) @@ -1749,12 +1754,13 @@ static void cbq_put(struct Qdisc *sch, unsigned long arg) if (--cl->refcnt == 0) { #ifdef CONFIG_NET_CLS_ACT + spinlock_t *root_lock = qdisc_root_sleeping_lock(sch); struct cbq_sched_data *q = qdisc_priv(sch); - spin_lock_bh(&sch->dev->queue_lock); + spin_lock_bh(root_lock); if (q->rx_class == cl) q->rx_class = NULL; - spin_unlock_bh(&sch->dev->queue_lock); + spin_unlock_bh(root_lock); #endif cbq_destroy_class(sch, cl); @@ -1783,7 +1789,8 @@ cbq_change_class(struct Qdisc *sch, u32 classid, u32 parentid, struct nlattr **t if (cl) { /* Check parent */ if (parentid) { - if (cl->tparent && cl->tparent->classid != parentid) + if (cl->tparent && + cl->tparent->common.classid != parentid) return -EINVAL; if (!cl->tparent && parentid != TC_H_ROOT) return -EINVAL; @@ -1832,7 +1839,7 @@ cbq_change_class(struct Qdisc *sch, u32 classid, u32 parentid, struct nlattr **t if (tca[TCA_RATE]) gen_replace_estimator(&cl->bstats, &cl->rate_est, - &sch->dev->queue_lock, + qdisc_root_sleeping_lock(sch), tca[TCA_RATE]); return 0; } @@ -1883,9 +1890,10 @@ cbq_change_class(struct Qdisc *sch, u32 classid, u32 parentid, struct nlattr **t cl->R_tab = rtab; rtab = NULL; cl->refcnt = 1; - if (!(cl->q = qdisc_create_dflt(sch->dev, &pfifo_qdisc_ops, classid))) + if (!(cl->q = qdisc_create_dflt(qdisc_dev(sch), sch->dev_queue, + &pfifo_qdisc_ops, classid))) cl->q = &noop_qdisc; - cl->classid = classid; + cl->common.classid = classid; cl->tparent = parent; cl->qdisc = sch; cl->allot = parent->allot; @@ -1918,9 +1926,11 @@ cbq_change_class(struct Qdisc *sch, u32 classid, u32 parentid, struct nlattr **t cbq_set_fopt(cl, nla_data(tb[TCA_CBQ_FOPT])); sch_tree_unlock(sch); + qdisc_class_hash_grow(sch, &q->clhash); + if (tca[TCA_RATE]) gen_new_estimator(&cl->bstats, &cl->rate_est, - &sch->dev->queue_lock, tca[TCA_RATE]); + qdisc_root_sleeping_lock(sch), tca[TCA_RATE]); *arg = (unsigned long)cl; return 0; @@ -2010,15 +2020,15 @@ static void cbq_unbind_filter(struct Qdisc *sch, unsigned long arg) static void cbq_walk(struct Qdisc *sch, struct qdisc_walker *arg) { struct cbq_sched_data *q = qdisc_priv(sch); + struct cbq_class *cl; + struct hlist_node *n; unsigned h; if (arg->stop) return; - for (h = 0; h < 16; h++) { - struct cbq_class *cl; - - for (cl = q->classes[h]; cl; cl = cl->next) { + for (h = 0; h < q->clhash.hashsize; h++) { + hlist_for_each_entry(cl, n, &q->clhash.hash[h], common.hnode) { if (arg->count < arg->skip) { arg->count++; continue; diff --git a/net/sched/sch_dsmark.c b/net/sched/sch_dsmark.c index 64465bacbe79..ba43aab3a851 100644 --- a/net/sched/sch_dsmark.c +++ b/net/sched/sch_dsmark.c @@ -60,7 +60,8 @@ static int dsmark_graft(struct Qdisc *sch, unsigned long arg, sch, p, new, old); if (new == NULL) { - new = qdisc_create_dflt(sch->dev, &pfifo_qdisc_ops, + new = qdisc_create_dflt(qdisc_dev(sch), sch->dev_queue, + &pfifo_qdisc_ops, sch->handle); if (new == NULL) new = &noop_qdisc; @@ -201,7 +202,7 @@ static int dsmark_enqueue(struct sk_buff *skb, struct Qdisc *sch) if (p->set_tc_index) { switch (skb->protocol) { - case __constant_htons(ETH_P_IP): + case htons(ETH_P_IP): if (skb_cow_head(skb, sizeof(struct iphdr))) goto drop; @@ -209,7 +210,7 @@ static int dsmark_enqueue(struct sk_buff *skb, struct Qdisc *sch) & ~INET_ECN_MASK; break; - case __constant_htons(ETH_P_IPV6): + case htons(ETH_P_IPV6): if (skb_cow_head(skb, sizeof(struct ipv6hdr))) goto drop; @@ -235,7 +236,7 @@ static int dsmark_enqueue(struct sk_buff *skb, struct Qdisc *sch) case TC_ACT_QUEUED: case TC_ACT_STOLEN: kfree_skb(skb); - return NET_XMIT_SUCCESS; + return NET_XMIT_SUCCESS | __NET_XMIT_STOLEN; case TC_ACT_SHOT: goto drop; @@ -251,13 +252,14 @@ static int dsmark_enqueue(struct sk_buff *skb, struct Qdisc *sch) } } - err = p->q->enqueue(skb, p->q); + err = qdisc_enqueue(skb, p->q); if (err != NET_XMIT_SUCCESS) { - sch->qstats.drops++; + if (net_xmit_drop_count(err)) + sch->qstats.drops++; return err; } - sch->bstats.bytes += skb->len; + sch->bstats.bytes += qdisc_pkt_len(skb); sch->bstats.packets++; sch->q.qlen++; @@ -266,7 +268,7 @@ static int dsmark_enqueue(struct sk_buff *skb, struct Qdisc *sch) drop: kfree_skb(skb); sch->qstats.drops++; - return NET_XMIT_BYPASS; + return NET_XMIT_SUCCESS | __NET_XMIT_BYPASS; } static struct sk_buff *dsmark_dequeue(struct Qdisc *sch) @@ -287,11 +289,11 @@ static struct sk_buff *dsmark_dequeue(struct Qdisc *sch) pr_debug("index %d->%d\n", skb->tc_index, index); switch (skb->protocol) { - case __constant_htons(ETH_P_IP): + case htons(ETH_P_IP): ipv4_change_dsfield(ip_hdr(skb), p->mask[index], p->value[index]); break; - case __constant_htons(ETH_P_IPV6): + case htons(ETH_P_IPV6): ipv6_change_dsfield(ipv6_hdr(skb), p->mask[index], p->value[index]); break; @@ -320,7 +322,8 @@ static int dsmark_requeue(struct sk_buff *skb, struct Qdisc *sch) err = p->q->ops->requeue(skb, p->q); if (err != NET_XMIT_SUCCESS) { - sch->qstats.drops++; + if (net_xmit_drop_count(err)) + sch->qstats.drops++; return err; } @@ -390,7 +393,8 @@ static int dsmark_init(struct Qdisc *sch, struct nlattr *opt) p->default_index = default_index; p->set_tc_index = nla_get_flag(tb[TCA_DSMARK_SET_TC_INDEX]); - p->q = qdisc_create_dflt(sch->dev, &pfifo_qdisc_ops, sch->handle); + p->q = qdisc_create_dflt(qdisc_dev(sch), sch->dev_queue, + &pfifo_qdisc_ops, sch->handle); if (p->q == NULL) p->q = &noop_qdisc; @@ -416,7 +420,7 @@ static void dsmark_destroy(struct Qdisc *sch) pr_debug("dsmark_destroy(sch %p,[qdisc %p])\n", sch, p); - tcf_destroy_chain(p->filter_list); + tcf_destroy_chain(&p->filter_list); qdisc_destroy(p->q); kfree(p->mask); } diff --git a/net/sched/sch_fifo.c b/net/sched/sch_fifo.c index 95ed48221652..23d258bfe8ac 100644 --- a/net/sched/sch_fifo.c +++ b/net/sched/sch_fifo.c @@ -27,7 +27,7 @@ static int bfifo_enqueue(struct sk_buff *skb, struct Qdisc* sch) { struct fifo_sched_data *q = qdisc_priv(sch); - if (likely(sch->qstats.backlog + skb->len <= q->limit)) + if (likely(sch->qstats.backlog + qdisc_pkt_len(skb) <= q->limit)) return qdisc_enqueue_tail(skb, sch); return qdisc_reshape_fail(skb, sch); @@ -48,10 +48,10 @@ static int fifo_init(struct Qdisc *sch, struct nlattr *opt) struct fifo_sched_data *q = qdisc_priv(sch); if (opt == NULL) { - u32 limit = sch->dev->tx_queue_len ? : 1; + u32 limit = qdisc_dev(sch)->tx_queue_len ? : 1; if (sch->ops == &bfifo_qdisc_ops) - limit *= sch->dev->mtu; + limit *= qdisc_dev(sch)->mtu; q->limit = limit; } else { @@ -107,3 +107,46 @@ struct Qdisc_ops bfifo_qdisc_ops __read_mostly = { .owner = THIS_MODULE, }; EXPORT_SYMBOL(bfifo_qdisc_ops); + +/* Pass size change message down to embedded FIFO */ +int fifo_set_limit(struct Qdisc *q, unsigned int limit) +{ + struct nlattr *nla; + int ret = -ENOMEM; + + /* Hack to avoid sending change message to non-FIFO */ + if (strncmp(q->ops->id + 1, "fifo", 4) != 0) + return 0; + + nla = kmalloc(nla_attr_size(sizeof(struct tc_fifo_qopt)), GFP_KERNEL); + if (nla) { + nla->nla_type = RTM_NEWQDISC; + nla->nla_len = nla_attr_size(sizeof(struct tc_fifo_qopt)); + ((struct tc_fifo_qopt *)nla_data(nla))->limit = limit; + + ret = q->ops->change(q, nla); + kfree(nla); + } + return ret; +} +EXPORT_SYMBOL(fifo_set_limit); + +struct Qdisc *fifo_create_dflt(struct Qdisc *sch, struct Qdisc_ops *ops, + unsigned int limit) +{ + struct Qdisc *q; + int err = -ENOMEM; + + q = qdisc_create_dflt(qdisc_dev(sch), sch->dev_queue, + ops, TC_H_MAKE(sch->handle, 1)); + if (q) { + err = fifo_set_limit(q, limit); + if (err < 0) { + qdisc_destroy(q); + q = NULL; + } + } + + return q ? : ERR_PTR(err); +} +EXPORT_SYMBOL(fifo_create_dflt); diff --git a/net/sched/sch_generic.c b/net/sched/sch_generic.c index d355e5e47fe3..7b5572d6beb5 100644 --- a/net/sched/sch_generic.c +++ b/net/sched/sch_generic.c @@ -29,71 +29,56 @@ /* Main transmission queue. */ /* Modifications to data participating in scheduling must be protected with - * dev->queue_lock spinlock. + * qdisc_lock(qdisc) spinlock. * * The idea is the following: - * - enqueue, dequeue are serialized via top level device - * spinlock dev->queue_lock. - * - ingress filtering is serialized via top level device - * spinlock dev->ingress_lock. + * - enqueue, dequeue are serialized via qdisc root lock + * - ingress filtering is also serialized via qdisc root lock * - updates to tree and tree walking are only done under the rtnl mutex. */ -void qdisc_lock_tree(struct net_device *dev) - __acquires(dev->queue_lock) - __acquires(dev->ingress_lock) -{ - spin_lock_bh(&dev->queue_lock); - spin_lock(&dev->ingress_lock); -} -EXPORT_SYMBOL(qdisc_lock_tree); - -void qdisc_unlock_tree(struct net_device *dev) - __releases(dev->ingress_lock) - __releases(dev->queue_lock) -{ - spin_unlock(&dev->ingress_lock); - spin_unlock_bh(&dev->queue_lock); -} -EXPORT_SYMBOL(qdisc_unlock_tree); - static inline int qdisc_qlen(struct Qdisc *q) { return q->q.qlen; } -static inline int dev_requeue_skb(struct sk_buff *skb, struct net_device *dev, - struct Qdisc *q) +static inline int dev_requeue_skb(struct sk_buff *skb, struct Qdisc *q) { - if (unlikely(skb->next)) - dev->gso_skb = skb; - else - q->ops->requeue(skb, q); + q->gso_skb = skb; + q->qstats.requeues++; + __netif_schedule(q); - netif_schedule(dev); return 0; } -static inline struct sk_buff *dev_dequeue_skb(struct net_device *dev, - struct Qdisc *q) +static inline struct sk_buff *dequeue_skb(struct Qdisc *q) { - struct sk_buff *skb; + struct sk_buff *skb = q->gso_skb; + + if (unlikely(skb)) { + struct net_device *dev = qdisc_dev(q); + struct netdev_queue *txq; - if ((skb = dev->gso_skb)) - dev->gso_skb = NULL; - else + /* check the reason of requeuing without tx lock first */ + txq = netdev_get_tx_queue(dev, skb_get_queue_mapping(skb)); + if (!netif_tx_queue_stopped(txq) && !netif_tx_queue_frozen(txq)) + q->gso_skb = NULL; + else + skb = NULL; + } else { skb = q->dequeue(q); + } return skb; } static inline int handle_dev_cpu_collision(struct sk_buff *skb, - struct net_device *dev, + struct netdev_queue *dev_queue, struct Qdisc *q) { int ret; - if (unlikely(dev->xmit_lock_owner == smp_processor_id())) { + if (unlikely(dev_queue->xmit_lock_owner == smp_processor_id())) { /* * Same CPU holding the lock. It may be a transient * configuration error, when hard_start_xmit() recurses. We @@ -103,7 +88,7 @@ static inline int handle_dev_cpu_collision(struct sk_buff *skb, kfree_skb(skb); if (net_ratelimit()) printk(KERN_WARNING "Dead loop on netdevice %s, " - "fix it urgently!\n", dev->name); + "fix it urgently!\n", dev_queue->dev->name); ret = qdisc_qlen(q); } else { /* @@ -111,22 +96,22 @@ static inline int handle_dev_cpu_collision(struct sk_buff *skb, * some time. */ __get_cpu_var(netdev_rx_stat).cpu_collision++; - ret = dev_requeue_skb(skb, dev, q); + ret = dev_requeue_skb(skb, q); } return ret; } /* - * NOTE: Called under dev->queue_lock with locally disabled BH. + * NOTE: Called under qdisc_lock(q) with locally disabled BH. * - * __LINK_STATE_QDISC_RUNNING guarantees only one CPU can process this - * device at a time. dev->queue_lock serializes queue accesses for - * this device AND dev->qdisc pointer itself. + * __QDISC_STATE_RUNNING guarantees only one CPU can process + * this qdisc at a time. qdisc_lock(q) serializes queue accesses for + * this queue. * * netif_tx_lock serializes accesses to device driver. * - * dev->queue_lock and netif_tx_lock are mutually exclusive, + * qdisc_lock(q) and netif_tx_lock are mutually exclusive, * if one is grabbed, another must be free. * * Note, that this procedure can be called by a watchdog timer @@ -136,27 +121,33 @@ static inline int handle_dev_cpu_collision(struct sk_buff *skb, * >0 - queue is not empty. * */ -static inline int qdisc_restart(struct net_device *dev) +static inline int qdisc_restart(struct Qdisc *q) { - struct Qdisc *q = dev->qdisc; - struct sk_buff *skb; + struct netdev_queue *txq; int ret = NETDEV_TX_BUSY; + struct net_device *dev; + spinlock_t *root_lock; + struct sk_buff *skb; /* Dequeue packet */ - if (unlikely((skb = dev_dequeue_skb(dev, q)) == NULL)) + if (unlikely((skb = dequeue_skb(q)) == NULL)) return 0; + root_lock = qdisc_lock(q); - /* And release queue */ - spin_unlock(&dev->queue_lock); + /* And release qdisc */ + spin_unlock(root_lock); - HARD_TX_LOCK(dev, smp_processor_id()); - if (!netif_subqueue_stopped(dev, skb)) - ret = dev_hard_start_xmit(skb, dev); - HARD_TX_UNLOCK(dev); + dev = qdisc_dev(q); + txq = netdev_get_tx_queue(dev, skb_get_queue_mapping(skb)); - spin_lock(&dev->queue_lock); - q = dev->qdisc; + HARD_TX_LOCK(dev, txq, smp_processor_id()); + if (!netif_tx_queue_stopped(txq) && + !netif_tx_queue_frozen(txq)) + ret = dev_hard_start_xmit(skb, dev, txq); + HARD_TX_UNLOCK(dev, txq); + + spin_lock(root_lock); switch (ret) { case NETDEV_TX_OK: @@ -166,7 +157,7 @@ static inline int qdisc_restart(struct net_device *dev) case NETDEV_TX_LOCKED: /* Driver try lock failed */ - ret = handle_dev_cpu_collision(skb, dev, q); + ret = handle_dev_cpu_collision(skb, txq, q); break; default: @@ -175,33 +166,34 @@ static inline int qdisc_restart(struct net_device *dev) printk(KERN_WARNING "BUG %s code %d qlen %d\n", dev->name, ret, q->q.qlen); - ret = dev_requeue_skb(skb, dev, q); + ret = dev_requeue_skb(skb, q); break; } + if (ret && (netif_tx_queue_stopped(txq) || + netif_tx_queue_frozen(txq))) + ret = 0; + return ret; } -void __qdisc_run(struct net_device *dev) +void __qdisc_run(struct Qdisc *q) { unsigned long start_time = jiffies; - while (qdisc_restart(dev)) { - if (netif_queue_stopped(dev)) - break; - + while (qdisc_restart(q)) { /* * Postpone processing if * 1. another process needs the CPU; * 2. we've been doing it for too long. */ if (need_resched() || jiffies != start_time) { - netif_schedule(dev); + __netif_schedule(q); break; } } - clear_bit(__LINK_STATE_QDISC_RUNNING, &dev->state); + clear_bit(__QDISC_STATE_RUNNING, &q->state); } static void dev_watchdog(unsigned long arg) @@ -209,19 +201,34 @@ static void dev_watchdog(unsigned long arg) struct net_device *dev = (struct net_device *)arg; netif_tx_lock(dev); - if (dev->qdisc != &noop_qdisc) { + if (!qdisc_tx_is_noop(dev)) { if (netif_device_present(dev) && netif_running(dev) && netif_carrier_ok(dev)) { - if (netif_queue_stopped(dev) && - time_after(jiffies, dev->trans_start + dev->watchdog_timeo)) { + int some_queue_stopped = 0; + unsigned int i; + + for (i = 0; i < dev->num_tx_queues; i++) { + struct netdev_queue *txq; - printk(KERN_INFO "NETDEV WATCHDOG: %s: transmit timed out\n", - dev->name); + txq = netdev_get_tx_queue(dev, i); + if (netif_tx_queue_stopped(txq)) { + some_queue_stopped = 1; + break; + } + } + + if (some_queue_stopped && + time_after(jiffies, (dev->trans_start + + dev->watchdog_timeo))) { + char drivername[64]; + WARN_ONCE(1, KERN_INFO "NETDEV WATCHDOG: %s (%s): transmit timed out\n", + dev->name, netdev_drivername(dev, drivername, 64)); dev->tx_timeout(dev); - WARN_ON_ONCE(1); } - if (!mod_timer(&dev->watchdog_timer, round_jiffies(jiffies + dev->watchdog_timeo))) + if (!mod_timer(&dev->watchdog_timer, + round_jiffies(jiffies + + dev->watchdog_timeo))) dev_hold(dev); } } @@ -317,12 +324,19 @@ struct Qdisc_ops noop_qdisc_ops __read_mostly = { .owner = THIS_MODULE, }; +static struct netdev_queue noop_netdev_queue = { + .qdisc = &noop_qdisc, +}; + struct Qdisc noop_qdisc = { .enqueue = noop_enqueue, .dequeue = noop_dequeue, .flags = TCQ_F_BUILTIN, .ops = &noop_qdisc_ops, .list = LIST_HEAD_INIT(noop_qdisc.list), + .requeue.lock = __SPIN_LOCK_UNLOCKED(noop_qdisc.q.lock), + .q.lock = __SPIN_LOCK_UNLOCKED(noop_qdisc.q.lock), + .dev_queue = &noop_netdev_queue, }; EXPORT_SYMBOL(noop_qdisc); @@ -335,12 +349,20 @@ static struct Qdisc_ops noqueue_qdisc_ops __read_mostly = { .owner = THIS_MODULE, }; +static struct Qdisc noqueue_qdisc; +static struct netdev_queue noqueue_netdev_queue = { + .qdisc = &noqueue_qdisc, +}; + static struct Qdisc noqueue_qdisc = { .enqueue = NULL, .dequeue = noop_dequeue, .flags = TCQ_F_BUILTIN, .ops = &noqueue_qdisc_ops, .list = LIST_HEAD_INIT(noqueue_qdisc.list), + .requeue.lock = __SPIN_LOCK_UNLOCKED(noqueue_qdisc.q.lock), + .q.lock = __SPIN_LOCK_UNLOCKED(noqueue_qdisc.q.lock), + .dev_queue = &noqueue_netdev_queue, }; @@ -364,7 +386,7 @@ static int pfifo_fast_enqueue(struct sk_buff *skb, struct Qdisc* qdisc) { struct sk_buff_head *list = prio2list(skb, qdisc); - if (skb_queue_len(list) < qdisc->dev->tx_queue_len) { + if (skb_queue_len(list) < qdisc_dev(qdisc)->tx_queue_len) { qdisc->q.qlen++; return __qdisc_enqueue_tail(skb, qdisc, list); } @@ -440,7 +462,8 @@ static struct Qdisc_ops pfifo_fast_ops __read_mostly = { .owner = THIS_MODULE, }; -struct Qdisc *qdisc_alloc(struct net_device *dev, struct Qdisc_ops *ops) +struct Qdisc *qdisc_alloc(struct netdev_queue *dev_queue, + struct Qdisc_ops *ops) { void *p; struct Qdisc *sch; @@ -458,28 +481,30 @@ struct Qdisc *qdisc_alloc(struct net_device *dev, struct Qdisc_ops *ops) sch->padded = (char *) sch - (char *) p; INIT_LIST_HEAD(&sch->list); + skb_queue_head_init(&sch->requeue); skb_queue_head_init(&sch->q); sch->ops = ops; sch->enqueue = ops->enqueue; sch->dequeue = ops->dequeue; - sch->dev = dev; - dev_hold(dev); + sch->dev_queue = dev_queue; + dev_hold(qdisc_dev(sch)); atomic_set(&sch->refcnt, 1); return sch; errout: - return ERR_PTR(-err); + return ERR_PTR(err); } -struct Qdisc * qdisc_create_dflt(struct net_device *dev, struct Qdisc_ops *ops, +struct Qdisc * qdisc_create_dflt(struct net_device *dev, + struct netdev_queue *dev_queue, + struct Qdisc_ops *ops, unsigned int parentid) { struct Qdisc *sch; - sch = qdisc_alloc(dev, ops); + sch = qdisc_alloc(dev_queue, ops); if (IS_ERR(sch)) goto errout; - sch->stats_lock = &dev->queue_lock; sch->parent = parentid; if (!ops->init || ops->init(sch, NULL) == 0) @@ -491,7 +516,7 @@ errout: } EXPORT_SYMBOL(qdisc_create_dflt); -/* Under dev->queue_lock and BH! */ +/* Under qdisc_lock(qdisc) and BH! */ void qdisc_reset(struct Qdisc *qdisc) { @@ -502,17 +527,6 @@ void qdisc_reset(struct Qdisc *qdisc) } EXPORT_SYMBOL(qdisc_reset); -/* this is the rcu callback function to clean up a qdisc when there - * are no further references to it */ - -static void __qdisc_destroy(struct rcu_head *head) -{ - struct Qdisc *qdisc = container_of(head, struct Qdisc, q_rcu); - kfree((char *) qdisc - qdisc->padded); -} - -/* Under dev->queue_lock and BH! */ - void qdisc_destroy(struct Qdisc *qdisc) { const struct Qdisc_ops *ops = qdisc->ops; @@ -521,7 +535,11 @@ void qdisc_destroy(struct Qdisc *qdisc) !atomic_dec_and_test(&qdisc->refcnt)) return; - list_del(&qdisc->list); +#ifdef CONFIG_NET_SCHED + qdisc_list_del(qdisc); + + qdisc_put_stab(qdisc->stab); +#endif gen_kill_estimator(&qdisc->bstats, &qdisc->rate_est); if (ops->reset) ops->reset(qdisc); @@ -529,65 +547,141 @@ void qdisc_destroy(struct Qdisc *qdisc) ops->destroy(qdisc); module_put(ops->owner); - dev_put(qdisc->dev); - call_rcu(&qdisc->q_rcu, __qdisc_destroy); + dev_put(qdisc_dev(qdisc)); + + kfree_skb(qdisc->gso_skb); + __skb_queue_purge(&qdisc->requeue); + + kfree((char *) qdisc - qdisc->padded); } EXPORT_SYMBOL(qdisc_destroy); +static bool dev_all_qdisc_sleeping_noop(struct net_device *dev) +{ + unsigned int i; + + for (i = 0; i < dev->num_tx_queues; i++) { + struct netdev_queue *txq = netdev_get_tx_queue(dev, i); + + if (txq->qdisc_sleeping != &noop_qdisc) + return false; + } + return true; +} + +static void attach_one_default_qdisc(struct net_device *dev, + struct netdev_queue *dev_queue, + void *_unused) +{ + struct Qdisc *qdisc; + + if (dev->tx_queue_len) { + qdisc = qdisc_create_dflt(dev, dev_queue, + &pfifo_fast_ops, TC_H_ROOT); + if (!qdisc) { + printk(KERN_INFO "%s: activation failed\n", dev->name); + return; + } + } else { + qdisc = &noqueue_qdisc; + } + dev_queue->qdisc_sleeping = qdisc; +} + +static void transition_one_qdisc(struct net_device *dev, + struct netdev_queue *dev_queue, + void *_need_watchdog) +{ + struct Qdisc *new_qdisc = dev_queue->qdisc_sleeping; + int *need_watchdog_p = _need_watchdog; + + if (!(new_qdisc->flags & TCQ_F_BUILTIN)) + clear_bit(__QDISC_STATE_DEACTIVATED, &new_qdisc->state); + + rcu_assign_pointer(dev_queue->qdisc, new_qdisc); + if (need_watchdog_p && new_qdisc != &noqueue_qdisc) + *need_watchdog_p = 1; +} + void dev_activate(struct net_device *dev) { + int need_watchdog; + /* No queueing discipline is attached to device; create default one i.e. pfifo_fast for devices, which need queueing and noqueue_qdisc for virtual interfaces */ - if (dev->qdisc_sleeping == &noop_qdisc) { - struct Qdisc *qdisc; - if (dev->tx_queue_len) { - qdisc = qdisc_create_dflt(dev, &pfifo_fast_ops, - TC_H_ROOT); - if (qdisc == NULL) { - printk(KERN_INFO "%s: activation failed\n", dev->name); - return; - } - list_add_tail(&qdisc->list, &dev->qdisc_list); - } else { - qdisc = &noqueue_qdisc; - } - dev->qdisc_sleeping = qdisc; - } + if (dev_all_qdisc_sleeping_noop(dev)) + netdev_for_each_tx_queue(dev, attach_one_default_qdisc, NULL); if (!netif_carrier_ok(dev)) /* Delay activation until next carrier-on event */ return; - spin_lock_bh(&dev->queue_lock); - rcu_assign_pointer(dev->qdisc, dev->qdisc_sleeping); - if (dev->qdisc != &noqueue_qdisc) { + need_watchdog = 0; + netdev_for_each_tx_queue(dev, transition_one_qdisc, &need_watchdog); + transition_one_qdisc(dev, &dev->rx_queue, NULL); + + if (need_watchdog) { dev->trans_start = jiffies; dev_watchdog_up(dev); } - spin_unlock_bh(&dev->queue_lock); } -void dev_deactivate(struct net_device *dev) +static void dev_deactivate_queue(struct net_device *dev, + struct netdev_queue *dev_queue, + void *_qdisc_default) { + struct Qdisc *qdisc_default = _qdisc_default; struct Qdisc *qdisc; - struct sk_buff *skb; - int running; - spin_lock_bh(&dev->queue_lock); - qdisc = dev->qdisc; - dev->qdisc = &noop_qdisc; + qdisc = dev_queue->qdisc; + if (qdisc) { + spin_lock_bh(qdisc_lock(qdisc)); - qdisc_reset(qdisc); + if (!(qdisc->flags & TCQ_F_BUILTIN)) + set_bit(__QDISC_STATE_DEACTIVATED, &qdisc->state); - skb = dev->gso_skb; - dev->gso_skb = NULL; - spin_unlock_bh(&dev->queue_lock); + rcu_assign_pointer(dev_queue->qdisc, qdisc_default); + qdisc_reset(qdisc); - kfree_skb(skb); + spin_unlock_bh(qdisc_lock(qdisc)); + } +} + +static bool some_qdisc_is_busy(struct net_device *dev) +{ + unsigned int i; + + for (i = 0; i < dev->num_tx_queues; i++) { + struct netdev_queue *dev_queue; + spinlock_t *root_lock; + struct Qdisc *q; + int val; + + dev_queue = netdev_get_tx_queue(dev, i); + q = dev_queue->qdisc_sleeping; + root_lock = qdisc_lock(q); + + spin_lock_bh(root_lock); + + val = (test_bit(__QDISC_STATE_RUNNING, &q->state) || + test_bit(__QDISC_STATE_SCHED, &q->state)); + + spin_unlock_bh(root_lock); + + if (val) + return true; + } + return false; +} + +void dev_deactivate(struct net_device *dev) +{ + netdev_for_each_tx_queue(dev, dev_deactivate_queue, &noop_qdisc); + dev_deactivate_queue(dev, &dev->rx_queue, &noop_qdisc); dev_watchdog_down(dev); @@ -595,55 +689,46 @@ void dev_deactivate(struct net_device *dev) synchronize_rcu(); /* Wait for outstanding qdisc_run calls. */ - do { - while (test_bit(__LINK_STATE_QDISC_RUNNING, &dev->state)) - yield(); + while (some_qdisc_is_busy(dev)) + yield(); +} - /* - * Double-check inside queue lock to ensure that all effects - * of the queue run are visible when we return. - */ - spin_lock_bh(&dev->queue_lock); - running = test_bit(__LINK_STATE_QDISC_RUNNING, &dev->state); - spin_unlock_bh(&dev->queue_lock); +static void dev_init_scheduler_queue(struct net_device *dev, + struct netdev_queue *dev_queue, + void *_qdisc) +{ + struct Qdisc *qdisc = _qdisc; - /* - * The running flag should never be set at this point because - * we've already set dev->qdisc to noop_qdisc *inside* the same - * pair of spin locks. That is, if any qdisc_run starts after - * our initial test it should see the noop_qdisc and then - * clear the RUNNING bit before dropping the queue lock. So - * if it is set here then we've found a bug. - */ - } while (WARN_ON_ONCE(running)); + dev_queue->qdisc = qdisc; + dev_queue->qdisc_sleeping = qdisc; } void dev_init_scheduler(struct net_device *dev) { - qdisc_lock_tree(dev); - dev->qdisc = &noop_qdisc; - dev->qdisc_sleeping = &noop_qdisc; - INIT_LIST_HEAD(&dev->qdisc_list); - qdisc_unlock_tree(dev); + netdev_for_each_tx_queue(dev, dev_init_scheduler_queue, &noop_qdisc); + dev_init_scheduler_queue(dev, &dev->rx_queue, &noop_qdisc); setup_timer(&dev->watchdog_timer, dev_watchdog, (unsigned long)dev); } -void dev_shutdown(struct net_device *dev) +static void shutdown_scheduler_queue(struct net_device *dev, + struct netdev_queue *dev_queue, + void *_qdisc_default) { - struct Qdisc *qdisc; + struct Qdisc *qdisc = dev_queue->qdisc_sleeping; + struct Qdisc *qdisc_default = _qdisc_default; + + if (qdisc) { + rcu_assign_pointer(dev_queue->qdisc, qdisc_default); + dev_queue->qdisc_sleeping = qdisc_default; - qdisc_lock_tree(dev); - qdisc = dev->qdisc_sleeping; - dev->qdisc = &noop_qdisc; - dev->qdisc_sleeping = &noop_qdisc; - qdisc_destroy(qdisc); -#if defined(CONFIG_NET_SCH_INGRESS) || defined(CONFIG_NET_SCH_INGRESS_MODULE) - if ((qdisc = dev->qdisc_ingress) != NULL) { - dev->qdisc_ingress = NULL; qdisc_destroy(qdisc); } -#endif - BUG_TRAP(!timer_pending(&dev->watchdog_timer)); - qdisc_unlock_tree(dev); +} + +void dev_shutdown(struct net_device *dev) +{ + netdev_for_each_tx_queue(dev, shutdown_scheduler_queue, &noop_qdisc); + shutdown_scheduler_queue(dev, &dev->rx_queue, &noop_qdisc); + WARN_ON(timer_pending(&dev->watchdog_timer)); } diff --git a/net/sched/sch_gred.c b/net/sched/sch_gred.c index c89fba56db56..c1ad6b8de105 100644 --- a/net/sched/sch_gred.c +++ b/net/sched/sch_gred.c @@ -164,7 +164,7 @@ static int gred_enqueue(struct sk_buff *skb, struct Qdisc* sch) * if no default DP has been configured. This * allows for DP flows to be left untouched. */ - if (skb_queue_len(&sch->q) < sch->dev->tx_queue_len) + if (skb_queue_len(&sch->q) < qdisc_dev(sch)->tx_queue_len) return qdisc_enqueue_tail(skb, sch); else goto drop; @@ -188,7 +188,7 @@ static int gred_enqueue(struct sk_buff *skb, struct Qdisc* sch) } q->packetsin++; - q->bytesin += skb->len; + q->bytesin += qdisc_pkt_len(skb); if (gred_wred_mode(t)) gred_load_wred_set(t, q); @@ -226,8 +226,8 @@ static int gred_enqueue(struct sk_buff *skb, struct Qdisc* sch) break; } - if (q->backlog + skb->len <= q->limit) { - q->backlog += skb->len; + if (q->backlog + qdisc_pkt_len(skb) <= q->limit) { + q->backlog += qdisc_pkt_len(skb); return qdisc_enqueue_tail(skb, sch); } @@ -254,7 +254,7 @@ static int gred_requeue(struct sk_buff *skb, struct Qdisc* sch) } else { if (red_is_idling(&q->parms)) red_end_of_idle_period(&q->parms); - q->backlog += skb->len; + q->backlog += qdisc_pkt_len(skb); } return qdisc_requeue(skb, sch); @@ -277,7 +277,7 @@ static struct sk_buff *gred_dequeue(struct Qdisc* sch) "VQ 0x%x after dequeue, screwing up " "backlog.\n", tc_index_to_dp(skb)); } else { - q->backlog -= skb->len; + q->backlog -= qdisc_pkt_len(skb); if (!q->backlog && !gred_wred_mode(t)) red_start_of_idle_period(&q->parms); @@ -299,7 +299,7 @@ static unsigned int gred_drop(struct Qdisc* sch) skb = qdisc_dequeue_tail(sch); if (skb) { - unsigned int len = skb->len; + unsigned int len = qdisc_pkt_len(skb); struct gred_sched_data *q; u16 dp = tc_index_to_dp(skb); diff --git a/net/sched/sch_hfsc.c b/net/sched/sch_hfsc.c index fdfaa3fcc16d..c1e77da8cd09 100644 --- a/net/sched/sch_hfsc.c +++ b/net/sched/sch_hfsc.c @@ -113,7 +113,7 @@ enum hfsc_class_flags struct hfsc_class { - u32 classid; /* class id */ + struct Qdisc_class_common cl_common; unsigned int refcnt; /* usage count */ struct gnet_stats_basic bstats; @@ -134,7 +134,6 @@ struct hfsc_class struct rb_node vt_node; /* parent's vt_tree member */ struct rb_root cf_tree; /* active children sorted by cl_f */ struct rb_node cf_node; /* parent's cf_heap member */ - struct list_head hlist; /* hash list member */ struct list_head dlist; /* drop list member */ u64 cl_total; /* total work in bytes */ @@ -177,13 +176,11 @@ struct hfsc_class unsigned long cl_nactive; /* number of active children */ }; -#define HFSC_HSIZE 16 - struct hfsc_sched { u16 defcls; /* default class id */ struct hfsc_class root; /* root class */ - struct list_head clhash[HFSC_HSIZE]; /* class hash */ + struct Qdisc_class_hash clhash; /* class hash */ struct rb_root eligible; /* eligible tree */ struct list_head droplist; /* active leaf class list (for dropping) */ @@ -898,7 +895,7 @@ qdisc_peek_len(struct Qdisc *sch) printk("qdisc_peek_len: non work-conserving qdisc ?\n"); return 0; } - len = skb->len; + len = qdisc_pkt_len(skb); if (unlikely(sch->ops->requeue(skb, sch) != NET_XMIT_SUCCESS)) { if (net_ratelimit()) printk("qdisc_peek_len: failed to requeue\n"); @@ -933,26 +930,16 @@ hfsc_adjust_levels(struct hfsc_class *cl) } while ((cl = cl->cl_parent) != NULL); } -static inline unsigned int -hfsc_hash(u32 h) -{ - h ^= h >> 8; - h ^= h >> 4; - - return h & (HFSC_HSIZE - 1); -} - static inline struct hfsc_class * hfsc_find_class(u32 classid, struct Qdisc *sch) { struct hfsc_sched *q = qdisc_priv(sch); - struct hfsc_class *cl; + struct Qdisc_class_common *clc; - list_for_each_entry(cl, &q->clhash[hfsc_hash(classid)], hlist) { - if (cl->classid == classid) - return cl; - } - return NULL; + clc = qdisc_class_find(&q->clhash, classid); + if (clc == NULL) + return NULL; + return container_of(clc, struct hfsc_class, cl_common); } static void @@ -1032,7 +1019,8 @@ hfsc_change_class(struct Qdisc *sch, u32 classid, u32 parentid, if (cl != NULL) { if (parentid) { - if (cl->cl_parent && cl->cl_parent->classid != parentid) + if (cl->cl_parent && + cl->cl_parent->cl_common.classid != parentid) return -EINVAL; if (cl->cl_parent == NULL && parentid != TC_H_ROOT) return -EINVAL; @@ -1057,7 +1045,7 @@ hfsc_change_class(struct Qdisc *sch, u32 classid, u32 parentid, if (tca[TCA_RATE]) gen_replace_estimator(&cl->bstats, &cl->rate_est, - &sch->dev->queue_lock, + qdisc_root_sleeping_lock(sch), tca[TCA_RATE]); return 0; } @@ -1091,11 +1079,12 @@ hfsc_change_class(struct Qdisc *sch, u32 classid, u32 parentid, if (usc != NULL) hfsc_change_usc(cl, usc, 0); + cl->cl_common.classid = classid; cl->refcnt = 1; - cl->classid = classid; cl->sched = q; cl->cl_parent = parent; - cl->qdisc = qdisc_create_dflt(sch->dev, &pfifo_qdisc_ops, classid); + cl->qdisc = qdisc_create_dflt(qdisc_dev(sch), sch->dev_queue, + &pfifo_qdisc_ops, classid); if (cl->qdisc == NULL) cl->qdisc = &noop_qdisc; INIT_LIST_HEAD(&cl->children); @@ -1103,7 +1092,7 @@ hfsc_change_class(struct Qdisc *sch, u32 classid, u32 parentid, cl->cf_tree = RB_ROOT; sch_tree_lock(sch); - list_add_tail(&cl->hlist, &q->clhash[hfsc_hash(classid)]); + qdisc_class_hash_insert(&q->clhash, &cl->cl_common); list_add_tail(&cl->siblings, &parent->children); if (parent->level == 0) hfsc_purge_queue(sch, parent); @@ -1111,9 +1100,11 @@ hfsc_change_class(struct Qdisc *sch, u32 classid, u32 parentid, cl->cl_pcvtoff = parent->cl_cvtoff; sch_tree_unlock(sch); + qdisc_class_hash_grow(sch, &q->clhash); + if (tca[TCA_RATE]) gen_new_estimator(&cl->bstats, &cl->rate_est, - &sch->dev->queue_lock, tca[TCA_RATE]); + qdisc_root_sleeping_lock(sch), tca[TCA_RATE]); *arg = (unsigned long)cl; return 0; } @@ -1123,7 +1114,7 @@ hfsc_destroy_class(struct Qdisc *sch, struct hfsc_class *cl) { struct hfsc_sched *q = qdisc_priv(sch); - tcf_destroy_chain(cl->filter_list); + tcf_destroy_chain(&cl->filter_list); qdisc_destroy(cl->qdisc); gen_kill_estimator(&cl->bstats, &cl->rate_est); if (cl != &q->root) @@ -1145,7 +1136,7 @@ hfsc_delete_class(struct Qdisc *sch, unsigned long arg) hfsc_adjust_levels(cl->cl_parent); hfsc_purge_queue(sch, cl); - list_del(&cl->hlist); + qdisc_class_hash_remove(&q->clhash, &cl->cl_common); if (--cl->refcnt == 0) hfsc_destroy_class(sch, cl); @@ -1168,14 +1159,14 @@ hfsc_classify(struct sk_buff *skb, struct Qdisc *sch, int *qerr) if (cl->level == 0) return cl; - *qerr = NET_XMIT_BYPASS; + *qerr = NET_XMIT_SUCCESS | __NET_XMIT_BYPASS; tcf = q->root.filter_list; while (tcf && (result = tc_classify(skb, tcf, &res)) >= 0) { #ifdef CONFIG_NET_CLS_ACT switch (result) { case TC_ACT_QUEUED: case TC_ACT_STOLEN: - *qerr = NET_XMIT_SUCCESS; + *qerr = NET_XMIT_SUCCESS | __NET_XMIT_STOLEN; case TC_ACT_SHOT: return NULL; } @@ -1211,8 +1202,9 @@ hfsc_graft_class(struct Qdisc *sch, unsigned long arg, struct Qdisc *new, if (cl->level > 0) return -EINVAL; if (new == NULL) { - new = qdisc_create_dflt(sch->dev, &pfifo_qdisc_ops, - cl->classid); + new = qdisc_create_dflt(qdisc_dev(sch), sch->dev_queue, + &pfifo_qdisc_ops, + cl->cl_common.classid); if (new == NULL) new = &noop_qdisc; } @@ -1345,8 +1337,9 @@ hfsc_dump_class(struct Qdisc *sch, unsigned long arg, struct sk_buff *skb, struct hfsc_class *cl = (struct hfsc_class *)arg; struct nlattr *nest; - tcm->tcm_parent = cl->cl_parent ? cl->cl_parent->classid : TC_H_ROOT; - tcm->tcm_handle = cl->classid; + tcm->tcm_parent = cl->cl_parent ? cl->cl_parent->cl_common.classid : + TC_H_ROOT; + tcm->tcm_handle = cl->cl_common.classid; if (cl->level == 0) tcm->tcm_info = cl->qdisc->handle; @@ -1390,14 +1383,16 @@ static void hfsc_walk(struct Qdisc *sch, struct qdisc_walker *arg) { struct hfsc_sched *q = qdisc_priv(sch); + struct hlist_node *n; struct hfsc_class *cl; unsigned int i; if (arg->stop) return; - for (i = 0; i < HFSC_HSIZE; i++) { - list_for_each_entry(cl, &q->clhash[i], hlist) { + for (i = 0; i < q->clhash.hashsize; i++) { + hlist_for_each_entry(cl, n, &q->clhash.hash[i], + cl_common.hnode) { if (arg->count < arg->skip) { arg->count++; continue; @@ -1433,23 +1428,25 @@ hfsc_init_qdisc(struct Qdisc *sch, struct nlattr *opt) { struct hfsc_sched *q = qdisc_priv(sch); struct tc_hfsc_qopt *qopt; - unsigned int i; + int err; if (opt == NULL || nla_len(opt) < sizeof(*qopt)) return -EINVAL; qopt = nla_data(opt); q->defcls = qopt->defcls; - for (i = 0; i < HFSC_HSIZE; i++) - INIT_LIST_HEAD(&q->clhash[i]); + err = qdisc_class_hash_init(&q->clhash); + if (err < 0) + return err; q->eligible = RB_ROOT; INIT_LIST_HEAD(&q->droplist); skb_queue_head_init(&q->requeue); + q->root.cl_common.classid = sch->handle; q->root.refcnt = 1; - q->root.classid = sch->handle; q->root.sched = q; - q->root.qdisc = qdisc_create_dflt(sch->dev, &pfifo_qdisc_ops, + q->root.qdisc = qdisc_create_dflt(qdisc_dev(sch), sch->dev_queue, + &pfifo_qdisc_ops, sch->handle); if (q->root.qdisc == NULL) q->root.qdisc = &noop_qdisc; @@ -1457,7 +1454,8 @@ hfsc_init_qdisc(struct Qdisc *sch, struct nlattr *opt) q->root.vt_tree = RB_ROOT; q->root.cf_tree = RB_ROOT; - list_add(&q->root.hlist, &q->clhash[hfsc_hash(q->root.classid)]); + qdisc_class_hash_insert(&q->clhash, &q->root.cl_common); + qdisc_class_hash_grow(sch, &q->clhash); qdisc_watchdog_init(&q->watchdog, sch); @@ -1520,10 +1518,11 @@ hfsc_reset_qdisc(struct Qdisc *sch) { struct hfsc_sched *q = qdisc_priv(sch); struct hfsc_class *cl; + struct hlist_node *n; unsigned int i; - for (i = 0; i < HFSC_HSIZE; i++) { - list_for_each_entry(cl, &q->clhash[i], hlist) + for (i = 0; i < q->clhash.hashsize; i++) { + hlist_for_each_entry(cl, n, &q->clhash.hash[i], cl_common.hnode) hfsc_reset_class(cl); } __skb_queue_purge(&q->requeue); @@ -1537,13 +1536,20 @@ static void hfsc_destroy_qdisc(struct Qdisc *sch) { struct hfsc_sched *q = qdisc_priv(sch); - struct hfsc_class *cl, *next; + struct hlist_node *n, *next; + struct hfsc_class *cl; unsigned int i; - for (i = 0; i < HFSC_HSIZE; i++) { - list_for_each_entry_safe(cl, next, &q->clhash[i], hlist) + for (i = 0; i < q->clhash.hashsize; i++) { + hlist_for_each_entry(cl, n, &q->clhash.hash[i], cl_common.hnode) + tcf_destroy_chain(&cl->filter_list); + } + for (i = 0; i < q->clhash.hashsize; i++) { + hlist_for_each_entry_safe(cl, n, next, &q->clhash.hash[i], + cl_common.hnode) hfsc_destroy_class(sch, cl); } + qdisc_class_hash_destroy(&q->clhash); __skb_queue_purge(&q->requeue); qdisc_watchdog_cancel(&q->watchdog); } @@ -1568,32 +1574,32 @@ static int hfsc_enqueue(struct sk_buff *skb, struct Qdisc *sch) { struct hfsc_class *cl; - unsigned int len; int err; cl = hfsc_classify(skb, sch, &err); if (cl == NULL) { - if (err == NET_XMIT_BYPASS) + if (err & __NET_XMIT_BYPASS) sch->qstats.drops++; kfree_skb(skb); return err; } - len = skb->len; - err = cl->qdisc->enqueue(skb, cl->qdisc); + err = qdisc_enqueue(skb, cl->qdisc); if (unlikely(err != NET_XMIT_SUCCESS)) { - cl->qstats.drops++; - sch->qstats.drops++; + if (net_xmit_drop_count(err)) { + cl->qstats.drops++; + sch->qstats.drops++; + } return err; } if (cl->qdisc->q.qlen == 1) - set_active(cl, len); + set_active(cl, qdisc_pkt_len(skb)); cl->bstats.packets++; - cl->bstats.bytes += len; + cl->bstats.bytes += qdisc_pkt_len(skb); sch->bstats.packets++; - sch->bstats.bytes += len; + sch->bstats.bytes += qdisc_pkt_len(skb); sch->q.qlen++; return NET_XMIT_SUCCESS; @@ -1643,9 +1649,9 @@ hfsc_dequeue(struct Qdisc *sch) return NULL; } - update_vf(cl, skb->len, cur_time); + update_vf(cl, qdisc_pkt_len(skb), cur_time); if (realtime) - cl->cl_cumul += skb->len; + cl->cl_cumul += qdisc_pkt_len(skb); if (cl->qdisc->q.qlen != 0) { if (cl->cl_flags & HFSC_RSC) { diff --git a/net/sched/sch_htb.c b/net/sched/sch_htb.c index 6807c97985a5..d14f02056ae6 100644 --- a/net/sched/sch_htb.c +++ b/net/sched/sch_htb.c @@ -24,8 +24,6 @@ * Jiri Fojtasek * fixed requeue routine * and many others. thanks. - * - * $Id: sch_htb.c,v 1.25 2003/12/07 11:08:25 devik Exp devik $ */ #include <linux/module.h> #include <linux/moduleparam.h> @@ -53,7 +51,6 @@ one less than their parent. */ -#define HTB_HSIZE 16 /* classid hash size */ static int htb_hysteresis __read_mostly = 0; /* whether to use mode hysteresis for speedup */ #define HTB_VER 0x30011 /* major must be matched with number suplied by TC as version */ @@ -74,8 +71,8 @@ enum htb_cmode { /* interior & leaf nodes; props specific to leaves are marked L: */ struct htb_class { + struct Qdisc_class_common common; /* general class parameters */ - u32 classid; struct gnet_stats_basic bstats; struct gnet_stats_queue qstats; struct gnet_stats_rate_est rate_est; @@ -84,10 +81,8 @@ struct htb_class { /* topology */ int level; /* our level (see above) */ + unsigned int children; struct htb_class *parent; /* parent class */ - struct hlist_node hlist; /* classid hash list item */ - struct list_head sibling; /* sibling list item */ - struct list_head children; /* children list */ union { struct htb_class_leaf { @@ -142,8 +137,7 @@ static inline long L2T(struct htb_class *cl, struct qdisc_rate_table *rate, } struct htb_sched { - struct list_head root; /* root classes list */ - struct hlist_head hash[HTB_HSIZE]; /* hashed by classid */ + struct Qdisc_class_hash clhash; struct list_head drops[TC_HTB_NUMPRIO];/* active leaves (for drops) */ /* self list - roots of self generating tree */ @@ -165,7 +159,6 @@ struct htb_sched { /* filters for qdisc itself */ struct tcf_proto *filter_list; - int filter_cnt; int rate2quantum; /* quant = rate / rate2quantum */ psched_time_t now; /* cached dequeue time */ @@ -178,32 +171,16 @@ struct htb_sched { long direct_pkts; }; -/* compute hash of size HTB_HSIZE for given handle */ -static inline int htb_hash(u32 h) -{ -#if HTB_HSIZE != 16 -#error "Declare new hash for your HTB_HSIZE" -#endif - h ^= h >> 8; /* stolen from cbq_hash */ - h ^= h >> 4; - return h & 0xf; -} - /* find class in global hash table using given handle */ static inline struct htb_class *htb_find(u32 handle, struct Qdisc *sch) { struct htb_sched *q = qdisc_priv(sch); - struct hlist_node *p; - struct htb_class *cl; + struct Qdisc_class_common *clc; - if (TC_H_MAJ(handle) != sch->handle) + clc = qdisc_class_find(&q->clhash, handle); + if (clc == NULL) return NULL; - - hlist_for_each_entry(cl, p, q->hash + htb_hash(handle), hlist) { - if (cl->classid == handle) - return cl; - } - return NULL; + return container_of(clc, struct htb_class, common); } /** @@ -237,14 +214,14 @@ static struct htb_class *htb_classify(struct sk_buff *skb, struct Qdisc *sch, if ((cl = htb_find(skb->priority, sch)) != NULL && cl->level == 0) return cl; - *qerr = NET_XMIT_BYPASS; + *qerr = NET_XMIT_SUCCESS | __NET_XMIT_BYPASS; tcf = q->filter_list; while (tcf && (result = tc_classify(skb, tcf, &res)) >= 0) { #ifdef CONFIG_NET_CLS_ACT switch (result) { case TC_ACT_QUEUED: case TC_ACT_STOLEN: - *qerr = NET_XMIT_SUCCESS; + *qerr = NET_XMIT_SUCCESS | __NET_XMIT_STOLEN; case TC_ACT_SHOT: return NULL; } @@ -284,7 +261,7 @@ static void htb_add_to_id_tree(struct rb_root *root, parent = *p; c = rb_entry(parent, struct htb_class, node[prio]); - if (cl->classid > c->classid) + if (cl->common.classid > c->common.classid) p = &parent->rb_right; else p = &parent->rb_left; @@ -448,7 +425,7 @@ static void htb_deactivate_prios(struct htb_sched *q, struct htb_class *cl) /* we are removing child which is pointed to from parent feed - forget the pointer but remember classid */ - p->un.inner.last_ptr_id[prio] = cl->classid; + p->un.inner.last_ptr_id[prio] = cl->common.classid; p->un.inner.ptr[prio] = NULL; } @@ -547,7 +524,7 @@ htb_change_class_mode(struct htb_sched *q, struct htb_class *cl, long *diff) */ static inline void htb_activate(struct htb_sched *q, struct htb_class *cl) { - BUG_TRAP(!cl->level && cl->un.leaf.q && cl->un.leaf.q->q.qlen); + WARN_ON(cl->level || !cl->un.leaf.q || !cl->un.leaf.q->q.qlen); if (!cl->prio_activity) { cl->prio_activity = 1 << (cl->un.leaf.aprio = cl->un.leaf.prio); @@ -565,7 +542,7 @@ static inline void htb_activate(struct htb_sched *q, struct htb_class *cl) */ static inline void htb_deactivate(struct htb_sched *q, struct htb_class *cl) { - BUG_TRAP(cl->prio_activity); + WARN_ON(!cl->prio_activity); htb_deactivate_prios(q, cl); cl->prio_activity = 0; @@ -590,26 +567,27 @@ static int htb_enqueue(struct sk_buff *skb, struct Qdisc *sch) } #ifdef CONFIG_NET_CLS_ACT } else if (!cl) { - if (ret == NET_XMIT_BYPASS) + if (ret & __NET_XMIT_BYPASS) sch->qstats.drops++; kfree_skb(skb); return ret; #endif - } else if (cl->un.leaf.q->enqueue(skb, cl->un.leaf.q) != - NET_XMIT_SUCCESS) { - sch->qstats.drops++; - cl->qstats.drops++; - return NET_XMIT_DROP; + } else if ((ret = qdisc_enqueue(skb, cl->un.leaf.q)) != NET_XMIT_SUCCESS) { + if (net_xmit_drop_count(ret)) { + sch->qstats.drops++; + cl->qstats.drops++; + } + return ret; } else { cl->bstats.packets += skb_is_gso(skb)?skb_shinfo(skb)->gso_segs:1; - cl->bstats.bytes += skb->len; + cl->bstats.bytes += qdisc_pkt_len(skb); htb_activate(q, cl); } sch->q.qlen++; sch->bstats.packets += skb_is_gso(skb)?skb_shinfo(skb)->gso_segs:1; - sch->bstats.bytes += skb->len; + sch->bstats.bytes += qdisc_pkt_len(skb); return NET_XMIT_SUCCESS; } @@ -634,16 +612,18 @@ static int htb_requeue(struct sk_buff *skb, struct Qdisc *sch) } #ifdef CONFIG_NET_CLS_ACT } else if (!cl) { - if (ret == NET_XMIT_BYPASS) + if (ret & __NET_XMIT_BYPASS) sch->qstats.drops++; kfree_skb(skb); return ret; #endif - } else if (cl->un.leaf.q->ops->requeue(skb, cl->un.leaf.q) != + } else if ((ret = cl->un.leaf.q->ops->requeue(skb, cl->un.leaf.q)) != NET_XMIT_SUCCESS) { - sch->qstats.drops++; - cl->qstats.drops++; - return NET_XMIT_DROP; + if (net_xmit_drop_count(ret)) { + sch->qstats.drops++; + cl->qstats.drops++; + } + return ret; } else htb_activate(q, cl); @@ -666,7 +646,7 @@ static int htb_requeue(struct sk_buff *skb, struct Qdisc *sch) static void htb_charge_class(struct htb_sched *q, struct htb_class *cl, int level, struct sk_buff *skb) { - int bytes = skb->len; + int bytes = qdisc_pkt_len(skb); long toks, diff; enum htb_cmode old_mode; @@ -753,10 +733,10 @@ static struct rb_node *htb_id_find_next_upper(int prio, struct rb_node *n, while (n) { struct htb_class *cl = rb_entry(n, struct htb_class, node[prio]); - if (id == cl->classid) + if (id == cl->common.classid) return n; - if (id > cl->classid) { + if (id > cl->common.classid) { n = n->rb_right; } else { r = n; @@ -781,7 +761,7 @@ static struct htb_class *htb_lookup_leaf(struct rb_root *tree, int prio, u32 *pid; } stk[TC_HTB_MAXDEPTH], *sp = stk; - BUG_TRAP(tree->rb_node); + WARN_ON(!tree->rb_node); sp->root = tree->rb_node; sp->pptr = pptr; sp->pid = pid; @@ -801,7 +781,7 @@ static struct htb_class *htb_lookup_leaf(struct rb_root *tree, int prio, *sp->pptr = (*sp->pptr)->rb_left; if (sp > stk) { sp--; - BUG_TRAP(*sp->pptr); + WARN_ON(!*sp->pptr); if (!*sp->pptr) return NULL; htb_next_rb_node(sp->pptr); @@ -816,7 +796,7 @@ static struct htb_class *htb_lookup_leaf(struct rb_root *tree, int prio, sp->pid = cl->un.inner.last_ptr_id + prio; } } - BUG_TRAP(0); + WARN_ON(1); return NULL; } @@ -834,7 +814,7 @@ static struct sk_buff *htb_dequeue_tree(struct htb_sched *q, int prio, do { next: - BUG_TRAP(cl); + WARN_ON(!cl); if (!cl) return NULL; @@ -866,7 +846,7 @@ next: if (!cl->warned) { printk(KERN_WARNING "htb: class %X isn't work conserving ?!\n", - cl->classid); + cl->common.classid); cl->warned = 1; } q->nwc_hit++; @@ -879,7 +859,8 @@ next: } while (cl != start); if (likely(skb != NULL)) { - if ((cl->un.leaf.deficit[level] -= skb->len) < 0) { + cl->un.leaf.deficit[level] -= qdisc_pkt_len(skb); + if (cl->un.leaf.deficit[level] < 0) { cl->un.leaf.deficit[level] += cl->un.leaf.quantum; htb_next_rb_node((level ? cl->parent->un.inner.ptr : q-> ptr[0]) + prio); @@ -977,13 +958,12 @@ static unsigned int htb_drop(struct Qdisc *sch) static void htb_reset(struct Qdisc *sch) { struct htb_sched *q = qdisc_priv(sch); - int i; - - for (i = 0; i < HTB_HSIZE; i++) { - struct hlist_node *p; - struct htb_class *cl; + struct htb_class *cl; + struct hlist_node *n; + unsigned int i; - hlist_for_each_entry(cl, p, q->hash + i, hlist) { + for (i = 0; i < q->clhash.hashsize; i++) { + hlist_for_each_entry(cl, n, &q->clhash.hash[i], common.hnode) { if (cl->level) memset(&cl->un.inner, 0, sizeof(cl->un.inner)); else { @@ -1041,16 +1021,16 @@ static int htb_init(struct Qdisc *sch, struct nlattr *opt) return -EINVAL; } - INIT_LIST_HEAD(&q->root); - for (i = 0; i < HTB_HSIZE; i++) - INIT_HLIST_HEAD(q->hash + i); + err = qdisc_class_hash_init(&q->clhash); + if (err < 0) + return err; for (i = 0; i < TC_HTB_NUMPRIO; i++) INIT_LIST_HEAD(q->drops + i); qdisc_watchdog_init(&q->watchdog, sch); skb_queue_head_init(&q->direct_queue); - q->direct_qlen = sch->dev->tx_queue_len; + q->direct_qlen = qdisc_dev(sch)->tx_queue_len; if (q->direct_qlen < 2) /* some devices have zero tx_queue_len */ q->direct_qlen = 2; @@ -1063,11 +1043,12 @@ static int htb_init(struct Qdisc *sch, struct nlattr *opt) static int htb_dump(struct Qdisc *sch, struct sk_buff *skb) { + spinlock_t *root_lock = qdisc_root_sleeping_lock(sch); struct htb_sched *q = qdisc_priv(sch); struct nlattr *nest; struct tc_htb_glob gopt; - spin_lock_bh(&sch->dev->queue_lock); + spin_lock_bh(root_lock); gopt.direct_pkts = q->direct_pkts; gopt.version = HTB_VER; @@ -1081,11 +1062,11 @@ static int htb_dump(struct Qdisc *sch, struct sk_buff *skb) NLA_PUT(skb, TCA_HTB_INIT, sizeof(gopt), &gopt); nla_nest_end(skb, nest); - spin_unlock_bh(&sch->dev->queue_lock); + spin_unlock_bh(root_lock); return skb->len; nla_put_failure: - spin_unlock_bh(&sch->dev->queue_lock); + spin_unlock_bh(root_lock); nla_nest_cancel(skb, nest); return -1; } @@ -1094,12 +1075,13 @@ static int htb_dump_class(struct Qdisc *sch, unsigned long arg, struct sk_buff *skb, struct tcmsg *tcm) { struct htb_class *cl = (struct htb_class *)arg; + spinlock_t *root_lock = qdisc_root_sleeping_lock(sch); struct nlattr *nest; struct tc_htb_opt opt; - spin_lock_bh(&sch->dev->queue_lock); - tcm->tcm_parent = cl->parent ? cl->parent->classid : TC_H_ROOT; - tcm->tcm_handle = cl->classid; + spin_lock_bh(root_lock); + tcm->tcm_parent = cl->parent ? cl->parent->common.classid : TC_H_ROOT; + tcm->tcm_handle = cl->common.classid; if (!cl->level && cl->un.leaf.q) tcm->tcm_info = cl->un.leaf.q->handle; @@ -1119,11 +1101,11 @@ static int htb_dump_class(struct Qdisc *sch, unsigned long arg, NLA_PUT(skb, TCA_HTB_PARMS, sizeof(opt), &opt); nla_nest_end(skb, nest); - spin_unlock_bh(&sch->dev->queue_lock); + spin_unlock_bh(root_lock); return skb->len; nla_put_failure: - spin_unlock_bh(&sch->dev->queue_lock); + spin_unlock_bh(root_lock); nla_nest_cancel(skb, nest); return -1; } @@ -1153,8 +1135,9 @@ static int htb_graft(struct Qdisc *sch, unsigned long arg, struct Qdisc *new, if (cl && !cl->level) { if (new == NULL && - (new = qdisc_create_dflt(sch->dev, &pfifo_qdisc_ops, - cl->classid)) + (new = qdisc_create_dflt(qdisc_dev(sch), sch->dev_queue, + &pfifo_qdisc_ops, + cl->common.classid)) == NULL) return -ENOBUFS; sch_tree_lock(sch); @@ -1195,12 +1178,9 @@ static inline int htb_parent_last_child(struct htb_class *cl) if (!cl->parent) /* the root class */ return 0; - - if (!(cl->parent->children.next == &cl->sibling && - cl->parent->children.prev == &cl->sibling)) + if (cl->parent->children > 1) /* not the last child */ return 0; - return 1; } @@ -1209,7 +1189,7 @@ static void htb_parent_to_leaf(struct htb_sched *q, struct htb_class *cl, { struct htb_class *parent = cl->parent; - BUG_TRAP(!cl->level && cl->un.leaf.q && !cl->prio_activity); + WARN_ON(cl->level || !cl->un.leaf.q || cl->prio_activity); if (parent->cmode != HTB_CAN_SEND) htb_safe_rb_erase(&parent->pq_node, q->wait_pq + parent->level); @@ -1228,32 +1208,15 @@ static void htb_parent_to_leaf(struct htb_sched *q, struct htb_class *cl, static void htb_destroy_class(struct Qdisc *sch, struct htb_class *cl) { - struct htb_sched *q = qdisc_priv(sch); - if (!cl->level) { - BUG_TRAP(cl->un.leaf.q); + WARN_ON(!cl->un.leaf.q); qdisc_destroy(cl->un.leaf.q); } gen_kill_estimator(&cl->bstats, &cl->rate_est); qdisc_put_rtab(cl->rate); qdisc_put_rtab(cl->ceil); - tcf_destroy_chain(cl->filter_list); - - while (!list_empty(&cl->children)) - htb_destroy_class(sch, list_entry(cl->children.next, - struct htb_class, sibling)); - - /* note: this delete may happen twice (see htb_delete) */ - hlist_del_init(&cl->hlist); - list_del(&cl->sibling); - - if (cl->prio_activity) - htb_deactivate(q, cl); - - if (cl->cmode != HTB_CAN_SEND) - htb_safe_rb_erase(&cl->pq_node, q->wait_pq + cl->level); - + tcf_destroy_chain(&cl->filter_list); kfree(cl); } @@ -1261,18 +1224,27 @@ static void htb_destroy_class(struct Qdisc *sch, struct htb_class *cl) static void htb_destroy(struct Qdisc *sch) { struct htb_sched *q = qdisc_priv(sch); + struct hlist_node *n, *next; + struct htb_class *cl; + unsigned int i; qdisc_watchdog_cancel(&q->watchdog); /* This line used to be after htb_destroy_class call below and surprisingly it worked in 2.4. But it must precede it because filter need its target class alive to be able to call unbind_filter on it (without Oops). */ - tcf_destroy_chain(q->filter_list); - - while (!list_empty(&q->root)) - htb_destroy_class(sch, list_entry(q->root.next, - struct htb_class, sibling)); + tcf_destroy_chain(&q->filter_list); + for (i = 0; i < q->clhash.hashsize; i++) { + hlist_for_each_entry(cl, n, &q->clhash.hash[i], common.hnode) + tcf_destroy_chain(&cl->filter_list); + } + for (i = 0; i < q->clhash.hashsize; i++) { + hlist_for_each_entry_safe(cl, n, next, &q->clhash.hash[i], + common.hnode) + htb_destroy_class(sch, cl); + } + qdisc_class_hash_destroy(&q->clhash); __skb_queue_purge(&q->direct_queue); } @@ -1287,12 +1259,13 @@ static int htb_delete(struct Qdisc *sch, unsigned long arg) // TODO: why don't allow to delete subtree ? references ? does // tc subsys quarantee us that in htb_destroy it holds no class // refs so that we can remove children safely there ? - if (!list_empty(&cl->children) || cl->filter_cnt) + if (cl->children || cl->filter_cnt) return -EBUSY; if (!cl->level && htb_parent_last_child(cl)) { - new_q = qdisc_create_dflt(sch->dev, &pfifo_qdisc_ops, - cl->parent->classid); + new_q = qdisc_create_dflt(qdisc_dev(sch), sch->dev_queue, + &pfifo_qdisc_ops, + cl->parent->common.classid); last_child = 1; } @@ -1305,11 +1278,16 @@ static int htb_delete(struct Qdisc *sch, unsigned long arg) } /* delete from hash and active; remainder in destroy_class */ - hlist_del_init(&cl->hlist); + qdisc_class_hash_remove(&q->clhash, &cl->common); + if (cl->parent) + cl->parent->children--; if (cl->prio_activity) htb_deactivate(q, cl); + if (cl->cmode != HTB_CAN_SEND) + htb_safe_rb_erase(&cl->pq_node, q->wait_pq + cl->level); + if (last_child) htb_parent_to_leaf(q, cl, new_q); @@ -1394,12 +1372,10 @@ static int htb_change_class(struct Qdisc *sch, u32 classid, goto failure; gen_new_estimator(&cl->bstats, &cl->rate_est, - &sch->dev->queue_lock, + qdisc_root_sleeping_lock(sch), tca[TCA_RATE] ? : &est.nla); cl->refcnt = 1; - INIT_LIST_HEAD(&cl->sibling); - INIT_HLIST_NODE(&cl->hlist); - INIT_LIST_HEAD(&cl->children); + cl->children = 0; INIT_LIST_HEAD(&cl->un.leaf.drop_list); RB_CLEAR_NODE(&cl->pq_node); @@ -1409,7 +1385,8 @@ static int htb_change_class(struct Qdisc *sch, u32 classid, /* create leaf qdisc early because it uses kmalloc(GFP_KERNEL) so that can't be used inside of sch_tree_lock -- thanks to Karlis Peisenieks */ - new_q = qdisc_create_dflt(sch->dev, &pfifo_qdisc_ops, classid); + new_q = qdisc_create_dflt(qdisc_dev(sch), sch->dev_queue, + &pfifo_qdisc_ops, classid); sch_tree_lock(sch); if (parent && !parent->level) { unsigned int qlen = parent->un.leaf.q->q.qlen; @@ -1433,7 +1410,7 @@ static int htb_change_class(struct Qdisc *sch, u32 classid, /* leaf (we) needs elementary qdisc */ cl->un.leaf.q = new_q ? new_q : &noop_qdisc; - cl->classid = classid; + cl->common.classid = classid; cl->parent = parent; /* set class to be in HTB_CAN_SEND state */ @@ -1444,13 +1421,13 @@ static int htb_change_class(struct Qdisc *sch, u32 classid, cl->cmode = HTB_CAN_SEND; /* attach to the hash list and parent's family */ - hlist_add_head(&cl->hlist, q->hash + htb_hash(classid)); - list_add_tail(&cl->sibling, - parent ? &parent->children : &q->root); + qdisc_class_hash_insert(&q->clhash, &cl->common); + if (parent) + parent->children++; } else { if (tca[TCA_RATE]) gen_replace_estimator(&cl->bstats, &cl->rate_est, - &sch->dev->queue_lock, + qdisc_root_sleeping_lock(sch), tca[TCA_RATE]); sch_tree_lock(sch); } @@ -1462,13 +1439,13 @@ static int htb_change_class(struct Qdisc *sch, u32 classid, if (!hopt->quantum && cl->un.leaf.quantum < 1000) { printk(KERN_WARNING "HTB: quantum of class %X is small. Consider r2q change.\n", - cl->classid); + cl->common.classid); cl->un.leaf.quantum = 1000; } if (!hopt->quantum && cl->un.leaf.quantum > 200000) { printk(KERN_WARNING "HTB: quantum of class %X is big. Consider r2q change.\n", - cl->classid); + cl->common.classid); cl->un.leaf.quantum = 200000; } if (hopt->quantum) @@ -1491,6 +1468,8 @@ static int htb_change_class(struct Qdisc *sch, u32 classid, cl->ceil = ctab; sch_tree_unlock(sch); + qdisc_class_hash_grow(sch, &q->clhash); + *arg = (unsigned long)cl; return 0; @@ -1514,7 +1493,6 @@ static struct tcf_proto **htb_find_tcf(struct Qdisc *sch, unsigned long arg) static unsigned long htb_bind_filter(struct Qdisc *sch, unsigned long parent, u32 classid) { - struct htb_sched *q = qdisc_priv(sch); struct htb_class *cl = htb_find(classid, sch); /*if (cl && !cl->level) return 0; @@ -1528,35 +1506,29 @@ static unsigned long htb_bind_filter(struct Qdisc *sch, unsigned long parent, */ if (cl) cl->filter_cnt++; - else - q->filter_cnt++; return (unsigned long)cl; } static void htb_unbind_filter(struct Qdisc *sch, unsigned long arg) { - struct htb_sched *q = qdisc_priv(sch); struct htb_class *cl = (struct htb_class *)arg; if (cl) cl->filter_cnt--; - else - q->filter_cnt--; } static void htb_walk(struct Qdisc *sch, struct qdisc_walker *arg) { struct htb_sched *q = qdisc_priv(sch); - int i; + struct htb_class *cl; + struct hlist_node *n; + unsigned int i; if (arg->stop) return; - for (i = 0; i < HTB_HSIZE; i++) { - struct hlist_node *p; - struct htb_class *cl; - - hlist_for_each_entry(cl, p, q->hash + i, hlist) { + for (i = 0; i < q->clhash.hashsize; i++) { + hlist_for_each_entry(cl, n, &q->clhash.hash[i], common.hnode) { if (arg->count < arg->skip) { arg->count++; continue; diff --git a/net/sched/sch_ingress.c b/net/sched/sch_ingress.c index 274b1ddb160c..4a2b77374358 100644 --- a/net/sched/sch_ingress.c +++ b/net/sched/sch_ingress.c @@ -77,7 +77,7 @@ static int ingress_enqueue(struct sk_buff *skb, struct Qdisc *sch) result = tc_classify(skb, p->filter_list, &res); sch->bstats.packets++; - sch->bstats.bytes += skb->len; + sch->bstats.bytes += qdisc_pkt_len(skb); switch (result) { case TC_ACT_SHOT: result = TC_ACT_SHOT; @@ -104,7 +104,7 @@ static void ingress_destroy(struct Qdisc *sch) { struct ingress_qdisc_data *p = qdisc_priv(sch); - tcf_destroy_chain(p->filter_list); + tcf_destroy_chain(&p->filter_list); } static int ingress_dump(struct Qdisc *sch, struct sk_buff *skb) diff --git a/net/sched/sch_multiq.c b/net/sched/sch_multiq.c new file mode 100644 index 000000000000..915f3149dde2 --- /dev/null +++ b/net/sched/sch_multiq.c @@ -0,0 +1,477 @@ +/* + * Copyright (c) 2008, Intel Corporation. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License along with + * this program; if not, write to the Free Software Foundation, Inc., 59 Temple + * Place - Suite 330, Boston, MA 02111-1307 USA. + * + * Author: Alexander Duyck <alexander.h.duyck@intel.com> + */ + +#include <linux/module.h> +#include <linux/types.h> +#include <linux/kernel.h> +#include <linux/string.h> +#include <linux/errno.h> +#include <linux/skbuff.h> +#include <net/netlink.h> +#include <net/pkt_sched.h> + + +struct multiq_sched_data { + u16 bands; + u16 max_bands; + u16 curband; + struct tcf_proto *filter_list; + struct Qdisc **queues; +}; + + +static struct Qdisc * +multiq_classify(struct sk_buff *skb, struct Qdisc *sch, int *qerr) +{ + struct multiq_sched_data *q = qdisc_priv(sch); + u32 band; + struct tcf_result res; + int err; + + *qerr = NET_XMIT_SUCCESS | __NET_XMIT_BYPASS; + err = tc_classify(skb, q->filter_list, &res); +#ifdef CONFIG_NET_CLS_ACT + switch (err) { + case TC_ACT_STOLEN: + case TC_ACT_QUEUED: + *qerr = NET_XMIT_SUCCESS | __NET_XMIT_STOLEN; + case TC_ACT_SHOT: + return NULL; + } +#endif + band = skb_get_queue_mapping(skb); + + if (band >= q->bands) + return q->queues[0]; + + return q->queues[band]; +} + +static int +multiq_enqueue(struct sk_buff *skb, struct Qdisc *sch) +{ + struct Qdisc *qdisc; + int ret; + + qdisc = multiq_classify(skb, sch, &ret); +#ifdef CONFIG_NET_CLS_ACT + if (qdisc == NULL) { + + if (ret & __NET_XMIT_BYPASS) + sch->qstats.drops++; + kfree_skb(skb); + return ret; + } +#endif + + ret = qdisc_enqueue(skb, qdisc); + if (ret == NET_XMIT_SUCCESS) { + sch->bstats.bytes += qdisc_pkt_len(skb); + sch->bstats.packets++; + sch->q.qlen++; + return NET_XMIT_SUCCESS; + } + if (net_xmit_drop_count(ret)) + sch->qstats.drops++; + return ret; +} + + +static int +multiq_requeue(struct sk_buff *skb, struct Qdisc *sch) +{ + struct Qdisc *qdisc; + struct multiq_sched_data *q = qdisc_priv(sch); + int ret; + + qdisc = multiq_classify(skb, sch, &ret); +#ifdef CONFIG_NET_CLS_ACT + if (qdisc == NULL) { + if (ret & __NET_XMIT_BYPASS) + sch->qstats.drops++; + kfree_skb(skb); + return ret; + } +#endif + + ret = qdisc->ops->requeue(skb, qdisc); + if (ret == NET_XMIT_SUCCESS) { + sch->q.qlen++; + sch->qstats.requeues++; + if (q->curband) + q->curband--; + else + q->curband = q->bands - 1; + return NET_XMIT_SUCCESS; + } + if (net_xmit_drop_count(ret)) + sch->qstats.drops++; + return ret; +} + + +static struct sk_buff *multiq_dequeue(struct Qdisc *sch) +{ + struct multiq_sched_data *q = qdisc_priv(sch); + struct Qdisc *qdisc; + struct sk_buff *skb; + int band; + + for (band = 0; band < q->bands; band++) { + /* cycle through bands to ensure fairness */ + q->curband++; + if (q->curband >= q->bands) + q->curband = 0; + + /* Check that target subqueue is available before + * pulling an skb to avoid excessive requeues + */ + if (!__netif_subqueue_stopped(qdisc_dev(sch), q->curband)) { + qdisc = q->queues[q->curband]; + skb = qdisc->dequeue(qdisc); + if (skb) { + sch->q.qlen--; + return skb; + } + } + } + return NULL; + +} + +static unsigned int multiq_drop(struct Qdisc *sch) +{ + struct multiq_sched_data *q = qdisc_priv(sch); + int band; + unsigned int len; + struct Qdisc *qdisc; + + for (band = q->bands-1; band >= 0; band--) { + qdisc = q->queues[band]; + if (qdisc->ops->drop) { + len = qdisc->ops->drop(qdisc); + if (len != 0) { + sch->q.qlen--; + return len; + } + } + } + return 0; +} + + +static void +multiq_reset(struct Qdisc *sch) +{ + u16 band; + struct multiq_sched_data *q = qdisc_priv(sch); + + for (band = 0; band < q->bands; band++) + qdisc_reset(q->queues[band]); + sch->q.qlen = 0; + q->curband = 0; +} + +static void +multiq_destroy(struct Qdisc *sch) +{ + int band; + struct multiq_sched_data *q = qdisc_priv(sch); + + tcf_destroy_chain(&q->filter_list); + for (band = 0; band < q->bands; band++) + qdisc_destroy(q->queues[band]); + + kfree(q->queues); +} + +static int multiq_tune(struct Qdisc *sch, struct nlattr *opt) +{ + struct multiq_sched_data *q = qdisc_priv(sch); + struct tc_multiq_qopt *qopt; + int i; + + if (!netif_is_multiqueue(qdisc_dev(sch))) + return -EINVAL; + if (nla_len(opt) < sizeof(*qopt)) + return -EINVAL; + + qopt = nla_data(opt); + + qopt->bands = qdisc_dev(sch)->real_num_tx_queues; + + sch_tree_lock(sch); + q->bands = qopt->bands; + for (i = q->bands; i < q->max_bands; i++) { + if (q->queues[i] != &noop_qdisc) { + struct Qdisc *child = xchg(&q->queues[i], &noop_qdisc); + qdisc_tree_decrease_qlen(child, child->q.qlen); + qdisc_destroy(child); + } + } + + sch_tree_unlock(sch); + + for (i = 0; i < q->bands; i++) { + if (q->queues[i] == &noop_qdisc) { + struct Qdisc *child; + child = qdisc_create_dflt(qdisc_dev(sch), + sch->dev_queue, + &pfifo_qdisc_ops, + TC_H_MAKE(sch->handle, + i + 1)); + if (child) { + sch_tree_lock(sch); + child = xchg(&q->queues[i], child); + + if (child != &noop_qdisc) { + qdisc_tree_decrease_qlen(child, + child->q.qlen); + qdisc_destroy(child); + } + sch_tree_unlock(sch); + } + } + } + return 0; +} + +static int multiq_init(struct Qdisc *sch, struct nlattr *opt) +{ + struct multiq_sched_data *q = qdisc_priv(sch); + int i, err; + + q->queues = NULL; + + if (opt == NULL) + return -EINVAL; + + q->max_bands = qdisc_dev(sch)->num_tx_queues; + + q->queues = kcalloc(q->max_bands, sizeof(struct Qdisc *), GFP_KERNEL); + if (!q->queues) + return -ENOBUFS; + for (i = 0; i < q->max_bands; i++) + q->queues[i] = &noop_qdisc; + + err = multiq_tune(sch,opt); + + if (err) + kfree(q->queues); + + return err; +} + +static int multiq_dump(struct Qdisc *sch, struct sk_buff *skb) +{ + struct multiq_sched_data *q = qdisc_priv(sch); + unsigned char *b = skb_tail_pointer(skb); + struct tc_multiq_qopt opt; + + opt.bands = q->bands; + opt.max_bands = q->max_bands; + + NLA_PUT(skb, TCA_OPTIONS, sizeof(opt), &opt); + + return skb->len; + +nla_put_failure: + nlmsg_trim(skb, b); + return -1; +} + +static int multiq_graft(struct Qdisc *sch, unsigned long arg, struct Qdisc *new, + struct Qdisc **old) +{ + struct multiq_sched_data *q = qdisc_priv(sch); + unsigned long band = arg - 1; + + if (band >= q->bands) + return -EINVAL; + + if (new == NULL) + new = &noop_qdisc; + + sch_tree_lock(sch); + *old = q->queues[band]; + q->queues[band] = new; + qdisc_tree_decrease_qlen(*old, (*old)->q.qlen); + qdisc_reset(*old); + sch_tree_unlock(sch); + + return 0; +} + +static struct Qdisc * +multiq_leaf(struct Qdisc *sch, unsigned long arg) +{ + struct multiq_sched_data *q = qdisc_priv(sch); + unsigned long band = arg - 1; + + if (band >= q->bands) + return NULL; + + return q->queues[band]; +} + +static unsigned long multiq_get(struct Qdisc *sch, u32 classid) +{ + struct multiq_sched_data *q = qdisc_priv(sch); + unsigned long band = TC_H_MIN(classid); + + if (band - 1 >= q->bands) + return 0; + return band; +} + +static unsigned long multiq_bind(struct Qdisc *sch, unsigned long parent, + u32 classid) +{ + return multiq_get(sch, classid); +} + + +static void multiq_put(struct Qdisc *q, unsigned long cl) +{ + return; +} + +static int multiq_change(struct Qdisc *sch, u32 handle, u32 parent, + struct nlattr **tca, unsigned long *arg) +{ + unsigned long cl = *arg; + struct multiq_sched_data *q = qdisc_priv(sch); + + if (cl - 1 > q->bands) + return -ENOENT; + return 0; +} + +static int multiq_delete(struct Qdisc *sch, unsigned long cl) +{ + struct multiq_sched_data *q = qdisc_priv(sch); + if (cl - 1 > q->bands) + return -ENOENT; + return 0; +} + + +static int multiq_dump_class(struct Qdisc *sch, unsigned long cl, + struct sk_buff *skb, struct tcmsg *tcm) +{ + struct multiq_sched_data *q = qdisc_priv(sch); + + if (cl - 1 > q->bands) + return -ENOENT; + tcm->tcm_handle |= TC_H_MIN(cl); + if (q->queues[cl-1]) + tcm->tcm_info = q->queues[cl-1]->handle; + return 0; +} + +static int multiq_dump_class_stats(struct Qdisc *sch, unsigned long cl, + struct gnet_dump *d) +{ + struct multiq_sched_data *q = qdisc_priv(sch); + struct Qdisc *cl_q; + + cl_q = q->queues[cl - 1]; + if (gnet_stats_copy_basic(d, &cl_q->bstats) < 0 || + gnet_stats_copy_queue(d, &cl_q->qstats) < 0) + return -1; + + return 0; +} + +static void multiq_walk(struct Qdisc *sch, struct qdisc_walker *arg) +{ + struct multiq_sched_data *q = qdisc_priv(sch); + int band; + + if (arg->stop) + return; + + for (band = 0; band < q->bands; band++) { + if (arg->count < arg->skip) { + arg->count++; + continue; + } + if (arg->fn(sch, band+1, arg) < 0) { + arg->stop = 1; + break; + } + arg->count++; + } +} + +static struct tcf_proto **multiq_find_tcf(struct Qdisc *sch, unsigned long cl) +{ + struct multiq_sched_data *q = qdisc_priv(sch); + + if (cl) + return NULL; + return &q->filter_list; +} + +static const struct Qdisc_class_ops multiq_class_ops = { + .graft = multiq_graft, + .leaf = multiq_leaf, + .get = multiq_get, + .put = multiq_put, + .change = multiq_change, + .delete = multiq_delete, + .walk = multiq_walk, + .tcf_chain = multiq_find_tcf, + .bind_tcf = multiq_bind, + .unbind_tcf = multiq_put, + .dump = multiq_dump_class, + .dump_stats = multiq_dump_class_stats, +}; + +static struct Qdisc_ops multiq_qdisc_ops __read_mostly = { + .next = NULL, + .cl_ops = &multiq_class_ops, + .id = "multiq", + .priv_size = sizeof(struct multiq_sched_data), + .enqueue = multiq_enqueue, + .dequeue = multiq_dequeue, + .requeue = multiq_requeue, + .drop = multiq_drop, + .init = multiq_init, + .reset = multiq_reset, + .destroy = multiq_destroy, + .change = multiq_tune, + .dump = multiq_dump, + .owner = THIS_MODULE, +}; + +static int __init multiq_module_init(void) +{ + return register_qdisc(&multiq_qdisc_ops); +} + +static void __exit multiq_module_exit(void) +{ + unregister_qdisc(&multiq_qdisc_ops); +} + +module_init(multiq_module_init) +module_exit(multiq_module_exit) + +MODULE_LICENSE("GPL"); diff --git a/net/sched/sch_netem.c b/net/sched/sch_netem.c index c9c649b26eaa..a11959908d9a 100644 --- a/net/sched/sch_netem.c +++ b/net/sched/sch_netem.c @@ -82,6 +82,13 @@ struct netem_skb_cb { psched_time_t time_to_send; }; +static inline struct netem_skb_cb *netem_skb_cb(struct sk_buff *skb) +{ + BUILD_BUG_ON(sizeof(skb->cb) < + sizeof(struct qdisc_skb_cb) + sizeof(struct netem_skb_cb)); + return (struct netem_skb_cb *)qdisc_skb_cb(skb)->data; +} + /* init_crandom - initialize correlated random number generator * Use entropy source for initial seed. */ @@ -169,7 +176,7 @@ static int netem_enqueue(struct sk_buff *skb, struct Qdisc *sch) if (count == 0) { sch->qstats.drops++; kfree_skb(skb); - return NET_XMIT_BYPASS; + return NET_XMIT_SUCCESS | __NET_XMIT_BYPASS; } skb_orphan(skb); @@ -180,11 +187,11 @@ static int netem_enqueue(struct sk_buff *skb, struct Qdisc *sch) * skb will be queued. */ if (count > 1 && (skb2 = skb_clone(skb, GFP_ATOMIC)) != NULL) { - struct Qdisc *rootq = sch->dev->qdisc; + struct Qdisc *rootq = qdisc_root(sch); u32 dupsave = q->duplicate; /* prevent duplicating a dup... */ q->duplicate = 0; - rootq->enqueue(skb2, rootq); + qdisc_enqueue_root(skb2, rootq); q->duplicate = dupsave; } @@ -205,7 +212,7 @@ static int netem_enqueue(struct sk_buff *skb, struct Qdisc *sch) skb->data[net_random() % skb_headlen(skb)] ^= 1<<(net_random() % 8); } - cb = (struct netem_skb_cb *)skb->cb; + cb = netem_skb_cb(skb); if (q->gap == 0 /* not doing reordering */ || q->counter < q->gap /* inside last reordering gap */ || q->reorder < get_crandom(&q->reorder_cor)) { @@ -218,7 +225,7 @@ static int netem_enqueue(struct sk_buff *skb, struct Qdisc *sch) now = psched_get_time(); cb->time_to_send = now + delay; ++q->counter; - ret = q->qdisc->enqueue(skb, q->qdisc); + ret = qdisc_enqueue(skb, q->qdisc); } else { /* * Do re-ordering by putting one out of N packets at the front @@ -231,10 +238,11 @@ static int netem_enqueue(struct sk_buff *skb, struct Qdisc *sch) if (likely(ret == NET_XMIT_SUCCESS)) { sch->q.qlen++; - sch->bstats.bytes += skb->len; + sch->bstats.bytes += qdisc_pkt_len(skb); sch->bstats.packets++; - } else + } else if (net_xmit_drop_count(ret)) { sch->qstats.drops++; + } pr_debug("netem: enqueue ret %d\n", ret); return ret; @@ -277,8 +285,7 @@ static struct sk_buff *netem_dequeue(struct Qdisc *sch) skb = q->qdisc->dequeue(q->qdisc); if (skb) { - const struct netem_skb_cb *cb - = (const struct netem_skb_cb *)skb->cb; + const struct netem_skb_cb *cb = netem_skb_cb(skb); psched_time_t now = psched_get_time(); /* if more time remaining? */ @@ -310,28 +317,6 @@ static void netem_reset(struct Qdisc *sch) qdisc_watchdog_cancel(&q->watchdog); } -/* Pass size change message down to embedded FIFO */ -static int set_fifo_limit(struct Qdisc *q, int limit) -{ - struct nlattr *nla; - int ret = -ENOMEM; - - /* Hack to avoid sending change message to non-FIFO */ - if (strncmp(q->ops->id + 1, "fifo", 4) != 0) - return 0; - - nla = kmalloc(nla_attr_size(sizeof(struct tc_fifo_qopt)), GFP_KERNEL); - if (nla) { - nla->nla_type = RTM_NEWQDISC; - nla->nla_len = nla_attr_size(sizeof(struct tc_fifo_qopt)); - ((struct tc_fifo_qopt *)nla_data(nla))->limit = limit; - - ret = q->ops->change(q, nla); - kfree(nla); - } - return ret; -} - /* * Distribution data is a variable size payload containing * signed 16 bit values. @@ -341,6 +326,7 @@ static int get_dist_table(struct Qdisc *sch, const struct nlattr *attr) struct netem_sched_data *q = qdisc_priv(sch); unsigned long n = nla_len(attr)/sizeof(__s16); const __s16 *data = nla_data(attr); + spinlock_t *root_lock; struct disttable *d; int i; @@ -355,9 +341,11 @@ static int get_dist_table(struct Qdisc *sch, const struct nlattr *attr) for (i = 0; i < n; i++) d->table[i] = data[i]; - spin_lock_bh(&sch->dev->queue_lock); + root_lock = qdisc_root_sleeping_lock(sch); + + spin_lock_bh(root_lock); d = xchg(&q->delay_dist, d); - spin_unlock_bh(&sch->dev->queue_lock); + spin_unlock_bh(root_lock); kfree(d); return 0; @@ -400,6 +388,20 @@ static const struct nla_policy netem_policy[TCA_NETEM_MAX + 1] = { [TCA_NETEM_CORRUPT] = { .len = sizeof(struct tc_netem_corrupt) }, }; +static int parse_attr(struct nlattr *tb[], int maxtype, struct nlattr *nla, + const struct nla_policy *policy, int len) +{ + int nested_len = nla_len(nla) - NLA_ALIGN(len); + + if (nested_len < 0) + return -EINVAL; + if (nested_len >= nla_attr_size(0)) + return nla_parse(tb, maxtype, nla_data(nla) + NLA_ALIGN(len), + nested_len, policy); + memset(tb, 0, sizeof(struct nlattr *) * (maxtype + 1)); + return 0; +} + /* Parse netlink message to set options */ static int netem_change(struct Qdisc *sch, struct nlattr *opt) { @@ -411,12 +413,12 @@ static int netem_change(struct Qdisc *sch, struct nlattr *opt) if (opt == NULL) return -EINVAL; - ret = nla_parse_nested_compat(tb, TCA_NETEM_MAX, opt, netem_policy, - qopt, sizeof(*qopt)); + qopt = nla_data(opt); + ret = parse_attr(tb, TCA_NETEM_MAX, opt, netem_policy, sizeof(*qopt)); if (ret < 0) return ret; - ret = set_fifo_limit(q->qdisc, qopt->limit); + ret = fifo_set_limit(q->qdisc, qopt->limit); if (ret) { pr_debug("netem: can't set fifo limit\n"); return ret; @@ -476,7 +478,7 @@ static int tfifo_enqueue(struct sk_buff *nskb, struct Qdisc *sch) { struct fifo_sched_data *q = qdisc_priv(sch); struct sk_buff_head *list = &sch->q; - psched_time_t tnext = ((struct netem_skb_cb *)nskb->cb)->time_to_send; + psched_time_t tnext = netem_skb_cb(nskb)->time_to_send; struct sk_buff *skb; if (likely(skb_queue_len(list) < q->limit)) { @@ -487,8 +489,7 @@ static int tfifo_enqueue(struct sk_buff *nskb, struct Qdisc *sch) } skb_queue_reverse_walk(list, skb) { - const struct netem_skb_cb *cb - = (const struct netem_skb_cb *)skb->cb; + const struct netem_skb_cb *cb = netem_skb_cb(skb); if (tnext >= cb->time_to_send) break; @@ -496,8 +497,8 @@ static int tfifo_enqueue(struct sk_buff *nskb, struct Qdisc *sch) __skb_queue_after(list, skb, nskb); - sch->qstats.backlog += nskb->len; - sch->bstats.bytes += nskb->len; + sch->qstats.backlog += qdisc_pkt_len(nskb); + sch->bstats.bytes += qdisc_pkt_len(nskb); sch->bstats.packets++; return NET_XMIT_SUCCESS; @@ -517,7 +518,7 @@ static int tfifo_init(struct Qdisc *sch, struct nlattr *opt) q->limit = ctl->limit; } else - q->limit = max_t(u32, sch->dev->tx_queue_len, 1); + q->limit = max_t(u32, qdisc_dev(sch)->tx_queue_len, 1); q->oldest = PSCHED_PASTPERFECT; return 0; @@ -558,7 +559,8 @@ static int netem_init(struct Qdisc *sch, struct nlattr *opt) qdisc_watchdog_init(&q->watchdog, sch); - q->qdisc = qdisc_create_dflt(sch->dev, &tfifo_qdisc_ops, + q->qdisc = qdisc_create_dflt(qdisc_dev(sch), sch->dev_queue, + &tfifo_qdisc_ops, TC_H_MAKE(sch->handle, 1)); if (!q->qdisc) { pr_debug("netem: qdisc create failed\n"); diff --git a/net/sched/sch_prio.c b/net/sched/sch_prio.c index 4aa2b45dad0a..504a78cdb718 100644 --- a/net/sched/sch_prio.c +++ b/net/sched/sch_prio.c @@ -24,11 +24,9 @@ struct prio_sched_data { int bands; - int curband; /* for round-robin */ struct tcf_proto *filter_list; u8 prio2band[TC_PRIO_MAX+1]; struct Qdisc *queues[TCQ_PRIO_BANDS]; - int mq; }; @@ -40,14 +38,14 @@ prio_classify(struct sk_buff *skb, struct Qdisc *sch, int *qerr) struct tcf_result res; int err; - *qerr = NET_XMIT_BYPASS; + *qerr = NET_XMIT_SUCCESS | __NET_XMIT_BYPASS; if (TC_H_MAJ(skb->priority) != sch->handle) { err = tc_classify(skb, q->filter_list, &res); #ifdef CONFIG_NET_CLS_ACT switch (err) { case TC_ACT_STOLEN: case TC_ACT_QUEUED: - *qerr = NET_XMIT_SUCCESS; + *qerr = NET_XMIT_SUCCESS | __NET_XMIT_STOLEN; case TC_ACT_SHOT: return NULL; } @@ -55,17 +53,14 @@ prio_classify(struct sk_buff *skb, struct Qdisc *sch, int *qerr) if (!q->filter_list || err < 0) { if (TC_H_MAJ(band)) band = 0; - band = q->prio2band[band&TC_PRIO_MAX]; - goto out; + return q->queues[q->prio2band[band&TC_PRIO_MAX]]; } band = res.classid; } band = TC_H_MIN(band) - 1; if (band >= q->bands) - band = q->prio2band[0]; -out: - if (q->mq) - skb_set_queue_mapping(skb, band); + return q->queues[q->prio2band[0]]; + return q->queues[band]; } @@ -79,20 +74,22 @@ prio_enqueue(struct sk_buff *skb, struct Qdisc *sch) #ifdef CONFIG_NET_CLS_ACT if (qdisc == NULL) { - if (ret == NET_XMIT_BYPASS) + if (ret & __NET_XMIT_BYPASS) sch->qstats.drops++; kfree_skb(skb); return ret; } #endif - if ((ret = qdisc->enqueue(skb, qdisc)) == NET_XMIT_SUCCESS) { - sch->bstats.bytes += skb->len; + ret = qdisc_enqueue(skb, qdisc); + if (ret == NET_XMIT_SUCCESS) { + sch->bstats.bytes += qdisc_pkt_len(skb); sch->bstats.packets++; sch->q.qlen++; return NET_XMIT_SUCCESS; } - sch->qstats.drops++; + if (net_xmit_drop_count(ret)) + sch->qstats.drops++; return ret; } @@ -106,7 +103,7 @@ prio_requeue(struct sk_buff *skb, struct Qdisc* sch) qdisc = prio_classify(skb, sch, &ret); #ifdef CONFIG_NET_CLS_ACT if (qdisc == NULL) { - if (ret == NET_XMIT_BYPASS) + if (ret & __NET_XMIT_BYPASS) sch->qstats.drops++; kfree_skb(skb); return ret; @@ -116,74 +113,31 @@ prio_requeue(struct sk_buff *skb, struct Qdisc* sch) if ((ret = qdisc->ops->requeue(skb, qdisc)) == NET_XMIT_SUCCESS) { sch->q.qlen++; sch->qstats.requeues++; - return 0; + return NET_XMIT_SUCCESS; } - sch->qstats.drops++; - return NET_XMIT_DROP; + if (net_xmit_drop_count(ret)) + sch->qstats.drops++; + return ret; } -static struct sk_buff * -prio_dequeue(struct Qdisc* sch) +static struct sk_buff *prio_dequeue(struct Qdisc* sch) { - struct sk_buff *skb; struct prio_sched_data *q = qdisc_priv(sch); int prio; - struct Qdisc *qdisc; for (prio = 0; prio < q->bands; prio++) { - /* Check if the target subqueue is available before - * pulling an skb. This way we avoid excessive requeues - * for slower queues. - */ - if (!__netif_subqueue_stopped(sch->dev, (q->mq ? prio : 0))) { - qdisc = q->queues[prio]; - skb = qdisc->dequeue(qdisc); - if (skb) { - sch->q.qlen--; - return skb; - } + struct Qdisc *qdisc = q->queues[prio]; + struct sk_buff *skb = qdisc->dequeue(qdisc); + if (skb) { + sch->q.qlen--; + return skb; } } return NULL; } -static struct sk_buff *rr_dequeue(struct Qdisc* sch) -{ - struct sk_buff *skb; - struct prio_sched_data *q = qdisc_priv(sch); - struct Qdisc *qdisc; - int bandcount; - - /* Only take one pass through the queues. If nothing is available, - * return nothing. - */ - for (bandcount = 0; bandcount < q->bands; bandcount++) { - /* Check if the target subqueue is available before - * pulling an skb. This way we avoid excessive requeues - * for slower queues. If the queue is stopped, try the - * next queue. - */ - if (!__netif_subqueue_stopped(sch->dev, - (q->mq ? q->curband : 0))) { - qdisc = q->queues[q->curband]; - skb = qdisc->dequeue(qdisc); - if (skb) { - sch->q.qlen--; - q->curband++; - if (q->curband >= q->bands) - q->curband = 0; - return skb; - } - } - q->curband++; - if (q->curband >= q->bands) - q->curband = 0; - } - return NULL; -} - static unsigned int prio_drop(struct Qdisc* sch) { struct prio_sched_data *q = qdisc_priv(sch); @@ -219,7 +173,7 @@ prio_destroy(struct Qdisc* sch) int prio; struct prio_sched_data *q = qdisc_priv(sch); - tcf_destroy_chain(q->filter_list); + tcf_destroy_chain(&q->filter_list); for (prio=0; prio<q->bands; prio++) qdisc_destroy(q->queues[prio]); } @@ -228,45 +182,22 @@ static int prio_tune(struct Qdisc *sch, struct nlattr *opt) { struct prio_sched_data *q = qdisc_priv(sch); struct tc_prio_qopt *qopt; - struct nlattr *tb[TCA_PRIO_MAX + 1]; - int err; int i; - err = nla_parse_nested_compat(tb, TCA_PRIO_MAX, opt, NULL, qopt, - sizeof(*qopt)); - if (err < 0) - return err; - - q->bands = qopt->bands; - /* If we're multiqueue, make sure the number of incoming bands - * matches the number of queues on the device we're associating with. - * If the number of bands requested is zero, then set q->bands to - * dev->egress_subqueue_count. Also, the root qdisc must be the - * only one that is enabled for multiqueue, since it's the only one - * that interacts with the underlying device. - */ - q->mq = nla_get_flag(tb[TCA_PRIO_MQ]); - if (q->mq) { - if (sch->parent != TC_H_ROOT) - return -EINVAL; - if (netif_is_multiqueue(sch->dev)) { - if (q->bands == 0) - q->bands = sch->dev->egress_subqueue_count; - else if (q->bands != sch->dev->egress_subqueue_count) - return -EINVAL; - } else - return -EOPNOTSUPP; - } + if (nla_len(opt) < sizeof(*qopt)) + return -EINVAL; + qopt = nla_data(opt); - if (q->bands > TCQ_PRIO_BANDS || q->bands < 2) + if (qopt->bands > TCQ_PRIO_BANDS || qopt->bands < 2) return -EINVAL; for (i=0; i<=TC_PRIO_MAX; i++) { - if (qopt->priomap[i] >= q->bands) + if (qopt->priomap[i] >= qopt->bands) return -EINVAL; } sch_tree_lock(sch); + q->bands = qopt->bands; memcpy(q->prio2band, qopt->priomap, TC_PRIO_MAX+1); for (i=q->bands; i<TCQ_PRIO_BANDS; i++) { @@ -281,7 +212,8 @@ static int prio_tune(struct Qdisc *sch, struct nlattr *opt) for (i=0; i<q->bands; i++) { if (q->queues[i] == &noop_qdisc) { struct Qdisc *child; - child = qdisc_create_dflt(sch->dev, &pfifo_qdisc_ops, + child = qdisc_create_dflt(qdisc_dev(sch), sch->dev_queue, + &pfifo_qdisc_ops, TC_H_MAKE(sch->handle, i + 1)); if (child) { sch_tree_lock(sch); @@ -322,20 +254,12 @@ static int prio_dump(struct Qdisc *sch, struct sk_buff *skb) { struct prio_sched_data *q = qdisc_priv(sch); unsigned char *b = skb_tail_pointer(skb); - struct nlattr *nest; struct tc_prio_qopt opt; opt.bands = q->bands; memcpy(&opt.priomap, q->prio2band, TC_PRIO_MAX+1); - nest = nla_nest_compat_start(skb, TCA_OPTIONS, sizeof(opt), &opt); - if (nest == NULL) - goto nla_put_failure; - if (q->mq) { - if (nla_put_flag(skb, TCA_PRIO_MQ) < 0) - goto nla_put_failure; - } - nla_nest_compat_end(skb, nest); + NLA_PUT(skb, TCA_OPTIONS, sizeof(opt), &opt); return skb->len; @@ -507,44 +431,17 @@ static struct Qdisc_ops prio_qdisc_ops __read_mostly = { .owner = THIS_MODULE, }; -static struct Qdisc_ops rr_qdisc_ops __read_mostly = { - .next = NULL, - .cl_ops = &prio_class_ops, - .id = "rr", - .priv_size = sizeof(struct prio_sched_data), - .enqueue = prio_enqueue, - .dequeue = rr_dequeue, - .requeue = prio_requeue, - .drop = prio_drop, - .init = prio_init, - .reset = prio_reset, - .destroy = prio_destroy, - .change = prio_tune, - .dump = prio_dump, - .owner = THIS_MODULE, -}; - static int __init prio_module_init(void) { - int err; - - err = register_qdisc(&prio_qdisc_ops); - if (err < 0) - return err; - err = register_qdisc(&rr_qdisc_ops); - if (err < 0) - unregister_qdisc(&prio_qdisc_ops); - return err; + return register_qdisc(&prio_qdisc_ops); } static void __exit prio_module_exit(void) { unregister_qdisc(&prio_qdisc_ops); - unregister_qdisc(&rr_qdisc_ops); } module_init(prio_module_init) module_exit(prio_module_exit) MODULE_LICENSE("GPL"); -MODULE_ALIAS("sch_rr"); diff --git a/net/sched/sch_red.c b/net/sched/sch_red.c index 5c569853b9c0..5da05839e225 100644 --- a/net/sched/sch_red.c +++ b/net/sched/sch_red.c @@ -92,12 +92,12 @@ static int red_enqueue(struct sk_buff *skb, struct Qdisc* sch) break; } - ret = child->enqueue(skb, child); + ret = qdisc_enqueue(skb, child); if (likely(ret == NET_XMIT_SUCCESS)) { - sch->bstats.bytes += skb->len; + sch->bstats.bytes += qdisc_pkt_len(skb); sch->bstats.packets++; sch->q.qlen++; - } else { + } else if (net_xmit_drop_count(ret)) { q->stats.pdrop++; sch->qstats.drops++; } @@ -174,33 +174,6 @@ static void red_destroy(struct Qdisc *sch) qdisc_destroy(q->qdisc); } -static struct Qdisc *red_create_dflt(struct Qdisc *sch, u32 limit) -{ - struct Qdisc *q; - struct nlattr *nla; - int ret; - - q = qdisc_create_dflt(sch->dev, &bfifo_qdisc_ops, - TC_H_MAKE(sch->handle, 1)); - if (q) { - nla = kmalloc(nla_attr_size(sizeof(struct tc_fifo_qopt)), - GFP_KERNEL); - if (nla) { - nla->nla_type = RTM_NEWQDISC; - nla->nla_len = nla_attr_size(sizeof(struct tc_fifo_qopt)); - ((struct tc_fifo_qopt *)nla_data(nla))->limit = limit; - - ret = q->ops->change(q, nla); - kfree(nla); - - if (ret == 0) - return q; - } - qdisc_destroy(q); - } - return NULL; -} - static const struct nla_policy red_policy[TCA_RED_MAX + 1] = { [TCA_RED_PARMS] = { .len = sizeof(struct tc_red_qopt) }, [TCA_RED_STAB] = { .len = RED_STAB_SIZE }, @@ -228,9 +201,9 @@ static int red_change(struct Qdisc *sch, struct nlattr *opt) ctl = nla_data(tb[TCA_RED_PARMS]); if (ctl->limit > 0) { - child = red_create_dflt(sch, ctl->limit); - if (child == NULL) - return -ENOMEM; + child = fifo_create_dflt(sch, &bfifo_qdisc_ops, ctl->limit); + if (IS_ERR(child)) + return PTR_ERR(child); } sch_tree_lock(sch); diff --git a/net/sched/sch_sfq.c b/net/sched/sch_sfq.c index f0463d757a98..fe1508ef0d3d 100644 --- a/net/sched/sch_sfq.c +++ b/net/sched/sch_sfq.c @@ -119,7 +119,7 @@ static unsigned sfq_hash(struct sfq_sched_data *q, struct sk_buff *skb) u32 h, h2; switch (skb->protocol) { - case __constant_htons(ETH_P_IP): + case htons(ETH_P_IP): { const struct iphdr *iph = ip_hdr(skb); h = iph->daddr; @@ -134,7 +134,7 @@ static unsigned sfq_hash(struct sfq_sched_data *q, struct sk_buff *skb) h2 ^= *(((u32*)iph) + iph->ihl); break; } - case __constant_htons(ETH_P_IPV6): + case htons(ETH_P_IPV6): { struct ipv6hdr *iph = ipv6_hdr(skb); h = iph->daddr.s6_addr32[3]; @@ -171,14 +171,14 @@ static unsigned int sfq_classify(struct sk_buff *skb, struct Qdisc *sch, if (!q->filter_list) return sfq_hash(q, skb) + 1; - *qerr = NET_XMIT_BYPASS; + *qerr = NET_XMIT_SUCCESS | __NET_XMIT_BYPASS; result = tc_classify(skb, q->filter_list, &res); if (result >= 0) { #ifdef CONFIG_NET_CLS_ACT switch (result) { case TC_ACT_STOLEN: case TC_ACT_QUEUED: - *qerr = NET_XMIT_SUCCESS; + *qerr = NET_XMIT_SUCCESS | __NET_XMIT_STOLEN; case TC_ACT_SHOT: return 0; } @@ -245,7 +245,7 @@ static unsigned int sfq_drop(struct Qdisc *sch) if (d > 1) { sfq_index x = q->dep[d + SFQ_DEPTH].next; skb = q->qs[x].prev; - len = skb->len; + len = qdisc_pkt_len(skb); __skb_unlink(skb, &q->qs[x]); kfree_skb(skb); sfq_dec(q, x); @@ -261,7 +261,7 @@ static unsigned int sfq_drop(struct Qdisc *sch) q->next[q->tail] = q->next[d]; q->allot[q->next[d]] += q->quantum; skb = q->qs[d].prev; - len = skb->len; + len = qdisc_pkt_len(skb); __skb_unlink(skb, &q->qs[d]); kfree_skb(skb); sfq_dec(q, d); @@ -285,7 +285,7 @@ sfq_enqueue(struct sk_buff *skb, struct Qdisc *sch) hash = sfq_classify(skb, sch, &ret); if (hash == 0) { - if (ret == NET_XMIT_BYPASS) + if (ret & __NET_XMIT_BYPASS) sch->qstats.drops++; kfree_skb(skb); return ret; @@ -305,7 +305,7 @@ sfq_enqueue(struct sk_buff *skb, struct Qdisc *sch) if (q->qs[x].qlen >= q->limit) return qdisc_drop(skb, sch); - sch->qstats.backlog += skb->len; + sch->qstats.backlog += qdisc_pkt_len(skb); __skb_queue_tail(&q->qs[x], skb); sfq_inc(q, x); if (q->qs[x].qlen == 1) { /* The flow is new */ @@ -320,7 +320,7 @@ sfq_enqueue(struct sk_buff *skb, struct Qdisc *sch) } } if (++sch->q.qlen <= q->limit) { - sch->bstats.bytes += skb->len; + sch->bstats.bytes += qdisc_pkt_len(skb); sch->bstats.packets++; return 0; } @@ -339,7 +339,7 @@ sfq_requeue(struct sk_buff *skb, struct Qdisc *sch) hash = sfq_classify(skb, sch, &ret); if (hash == 0) { - if (ret == NET_XMIT_BYPASS) + if (ret & __NET_XMIT_BYPASS) sch->qstats.drops++; kfree_skb(skb); return ret; @@ -352,7 +352,7 @@ sfq_requeue(struct sk_buff *skb, struct Qdisc *sch) q->hash[x] = hash; } - sch->qstats.backlog += skb->len; + sch->qstats.backlog += qdisc_pkt_len(skb); __skb_queue_head(&q->qs[x], skb); /* If selected queue has length q->limit+1, this means that * all another queues are empty and we do simple tail drop. @@ -363,7 +363,7 @@ sfq_requeue(struct sk_buff *skb, struct Qdisc *sch) skb = q->qs[x].prev; __skb_unlink(skb, &q->qs[x]); sch->qstats.drops++; - sch->qstats.backlog -= skb->len; + sch->qstats.backlog -= qdisc_pkt_len(skb); kfree_skb(skb); return NET_XMIT_CN; } @@ -411,7 +411,7 @@ sfq_dequeue(struct Qdisc *sch) skb = __skb_dequeue(&q->qs[a]); sfq_dec(q, a); sch->q.qlen--; - sch->qstats.backlog -= skb->len; + sch->qstats.backlog -= qdisc_pkt_len(skb); /* Is the slot empty? */ if (q->qs[a].qlen == 0) { @@ -423,7 +423,7 @@ sfq_dequeue(struct Qdisc *sch) } q->next[q->tail] = a; q->allot[a] += q->quantum; - } else if ((q->allot[a] -= skb->len) <= 0) { + } else if ((q->allot[a] -= qdisc_pkt_len(skb)) <= 0) { q->tail = a; a = q->next[a]; q->allot[a] += q->quantum; @@ -461,7 +461,7 @@ static int sfq_change(struct Qdisc *sch, struct nlattr *opt) return -EINVAL; sch_tree_lock(sch); - q->quantum = ctl->quantum ? : psched_mtu(sch->dev); + q->quantum = ctl->quantum ? : psched_mtu(qdisc_dev(sch)); q->perturb_period = ctl->perturb_period * HZ; if (ctl->limit) q->limit = min_t(u32, ctl->limit, SFQ_DEPTH - 1); @@ -502,7 +502,7 @@ static int sfq_init(struct Qdisc *sch, struct nlattr *opt) q->max_depth = 0; q->tail = SFQ_DEPTH; if (opt == NULL) { - q->quantum = psched_mtu(sch->dev); + q->quantum = psched_mtu(qdisc_dev(sch)); q->perturb_period = 0; q->perturbation = net_random(); } else { @@ -520,7 +520,7 @@ static void sfq_destroy(struct Qdisc *sch) { struct sfq_sched_data *q = qdisc_priv(sch); - tcf_destroy_chain(q->filter_list); + tcf_destroy_chain(&q->filter_list); q->perturb_period = 0; del_timer_sync(&q->perturb_timer); } diff --git a/net/sched/sch_tbf.c b/net/sched/sch_tbf.c index 0b7d78f59d8c..94c61598b86a 100644 --- a/net/sched/sch_tbf.c +++ b/net/sched/sch_tbf.c @@ -123,23 +123,18 @@ static int tbf_enqueue(struct sk_buff *skb, struct Qdisc* sch) struct tbf_sched_data *q = qdisc_priv(sch); int ret; - if (skb->len > q->max_size) { - sch->qstats.drops++; -#ifdef CONFIG_NET_CLS_ACT - if (sch->reshape_fail == NULL || sch->reshape_fail(skb, sch)) -#endif - kfree_skb(skb); - - return NET_XMIT_DROP; - } + if (qdisc_pkt_len(skb) > q->max_size) + return qdisc_reshape_fail(skb, sch); - if ((ret = q->qdisc->enqueue(skb, q->qdisc)) != 0) { - sch->qstats.drops++; + ret = qdisc_enqueue(skb, q->qdisc); + if (ret != 0) { + if (net_xmit_drop_count(ret)) + sch->qstats.drops++; return ret; } sch->q.qlen++; - sch->bstats.bytes += skb->len; + sch->bstats.bytes += qdisc_pkt_len(skb); sch->bstats.packets++; return 0; } @@ -180,7 +175,7 @@ static struct sk_buff *tbf_dequeue(struct Qdisc* sch) psched_time_t now; long toks; long ptoks = 0; - unsigned int len = skb->len; + unsigned int len = qdisc_pkt_len(skb); now = psched_get_time(); toks = psched_tdiff_bounded(now, q->t_c, q->buffer); @@ -242,34 +237,6 @@ static void tbf_reset(struct Qdisc* sch) qdisc_watchdog_cancel(&q->watchdog); } -static struct Qdisc *tbf_create_dflt_qdisc(struct Qdisc *sch, u32 limit) -{ - struct Qdisc *q; - struct nlattr *nla; - int ret; - - q = qdisc_create_dflt(sch->dev, &bfifo_qdisc_ops, - TC_H_MAKE(sch->handle, 1)); - if (q) { - nla = kmalloc(nla_attr_size(sizeof(struct tc_fifo_qopt)), - GFP_KERNEL); - if (nla) { - nla->nla_type = RTM_NEWQDISC; - nla->nla_len = nla_attr_size(sizeof(struct tc_fifo_qopt)); - ((struct tc_fifo_qopt *)nla_data(nla))->limit = limit; - - ret = q->ops->change(q, nla); - kfree(nla); - - if (ret == 0) - return q; - } - qdisc_destroy(q); - } - - return NULL; -} - static const struct nla_policy tbf_policy[TCA_TBF_MAX + 1] = { [TCA_TBF_PARMS] = { .len = sizeof(struct tc_tbf_qopt) }, [TCA_TBF_RTAB] = { .type = NLA_BINARY, .len = TC_RTAB_SIZE }, @@ -322,8 +289,11 @@ static int tbf_change(struct Qdisc* sch, struct nlattr *opt) goto done; if (qopt->limit > 0) { - if ((child = tbf_create_dflt_qdisc(sch, qopt->limit)) == NULL) + child = fifo_create_dflt(sch, &bfifo_qdisc_ops, qopt->limit); + if (IS_ERR(child)) { + err = PTR_ERR(child); goto done; + } } sch_tree_lock(sch); diff --git a/net/sched/sch_teql.c b/net/sched/sch_teql.c index 0444fd0f0d22..d35ef059abb1 100644 --- a/net/sched/sch_teql.c +++ b/net/sched/sch_teql.c @@ -78,12 +78,12 @@ struct teql_sched_data static int teql_enqueue(struct sk_buff *skb, struct Qdisc* sch) { - struct net_device *dev = sch->dev; + struct net_device *dev = qdisc_dev(sch); struct teql_sched_data *q = qdisc_priv(sch); if (q->q.qlen < dev->tx_queue_len) { __skb_queue_tail(&q->q, skb); - sch->bstats.bytes += skb->len; + sch->bstats.bytes += qdisc_pkt_len(skb); sch->bstats.packets++; return 0; } @@ -107,17 +107,19 @@ static struct sk_buff * teql_dequeue(struct Qdisc* sch) { struct teql_sched_data *dat = qdisc_priv(sch); + struct netdev_queue *dat_queue; struct sk_buff *skb; skb = __skb_dequeue(&dat->q); + dat_queue = netdev_get_tx_queue(dat->m->dev, 0); if (skb == NULL) { - struct net_device *m = dat->m->dev->qdisc->dev; + struct net_device *m = qdisc_dev(dat_queue->qdisc); if (m) { dat->m->slaves = sch; netif_wake_queue(m); } } - sch->q.qlen = dat->q.qlen + dat->m->dev->qdisc->q.qlen; + sch->q.qlen = dat->q.qlen + dat_queue->qdisc->q.qlen; return skb; } @@ -153,10 +155,16 @@ teql_destroy(struct Qdisc* sch) if (q == master->slaves) { master->slaves = NEXT_SLAVE(q); if (q == master->slaves) { + struct netdev_queue *txq; + spinlock_t *root_lock; + + txq = netdev_get_tx_queue(master->dev, 0); master->slaves = NULL; - spin_lock_bh(&master->dev->queue_lock); - qdisc_reset(master->dev->qdisc); - spin_unlock_bh(&master->dev->queue_lock); + + root_lock = qdisc_root_sleeping_lock(txq->qdisc); + spin_lock_bh(root_lock); + qdisc_reset(txq->qdisc); + spin_unlock_bh(root_lock); } } skb_queue_purge(&dat->q); @@ -170,7 +178,7 @@ teql_destroy(struct Qdisc* sch) static int teql_qdisc_init(struct Qdisc *sch, struct nlattr *opt) { - struct net_device *dev = sch->dev; + struct net_device *dev = qdisc_dev(sch); struct teql_master *m = (struct teql_master*)sch->ops; struct teql_sched_data *q = qdisc_priv(sch); @@ -216,7 +224,8 @@ static int teql_qdisc_init(struct Qdisc *sch, struct nlattr *opt) static int __teql_resolve(struct sk_buff *skb, struct sk_buff *skb_res, struct net_device *dev) { - struct teql_sched_data *q = qdisc_priv(dev->qdisc); + struct netdev_queue *dev_queue = netdev_get_tx_queue(dev, 0); + struct teql_sched_data *q = qdisc_priv(dev_queue->qdisc); struct neighbour *mn = skb->dst->neighbour; struct neighbour *n = q->ncache; @@ -252,7 +261,8 @@ __teql_resolve(struct sk_buff *skb, struct sk_buff *skb_res, struct net_device * static inline int teql_resolve(struct sk_buff *skb, struct sk_buff *skb_res, struct net_device *dev) { - if (dev->qdisc == &noop_qdisc) + struct netdev_queue *txq = netdev_get_tx_queue(dev, 0); + if (txq->qdisc == &noop_qdisc) return -ENODEV; if (dev->header_ops == NULL || @@ -268,7 +278,6 @@ static int teql_master_xmit(struct sk_buff *skb, struct net_device *dev) struct Qdisc *start, *q; int busy; int nores; - int len = skb->len; int subq = skb_get_queue_mapping(skb); struct sk_buff *skb_res = NULL; @@ -282,12 +291,13 @@ restart: goto drop; do { - struct net_device *slave = q->dev; + struct net_device *slave = qdisc_dev(q); + struct netdev_queue *slave_txq; - if (slave->qdisc_sleeping != q) + slave_txq = netdev_get_tx_queue(slave, 0); + if (slave_txq->qdisc_sleeping != q) continue; - if (netif_queue_stopped(slave) || - __netif_subqueue_stopped(slave, subq) || + if (__netif_subqueue_stopped(slave, subq) || !netif_running(slave)) { busy = 1; continue; @@ -295,18 +305,19 @@ restart: switch (teql_resolve(skb, skb_res, slave)) { case 0: - if (netif_tx_trylock(slave)) { - if (!netif_queue_stopped(slave) && - !__netif_subqueue_stopped(slave, subq) && + if (__netif_tx_trylock(slave_txq)) { + if (!netif_tx_queue_stopped(slave_txq) && + !netif_tx_queue_frozen(slave_txq) && slave->hard_start_xmit(skb, slave) == 0) { - netif_tx_unlock(slave); + __netif_tx_unlock(slave_txq); master->slaves = NEXT_SLAVE(q); netif_wake_queue(dev); master->stats.tx_packets++; - master->stats.tx_bytes += len; + master->stats.tx_bytes += + qdisc_pkt_len(skb); return 0; } - netif_tx_unlock(slave); + __netif_tx_unlock(slave_txq); } if (netif_queue_stopped(dev)) busy = 1; @@ -352,7 +363,7 @@ static int teql_master_open(struct net_device *dev) q = m->slaves; do { - struct net_device *slave = q->dev; + struct net_device *slave = qdisc_dev(q); if (slave == NULL) return -EUNATCH; @@ -403,7 +414,7 @@ static int teql_master_mtu(struct net_device *dev, int new_mtu) q = m->slaves; if (q) { do { - if (new_mtu > q->dev->mtu) + if (new_mtu > qdisc_dev(q)->mtu) return -EINVAL; } while ((q=NEXT_SLAVE(q)) != m->slaves); } diff --git a/net/sctp/Kconfig b/net/sctp/Kconfig index 0b79f869c4ea..58b3e882a187 100644 --- a/net/sctp/Kconfig +++ b/net/sctp/Kconfig @@ -47,11 +47,11 @@ config SCTP_DBG_MSG config SCTP_DBG_OBJCNT bool "SCTP: Debug object counts" + depends on PROC_FS help If you say Y, this will enable debugging support for counting the type of objects that are currently allocated. This is useful for - identifying memory leaks. If the /proc filesystem is enabled this - debug information can be viewed by + identifying memory leaks. This debug information can be viewed by 'cat /proc/net/sctp/sctp_dbg_objcnt' If unsure, say N diff --git a/net/sctp/Makefile b/net/sctp/Makefile index f5356b9d5ee3..6b794734380a 100644 --- a/net/sctp/Makefile +++ b/net/sctp/Makefile @@ -9,10 +9,10 @@ sctp-y := sm_statetable.o sm_statefuns.o sm_sideeffect.o \ transport.o chunk.o sm_make_chunk.o ulpevent.o \ inqueue.o outqueue.o ulpqueue.o command.o \ tsnmap.o bind_addr.o socket.o primitive.o \ - output.o input.o debug.o ssnmap.o proc.o \ - auth.o + output.o input.o debug.o ssnmap.o auth.o sctp-$(CONFIG_SCTP_DBG_OBJCNT) += objcnt.o +sctp-$(CONFIG_PROC_FS) += proc.o sctp-$(CONFIG_SYSCTL) += sysctl.o sctp-$(subst m,y,$(CONFIG_IPV6)) += ipv6.o diff --git a/net/sctp/associola.c b/net/sctp/associola.c index 024c3ebd9661..f4b23043b610 100644 --- a/net/sctp/associola.c +++ b/net/sctp/associola.c @@ -136,6 +136,7 @@ static struct sctp_association *sctp_association_init(struct sctp_association *a /* Set association default SACK delay */ asoc->sackdelay = msecs_to_jiffies(sp->sackdelay); + asoc->sackfreq = sp->sackfreq; /* Set the association default flags controlling * Heartbeat, SACK delay, and Path MTU Discovery. @@ -261,6 +262,7 @@ static struct sctp_association *sctp_association_init(struct sctp_association *a * already received one packet.] */ asoc->peer.sack_needed = 1; + asoc->peer.sack_cnt = 0; /* Assume that the peer will tell us if he recognizes ASCONF * as part of INIT exchange. @@ -281,8 +283,7 @@ static struct sctp_association *sctp_association_init(struct sctp_association *a if (!sctp_ulpq_init(&asoc->ulpq, asoc)) goto fail_init; - /* Set up the tsn tracking. */ - sctp_tsnmap_init(&asoc->peer.tsn_map, SCTP_TSN_MAP_SIZE, 0); + memset(&asoc->peer.tsn_map, 0, sizeof(struct sctp_tsnmap)); asoc->need_ecne = 0; @@ -400,6 +401,8 @@ void sctp_association_free(struct sctp_association *asoc) /* Dispose of any pending chunks on the inqueue. */ sctp_inq_free(&asoc->base.inqueue); + sctp_tsnmap_free(&asoc->peer.tsn_map); + /* Free ssnmap storage. */ sctp_ssnmap_free(asoc->ssnmap); @@ -462,7 +465,7 @@ static void sctp_association_destroy(struct sctp_association *asoc) spin_unlock_bh(&sctp_assocs_id_lock); } - BUG_TRAP(!atomic_read(&asoc->rmem_alloc)); + WARN_ON(atomic_read(&asoc->rmem_alloc)); if (asoc->base.malloced) { kfree(asoc); @@ -597,11 +600,12 @@ struct sctp_transport *sctp_assoc_add_peer(struct sctp_association *asoc, /* Check to see if this is a duplicate. */ peer = sctp_assoc_lookup_paddr(asoc, addr); if (peer) { + /* An UNKNOWN state is only set on transports added by + * user in sctp_connectx() call. Such transports should be + * considered CONFIRMED per RFC 4960, Section 5.4. + */ if (peer->state == SCTP_UNKNOWN) { - if (peer_state == SCTP_ACTIVE) - peer->state = SCTP_ACTIVE; - if (peer_state == SCTP_UNCONFIRMED) - peer->state = SCTP_UNCONFIRMED; + peer->state = SCTP_ACTIVE; } return peer; } @@ -624,6 +628,7 @@ struct sctp_transport *sctp_assoc_add_peer(struct sctp_association *asoc, * association configured value. */ peer->sackdelay = asoc->sackdelay; + peer->sackfreq = asoc->sackfreq; /* Enable/disable heartbeat, SACK delay, and path MTU discovery * based on association setting. @@ -650,6 +655,7 @@ struct sctp_transport *sctp_assoc_add_peer(struct sctp_association *asoc, SCTP_DEBUG_PRINTK("sctp_assoc_add_peer:association %p PMTU set to " "%d\n", asoc, asoc->pathmtu); + peer->pmtu_pending = 0; asoc->frag_point = sctp_frag_point(sp, asoc->pathmtu); @@ -1117,8 +1123,8 @@ void sctp_assoc_update(struct sctp_association *asoc, asoc->peer.rwnd = new->peer.rwnd; asoc->peer.sack_needed = new->peer.sack_needed; asoc->peer.i = new->peer.i; - sctp_tsnmap_init(&asoc->peer.tsn_map, SCTP_TSN_MAP_SIZE, - asoc->peer.i.initial_tsn); + sctp_tsnmap_init(&asoc->peer.tsn_map, SCTP_TSN_MAP_INITIAL, + asoc->peer.i.initial_tsn, GFP_ATOMIC); /* Remove any peer addresses not present in the new association. */ list_for_each_safe(pos, temp, &asoc->peer.transport_addr_list) { diff --git a/net/sctp/auth.c b/net/sctp/auth.c index 675a5c3e68a6..52db5f60daa0 100644 --- a/net/sctp/auth.c +++ b/net/sctp/auth.c @@ -80,6 +80,10 @@ static struct sctp_auth_bytes *sctp_auth_create_key(__u32 key_len, gfp_t gfp) { struct sctp_auth_bytes *key; + /* Verify that we are not going to overflow INT_MAX */ + if ((INT_MAX - key_len) < sizeof(struct sctp_auth_bytes)) + return NULL; + /* Allocate the shared key */ key = kmalloc(sizeof(struct sctp_auth_bytes) + key_len, gfp); if (!key) @@ -782,6 +786,9 @@ int sctp_auth_ep_set_hmacs(struct sctp_endpoint *ep, for (i = 0; i < hmacs->shmac_num_idents; i++) { id = hmacs->shmac_idents[i]; + if (id > SCTP_AUTH_HMAC_ID_MAX) + return -EOPNOTSUPP; + if (SCTP_AUTH_HMAC_ID_SHA1 == id) has_sha1 = 1; diff --git a/net/sctp/bind_addr.c b/net/sctp/bind_addr.c index 80e6df06967a..6d5944a745d4 100644 --- a/net/sctp/bind_addr.c +++ b/net/sctp/bind_addr.c @@ -348,6 +348,43 @@ int sctp_bind_addr_match(struct sctp_bind_addr *bp, return match; } +/* Does the address 'addr' conflict with any addresses in + * the bp. + */ +int sctp_bind_addr_conflict(struct sctp_bind_addr *bp, + const union sctp_addr *addr, + struct sctp_sock *bp_sp, + struct sctp_sock *addr_sp) +{ + struct sctp_sockaddr_entry *laddr; + int conflict = 0; + struct sctp_sock *sp; + + /* Pick the IPv6 socket as the basis of comparison + * since it's usually a superset of the IPv4. + * If there is no IPv6 socket, then default to bind_addr. + */ + if (sctp_opt2sk(bp_sp)->sk_family == AF_INET6) + sp = bp_sp; + else if (sctp_opt2sk(addr_sp)->sk_family == AF_INET6) + sp = addr_sp; + else + sp = bp_sp; + + rcu_read_lock(); + list_for_each_entry_rcu(laddr, &bp->address_list, list) { + if (!laddr->valid) + continue; + + conflict = sp->pf->cmp_addr(&laddr->a, addr, sp); + if (conflict) + break; + } + rcu_read_unlock(); + + return conflict; +} + /* Get the state of the entry in the bind_addr_list */ int sctp_bind_addr_state(const struct sctp_bind_addr *bp, const union sctp_addr *addr) @@ -420,7 +457,7 @@ static int sctp_copy_one_addr(struct sctp_bind_addr *dest, { int error = 0; - if (sctp_is_any(addr)) { + if (sctp_is_any(NULL, addr)) { error = sctp_copy_local_addr_list(dest, scope, gfp, flags); } else if (sctp_in_scope(addr, scope)) { /* Now that the address is in scope, check to see if @@ -440,11 +477,21 @@ static int sctp_copy_one_addr(struct sctp_bind_addr *dest, } /* Is this a wildcard address? */ -int sctp_is_any(const union sctp_addr *addr) +int sctp_is_any(struct sock *sk, const union sctp_addr *addr) { - struct sctp_af *af = sctp_get_af_specific(addr->sa.sa_family); + unsigned short fam = 0; + struct sctp_af *af; + + /* Try to get the right address family */ + if (addr->sa.sa_family != AF_UNSPEC) + fam = addr->sa.sa_family; + else if (sk) + fam = sk->sk_family; + + af = sctp_get_af_specific(fam); if (!af) return 0; + return af->is_any(addr); } diff --git a/net/sctp/endpointola.c b/net/sctp/endpointola.c index e39a0cdef184..4c8d9f45ce09 100644 --- a/net/sctp/endpointola.c +++ b/net/sctp/endpointola.c @@ -103,6 +103,7 @@ static struct sctp_endpoint *sctp_endpoint_init(struct sctp_endpoint *ep, /* Initialize the CHUNKS parameter */ auth_chunks->param_hdr.type = SCTP_PARAM_CHUNKS; + auth_chunks->param_hdr.length = htons(sizeof(sctp_paramhdr_t)); /* If the Add-IP functionality is enabled, we must * authenticate, ASCONF and ASCONF-ACK chunks @@ -110,8 +111,7 @@ static struct sctp_endpoint *sctp_endpoint_init(struct sctp_endpoint *ep, if (sctp_addip_enable) { auth_chunks->chunks[0] = SCTP_CID_ASCONF; auth_chunks->chunks[1] = SCTP_CID_ASCONF_ACK; - auth_chunks->param_hdr.length = - htons(sizeof(sctp_paramhdr_t) + 2); + auth_chunks->param_hdr.length += htons(2); } } diff --git a/net/sctp/input.c b/net/sctp/input.c index ca6b022b1df2..a49fa80b57b9 100644 --- a/net/sctp/input.c +++ b/net/sctp/input.c @@ -61,6 +61,7 @@ #include <net/sctp/sctp.h> #include <net/sctp/sm.h> #include <net/sctp/checksum.h> +#include <net/net_namespace.h> /* Forward declarations for internal helpers. */ static int sctp_rcv_ootb(struct sk_buff *); @@ -82,8 +83,8 @@ static inline int sctp_rcv_checksum(struct sk_buff *skb) { struct sk_buff *list = skb_shinfo(skb)->frag_list; struct sctphdr *sh = sctp_hdr(skb); - __u32 cmp = ntohl(sh->checksum); - __u32 val = sctp_start_cksum((__u8 *)sh, skb_headlen(skb)); + __be32 cmp = sh->checksum; + __be32 val = sctp_start_cksum((__u8 *)sh, skb_headlen(skb)); for (; list; list = list->next) val = sctp_update_cksum((__u8 *)list->data, skb_headlen(list), @@ -430,6 +431,9 @@ struct sock *sctp_err_lookup(int family, struct sk_buff *skb, struct sock *sk = NULL; struct sctp_association *asoc; struct sctp_transport *transport = NULL; + struct sctp_init_chunk *chunkhdr; + __u32 vtag = ntohl(sctphdr->vtag); + int len = skb->len - ((void *)sctphdr - (void *)skb->data); *app = NULL; *tpp = NULL; @@ -451,8 +455,28 @@ struct sock *sctp_err_lookup(int family, struct sk_buff *skb, sk = asoc->base.sk; - if (ntohl(sctphdr->vtag) != asoc->c.peer_vtag) { - ICMP_INC_STATS_BH(ICMP_MIB_INERRORS); + /* RFC 4960, Appendix C. ICMP Handling + * + * ICMP6) An implementation MUST validate that the Verification Tag + * contained in the ICMP message matches the Verification Tag of + * the peer. If the Verification Tag is not 0 and does NOT + * match, discard the ICMP message. If it is 0 and the ICMP + * message contains enough bytes to verify that the chunk type is + * an INIT chunk and that the Initiate Tag matches the tag of the + * peer, continue with ICMP7. If the ICMP message is too short + * or the chunk type or the Initiate Tag does not match, silently + * discard the packet. + */ + if (vtag == 0) { + chunkhdr = (struct sctp_init_chunk *)((void *)sctphdr + + sizeof(struct sctphdr)); + if (len < sizeof(struct sctphdr) + sizeof(sctp_chunkhdr_t) + + sizeof(__be32) || + chunkhdr->chunk_hdr.type != SCTP_CID_INIT || + ntohl(chunkhdr->init_hdr.init_tag) != asoc->c.my_vtag) { + goto out; + } + } else if (vtag != asoc->c.peer_vtag) { goto out; } @@ -462,7 +486,7 @@ struct sock *sctp_err_lookup(int family, struct sk_buff *skb, * servers this needs to be solved differently. */ if (sock_owned_by_user(sk)) - NET_INC_STATS_BH(LINUX_MIB_LOCKDROPPEDICMPS); + NET_INC_STATS_BH(&init_net, LINUX_MIB_LOCKDROPPEDICMPS); *app = asoc; *tpp = transport; @@ -511,7 +535,7 @@ void sctp_v4_err(struct sk_buff *skb, __u32 info) int err; if (skb->len < ihlen + 8) { - ICMP_INC_STATS_BH(ICMP_MIB_INERRORS); + ICMP_INC_STATS_BH(&init_net, ICMP_MIB_INERRORS); return; } @@ -525,7 +549,7 @@ void sctp_v4_err(struct sk_buff *skb, __u32 info) skb->network_header = saveip; skb->transport_header = savesctp; if (!sk) { - ICMP_INC_STATS_BH(ICMP_MIB_INERRORS); + ICMP_INC_STATS_BH(&init_net, ICMP_MIB_INERRORS); return; } /* Warning: The sock lock is held. Remember to call diff --git a/net/sctp/ipv6.c b/net/sctp/ipv6.c index a2f4d4d51593..4124bbb99947 100644 --- a/net/sctp/ipv6.c +++ b/net/sctp/ipv6.c @@ -156,7 +156,7 @@ SCTP_STATIC void sctp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt, skb->network_header = saveip; skb->transport_header = savesctp; if (!sk) { - ICMP6_INC_STATS_BH(idev, ICMP6_MIB_INERRORS); + ICMP6_INC_STATS_BH(dev_net(skb->dev), idev, ICMP6_MIB_INERRORS); goto out; } @@ -195,8 +195,7 @@ out: } /* Based on tcp_v6_xmit() in tcp_ipv6.c. */ -static int sctp_v6_xmit(struct sk_buff *skb, struct sctp_transport *transport, - int ipfragok) +static int sctp_v6_xmit(struct sk_buff *skb, struct sctp_transport *transport) { struct sock *sk = skb->sk; struct ipv6_pinfo *np = inet6_sk(sk); @@ -231,7 +230,10 @@ static int sctp_v6_xmit(struct sk_buff *skb, struct sctp_transport *transport, SCTP_INC_STATS(SCTP_MIB_OUTSCTPPACKS); - return ip6_xmit(sk, skb, &fl, np->opt, ipfragok); + if (!(transport->param_flags & SPP_PMTUD_ENABLE)) + skb->local_df = 1; + + return ip6_xmit(sk, skb, &fl, np->opt, 0); } /* Returns the dst cache entry for the given source and destination ip @@ -317,7 +319,8 @@ static void sctp_v6_get_saddr(struct sctp_sock *sk, __func__, asoc, dst, NIP6(daddr->v6.sin6_addr)); if (!asoc) { - ipv6_dev_get_saddr(dst ? ip6_dst_idev(dst)->dev : NULL, + ipv6_dev_get_saddr(sock_net(sctp_opt2sk(sk)), + dst ? ip6_dst_idev(dst)->dev : NULL, &daddr->v6.sin6_addr, inet6_sk(&sk->inet.sk)->srcprefs, &saddr->v6.sin6_addr); @@ -818,7 +821,7 @@ static int sctp_inet6_af_supported(sa_family_t family, struct sctp_sock *sp) return 1; /* v4-mapped-v6 addresses */ case AF_INET: - if (!__ipv6_only_sock(sctp_opt2sk(sp)) && sp->v4mapped) + if (!__ipv6_only_sock(sctp_opt2sk(sp))) return 1; default: return 0; @@ -834,14 +837,20 @@ static int sctp_inet6_cmp_addr(const union sctp_addr *addr1, struct sctp_sock *opt) { struct sctp_af *af1, *af2; + struct sock *sk = sctp_opt2sk(opt); af1 = sctp_get_af_specific(addr1->sa.sa_family); af2 = sctp_get_af_specific(addr2->sa.sa_family); if (!af1 || !af2) return 0; + + /* If the socket is IPv6 only, v4 addrs will not match */ + if (__ipv6_only_sock(sk) && af1 != af2) + return 0; + /* Today, wildcard AF_INET/AF_INET6. */ - if (sctp_is_any(addr1) || sctp_is_any(addr2)) + if (sctp_is_any(sk, addr1) || sctp_is_any(sk, addr2)) return 1; if (addr1->sa.sa_family != addr2->sa.sa_family) @@ -876,7 +885,11 @@ static int sctp_inet6_bind_verify(struct sctp_sock *opt, union sctp_addr *addr) return 0; } dev_put(dev); + } else if (type == IPV6_ADDR_MAPPED) { + if (!opt->v4mapped) + return 0; } + af = opt->pf->af; } return af->available(addr, opt); @@ -919,9 +932,12 @@ static int sctp_inet6_send_verify(struct sctp_sock *opt, union sctp_addr *addr) static int sctp_inet6_supported_addrs(const struct sctp_sock *opt, __be16 *types) { - types[0] = SCTP_PARAM_IPV4_ADDRESS; - types[1] = SCTP_PARAM_IPV6_ADDRESS; - return 2; + types[0] = SCTP_PARAM_IPV6_ADDRESS; + if (!opt || !ipv6_only_sock(sctp_opt2sk(opt))) { + types[1] = SCTP_PARAM_IPV4_ADDRESS; + return 2; + } + return 1; } static const struct proto_ops inet6_seqpacket_ops = { diff --git a/net/sctp/output.c b/net/sctp/output.c index 6d45bae93b46..c3f417f7ec6e 100644 --- a/net/sctp/output.c +++ b/net/sctp/output.c @@ -50,6 +50,7 @@ #include <linux/init.h> #include <net/inet_ecn.h> #include <net/icmp.h> +#include <net/net_namespace.h> #ifndef TEST_FRAME #include <net/tcp.h> @@ -157,7 +158,8 @@ void sctp_packet_free(struct sctp_packet *packet) * packet can be sent only after receiving the COOKIE_ACK. */ sctp_xmit_t sctp_packet_transmit_chunk(struct sctp_packet *packet, - struct sctp_chunk *chunk) + struct sctp_chunk *chunk, + int one_packet) { sctp_xmit_t retval; int error = 0; @@ -175,7 +177,9 @@ sctp_xmit_t sctp_packet_transmit_chunk(struct sctp_packet *packet, /* If we have an empty packet, then we can NOT ever * return PMTU_FULL. */ - retval = sctp_packet_append_chunk(packet, chunk); + if (!one_packet) + retval = sctp_packet_append_chunk(packet, + chunk); } break; @@ -361,7 +365,7 @@ int sctp_packet_transmit(struct sctp_packet *packet) struct sctp_transport *tp = packet->transport; struct sctp_association *asoc = tp->asoc; struct sctphdr *sh; - __u32 crc32 = 0; + __be32 crc32 = __constant_cpu_to_be32(0); struct sk_buff *nskb; struct sctp_chunk *chunk, *tmp; struct sock *sk; @@ -529,12 +533,13 @@ int sctp_packet_transmit(struct sctp_packet *packet) if (!(dst->dev->features & NETIF_F_NO_CSUM)) { crc32 = sctp_start_cksum((__u8 *)sh, cksum_buf_len); crc32 = sctp_end_cksum(crc32); - } + } else + nskb->ip_summed = CHECKSUM_UNNECESSARY; /* 3) Put the resultant value into the checksum field in the * common header, and leave the rest of the bits unchanged. */ - sh->checksum = htonl(crc32); + sh->checksum = crc32; /* IP layer ECN support * From RFC 2481 @@ -582,17 +587,15 @@ int sctp_packet_transmit(struct sctp_packet *packet) SCTP_DEBUG_PRINTK("***sctp_transmit_packet*** skb len %d\n", nskb->len); - if (tp->param_flags & SPP_PMTUD_ENABLE) - (*tp->af_specific->sctp_xmit)(nskb, tp, packet->ipfragok); - else - (*tp->af_specific->sctp_xmit)(nskb, tp, 1); + nskb->local_df = packet->ipfragok; + (*tp->af_specific->sctp_xmit)(nskb, tp); out: packet->size = packet->overhead; return err; no_route: kfree_skb(nskb); - IP_INC_STATS_BH(IPSTATS_MIB_OUTNOROUTES); + IP_INC_STATS_BH(&init_net, IPSTATS_MIB_OUTNOROUTES); /* FIXME: Returning the 'err' will effect all the associations * associated with a socket, although only one of the paths of the @@ -696,7 +699,7 @@ static sctp_xmit_t sctp_packet_append_data(struct sctp_packet *packet, * When a Fast Retransmit is being performed the sender SHOULD * ignore the value of cwnd and SHOULD NOT delay retransmission. */ - if (chunk->fast_retransmit <= 0) + if (chunk->fast_retransmit != SCTP_NEED_FRTX) if (transport->flight_size >= transport->cwnd) { retval = SCTP_XMIT_RWND_FULL; goto finish; diff --git a/net/sctp/outqueue.c b/net/sctp/outqueue.c index ace6770e9048..247ebc95c1e5 100644 --- a/net/sctp/outqueue.c +++ b/net/sctp/outqueue.c @@ -71,6 +71,8 @@ static void sctp_mark_missing(struct sctp_outq *q, static void sctp_generate_fwdtsn(struct sctp_outq *q, __u32 sack_ctsn); +static int sctp_outq_flush(struct sctp_outq *q, int rtx_timeout); + /* Add data to the front of the queue. */ static inline void sctp_outq_head_data(struct sctp_outq *q, struct sctp_chunk *ch) @@ -418,7 +420,7 @@ void sctp_retransmit_mark(struct sctp_outq *q, * be added to the retransmit queue. */ if ((reason == SCTP_RTXR_FAST_RTX && - (chunk->fast_retransmit > 0)) || + (chunk->fast_retransmit == SCTP_NEED_FRTX)) || (reason != SCTP_RTXR_FAST_RTX && !chunk->tsn_gap_acked)) { /* If this chunk was sent less then 1 rto ago, do not * retransmit this chunk, but give the peer time @@ -648,8 +650,8 @@ static int sctp_outq_flush_rtx(struct sctp_outq *q, struct sctp_packet *pkt, /* Mark the chunk as ineligible for fast retransmit * after it is retransmitted. */ - if (chunk->fast_retransmit > 0) - chunk->fast_retransmit = -1; + if (chunk->fast_retransmit == SCTP_NEED_FRTX) + chunk->fast_retransmit = SCTP_DONT_FRTX; /* Force start T3-rtx timer when fast retransmitting * the earliest outstanding TSN @@ -678,8 +680,8 @@ static int sctp_outq_flush_rtx(struct sctp_outq *q, struct sctp_packet *pkt, */ if (rtx_timeout || fast_rtx) { list_for_each_entry(chunk1, lqueue, transmitted_list) { - if (chunk1->fast_retransmit > 0) - chunk1->fast_retransmit = -1; + if (chunk1->fast_retransmit == SCTP_NEED_FRTX) + chunk1->fast_retransmit = SCTP_DONT_FRTX; } } @@ -702,6 +704,7 @@ int sctp_outq_uncork(struct sctp_outq *q) return error; } + /* * Try to flush an outqueue. * @@ -711,7 +714,7 @@ int sctp_outq_uncork(struct sctp_outq *q) * locking concerns must be made. Today we use the sock lock to protect * this function. */ -int sctp_outq_flush(struct sctp_outq *q, int rtx_timeout) +static int sctp_outq_flush(struct sctp_outq *q, int rtx_timeout) { struct sctp_packet *packet; struct sctp_packet singleton; @@ -725,6 +728,7 @@ int sctp_outq_flush(struct sctp_outq *q, int rtx_timeout) sctp_xmit_t status; int error = 0; int start_timer = 0; + int one_packet = 0; /* These transports have chunks to send. */ struct list_head transport_list; @@ -830,20 +834,33 @@ int sctp_outq_flush(struct sctp_outq *q, int rtx_timeout) if (sctp_test_T_bit(chunk)) { packet->vtag = asoc->c.my_vtag; } - case SCTP_CID_SACK: - case SCTP_CID_HEARTBEAT: + /* The following chunks are "response" chunks, i.e. + * they are generated in response to something we + * received. If we are sending these, then we can + * send only 1 packet containing these chunks. + */ case SCTP_CID_HEARTBEAT_ACK: - case SCTP_CID_SHUTDOWN: case SCTP_CID_SHUTDOWN_ACK: - case SCTP_CID_ERROR: - case SCTP_CID_COOKIE_ECHO: case SCTP_CID_COOKIE_ACK: - case SCTP_CID_ECN_ECNE: + case SCTP_CID_COOKIE_ECHO: + case SCTP_CID_ERROR: case SCTP_CID_ECN_CWR: - case SCTP_CID_ASCONF: case SCTP_CID_ASCONF_ACK: + one_packet = 1; + /* Fall throught */ + + case SCTP_CID_SACK: + case SCTP_CID_HEARTBEAT: + case SCTP_CID_SHUTDOWN: + case SCTP_CID_ECN_ECNE: + case SCTP_CID_ASCONF: case SCTP_CID_FWD_TSN: - sctp_packet_transmit_chunk(packet, chunk); + status = sctp_packet_transmit_chunk(packet, chunk, + one_packet); + if (status != SCTP_XMIT_OK) { + /* put the chunk back */ + list_add(&chunk->list, &q->control_chunk_list); + } break; default: @@ -974,7 +991,7 @@ int sctp_outq_flush(struct sctp_outq *q, int rtx_timeout) atomic_read(&chunk->skb->users) : -1); /* Add the chunk to the packet. */ - status = sctp_packet_transmit_chunk(packet, chunk); + status = sctp_packet_transmit_chunk(packet, chunk, 0); switch (status) { case SCTP_XMIT_PMTU_FULL: @@ -1112,12 +1129,13 @@ int sctp_outq_sack(struct sctp_outq *q, struct sctp_sackhdr *sack) unsigned outstanding; struct sctp_transport *primary = asoc->peer.primary_path; int count_of_newacks = 0; + int gap_ack_blocks; /* Grab the association's destination address list. */ transport_list = &asoc->peer.transport_addr_list; sack_ctsn = ntohl(sack->cum_tsn_ack); - + gap_ack_blocks = ntohs(sack->num_gap_ack_blocks); /* * SFR-CACC algorithm: * On receipt of a SACK the sender SHOULD execute the @@ -1127,35 +1145,38 @@ int sctp_outq_sack(struct sctp_outq *q, struct sctp_sackhdr *sack) * on the current primary, the CHANGEOVER_ACTIVE flag SHOULD be * cleared. The CYCLING_CHANGEOVER flag SHOULD also be cleared for * all destinations. - */ - if (TSN_lte(primary->cacc.next_tsn_at_change, sack_ctsn)) { - primary->cacc.changeover_active = 0; - list_for_each_entry(transport, transport_list, - transports) { - transport->cacc.cycling_changeover = 0; - } - } - - /* - * SFR-CACC algorithm: * 2) If the SACK contains gap acks and the flag CHANGEOVER_ACTIVE * is set the receiver of the SACK MUST take the following actions: * * A) Initialize the cacc_saw_newack to 0 for all destination * addresses. + * + * Only bother if changeover_active is set. Otherwise, this is + * totally suboptimal to do on every SACK. */ - if (sack->num_gap_ack_blocks && - primary->cacc.changeover_active) { - list_for_each_entry(transport, transport_list, transports) { - transport->cacc.cacc_saw_newack = 0; + if (primary->cacc.changeover_active) { + u8 clear_cycling = 0; + + if (TSN_lte(primary->cacc.next_tsn_at_change, sack_ctsn)) { + primary->cacc.changeover_active = 0; + clear_cycling = 1; + } + + if (clear_cycling || gap_ack_blocks) { + list_for_each_entry(transport, transport_list, + transports) { + if (clear_cycling) + transport->cacc.cycling_changeover = 0; + if (gap_ack_blocks) + transport->cacc.cacc_saw_newack = 0; + } } } /* Get the highest TSN in the sack. */ highest_tsn = sack_ctsn; - if (sack->num_gap_ack_blocks) - highest_tsn += - ntohs(frags[ntohs(sack->num_gap_ack_blocks) - 1].gab.end); + if (gap_ack_blocks) + highest_tsn += ntohs(frags[gap_ack_blocks - 1].gab.end); if (TSN_lt(asoc->highest_sacked, highest_tsn)) { highest_new_tsn = highest_tsn; @@ -1164,11 +1185,11 @@ int sctp_outq_sack(struct sctp_outq *q, struct sctp_sackhdr *sack) highest_new_tsn = sctp_highest_new_tsn(sack, asoc); } + /* Run through the retransmit queue. Credit bytes received * and free those chunks that we can. */ sctp_check_transmitted(q, &q->retransmit, NULL, sack, highest_new_tsn); - sctp_mark_missing(q, &q->retransmit, NULL, highest_new_tsn, 0); /* Run through the transmitted queue. * Credit bytes received and free those chunks which we can. @@ -1187,9 +1208,10 @@ int sctp_outq_sack(struct sctp_outq *q, struct sctp_sackhdr *sack) count_of_newacks ++; } - list_for_each_entry(transport, transport_list, transports) { - sctp_mark_missing(q, &transport->transmitted, transport, - highest_new_tsn, count_of_newacks); + if (gap_ack_blocks) { + list_for_each_entry(transport, transport_list, transports) + sctp_mark_missing(q, &transport->transmitted, transport, + highest_new_tsn, count_of_newacks); } /* Move the Cumulative TSN Ack Point if appropriate. */ @@ -1239,7 +1261,6 @@ int sctp_outq_sack(struct sctp_outq *q, struct sctp_sackhdr *sack) * Make sure the empty queue handler will get run later. */ q->empty = (list_empty(&q->out_chunk_list) && - list_empty(&q->control_chunk_list) && list_empty(&q->retransmit)); if (!q->empty) goto finish; @@ -1635,7 +1656,7 @@ static void sctp_mark_missing(struct sctp_outq *q, * chunk if it has NOT been fast retransmitted or marked for * fast retransmit already. */ - if (!chunk->fast_retransmit && + if (chunk->fast_retransmit == SCTP_CAN_FRTX && !chunk->tsn_gap_acked && TSN_lt(tsn, highest_new_tsn_in_sack)) { @@ -1660,7 +1681,7 @@ static void sctp_mark_missing(struct sctp_outq *q, */ if (chunk->tsn_missing_report >= 3) { - chunk->fast_retransmit = 1; + chunk->fast_retransmit = SCTP_NEED_FRTX; do_fast_retransmit = 1; } } diff --git a/net/sctp/proc.c b/net/sctp/proc.c index 0aba759cb9b7..f268910620be 100644 --- a/net/sctp/proc.c +++ b/net/sctp/proc.c @@ -383,3 +383,139 @@ void sctp_assocs_proc_exit(void) { remove_proc_entry("assocs", proc_net_sctp); } + +static void *sctp_remaddr_seq_start(struct seq_file *seq, loff_t *pos) +{ + if (*pos >= sctp_assoc_hashsize) + return NULL; + + if (*pos < 0) + *pos = 0; + + if (*pos == 0) + seq_printf(seq, "ADDR ASSOC_ID HB_ACT RTO MAX_PATH_RTX " + "REM_ADDR_RTX START\n"); + + return (void *)pos; +} + +static void *sctp_remaddr_seq_next(struct seq_file *seq, void *v, loff_t *pos) +{ + if (++*pos >= sctp_assoc_hashsize) + return NULL; + + return pos; +} + +static void sctp_remaddr_seq_stop(struct seq_file *seq, void *v) +{ + return; +} + +static int sctp_remaddr_seq_show(struct seq_file *seq, void *v) +{ + struct sctp_hashbucket *head; + struct sctp_ep_common *epb; + struct sctp_association *assoc; + struct hlist_node *node; + struct sctp_transport *tsp; + int hash = *(loff_t *)v; + + if (hash >= sctp_assoc_hashsize) + return -ENOMEM; + + head = &sctp_assoc_hashtable[hash]; + sctp_local_bh_disable(); + read_lock(&head->lock); + sctp_for_each_hentry(epb, node, &head->chain) { + assoc = sctp_assoc(epb); + list_for_each_entry(tsp, &assoc->peer.transport_addr_list, + transports) { + /* + * The remote address (ADDR) + */ + tsp->af_specific->seq_dump_addr(seq, &tsp->ipaddr); + seq_printf(seq, " "); + + /* + * The association ID (ASSOC_ID) + */ + seq_printf(seq, "%d ", tsp->asoc->assoc_id); + + /* + * If the Heartbeat is active (HB_ACT) + * Note: 1 = Active, 0 = Inactive + */ + seq_printf(seq, "%d ", timer_pending(&tsp->hb_timer)); + + /* + * Retransmit time out (RTO) + */ + seq_printf(seq, "%lu ", tsp->rto); + + /* + * Maximum path retransmit count (PATH_MAX_RTX) + */ + seq_printf(seq, "%d ", tsp->pathmaxrxt); + + /* + * remote address retransmit count (REM_ADDR_RTX) + * Note: We don't have a way to tally this at the moment + * so lets just leave it as zero for the moment + */ + seq_printf(seq, "0 "); + + /* + * remote address start time (START). This is also not + * currently implemented, but we can record it with a + * jiffies marker in a subsequent patch + */ + seq_printf(seq, "0"); + + seq_printf(seq, "\n"); + } + } + + read_unlock(&head->lock); + sctp_local_bh_enable(); + + return 0; + +} + +static const struct seq_operations sctp_remaddr_ops = { + .start = sctp_remaddr_seq_start, + .next = sctp_remaddr_seq_next, + .stop = sctp_remaddr_seq_stop, + .show = sctp_remaddr_seq_show, +}; + +/* Cleanup the proc fs entry for 'remaddr' object. */ +void sctp_remaddr_proc_exit(void) +{ + remove_proc_entry("remaddr", proc_net_sctp); +} + +static int sctp_remaddr_seq_open(struct inode *inode, struct file *file) +{ + return seq_open(file, &sctp_remaddr_ops); +} + +static const struct file_operations sctp_remaddr_seq_fops = { + .open = sctp_remaddr_seq_open, + .read = seq_read, + .llseek = seq_lseek, + .release = seq_release, +}; + +int __init sctp_remaddr_proc_init(void) +{ + struct proc_dir_entry *p; + + p = create_proc_entry("remaddr", S_IRUGO, proc_net_sctp); + if (!p) + return -ENOMEM; + p->proc_fops = &sctp_remaddr_seq_fops; + + return 0; +} diff --git a/net/sctp/protocol.c b/net/sctp/protocol.c index 9258dfe784ae..0b65354aaf64 100644 --- a/net/sctp/protocol.c +++ b/net/sctp/protocol.c @@ -52,6 +52,8 @@ #include <linux/inetdevice.h> #include <linux/seq_file.h> #include <linux/bootmem.h> +#include <linux/highmem.h> +#include <linux/swap.h> #include <net/net_namespace.h> #include <net/protocol.h> #include <net/ip.h> @@ -64,9 +66,12 @@ /* Global data structures. */ struct sctp_globals sctp_globals __read_mostly; -struct proc_dir_entry *proc_net_sctp; DEFINE_SNMP_STAT(struct sctp_mib, sctp_statistics) __read_mostly; +#ifdef CONFIG_PROC_FS +struct proc_dir_entry *proc_net_sctp; +#endif + struct idr sctp_assocs_id; DEFINE_SPINLOCK(sctp_assocs_id_lock); @@ -97,6 +102,7 @@ struct sock *sctp_get_ctl_sock(void) /* Set up the proc fs entry for the SCTP protocol. */ static __init int sctp_proc_init(void) { +#ifdef CONFIG_PROC_FS if (!proc_net_sctp) { struct proc_dir_entry *ent; ent = proc_mkdir("sctp", init_net.proc_net); @@ -113,9 +119,13 @@ static __init int sctp_proc_init(void) goto out_eps_proc_init; if (sctp_assocs_proc_init()) goto out_assocs_proc_init; + if (sctp_remaddr_proc_init()) + goto out_remaddr_proc_init; return 0; +out_remaddr_proc_init: + sctp_assocs_proc_exit(); out_assocs_proc_init: sctp_eps_proc_exit(); out_eps_proc_init: @@ -127,6 +137,9 @@ out_snmp_proc_init: } out_nomem: return -ENOMEM; +#else + return 0; +#endif /* CONFIG_PROC_FS */ } /* Clean up the proc fs entry for the SCTP protocol. @@ -135,14 +148,17 @@ out_nomem: */ static void sctp_proc_exit(void) { +#ifdef CONFIG_PROC_FS sctp_snmp_proc_exit(); sctp_eps_proc_exit(); sctp_assocs_proc_exit(); + sctp_remaddr_proc_exit(); if (proc_net_sctp) { proc_net_sctp = NULL; remove_proc_entry("sctp", init_net.proc_net); } +#endif } /* Private helper to extract ipv4 address and stash them in @@ -367,6 +383,10 @@ static int sctp_v4_addr_valid(union sctp_addr *addr, struct sctp_sock *sp, const struct sk_buff *skb) { + /* IPv4 addresses not allowed */ + if (sp && ipv6_only_sock(sctp_opt2sk(sp))) + return 0; + /* Is this a non-unicast address or a unusable SCTP address? */ if (IS_IPV4_UNUSABLE_ADDRESS(addr->v4.sin_addr.s_addr)) return 0; @@ -390,6 +410,9 @@ static int sctp_v4_available(union sctp_addr *addr, struct sctp_sock *sp) !sysctl_ip_nonlocal_bind) return 0; + if (ipv6_only_sock(sctp_opt2sk(sp))) + return 0; + return 1; } @@ -645,7 +668,7 @@ static int sctp_inetaddr_event(struct notifier_block *this, unsigned long ev, struct sctp_sockaddr_entry *temp; int found = 0; - if (dev_net(ifa->ifa_dev->dev) != &init_net) + if (!net_eq(dev_net(ifa->ifa_dev->dev), &init_net)) return NOTIFY_DONE; switch (ev) { @@ -839,16 +862,21 @@ static int sctp_inet_supported_addrs(const struct sctp_sock *opt, /* Wrapper routine that calls the ip transmit routine. */ static inline int sctp_v4_xmit(struct sk_buff *skb, - struct sctp_transport *transport, int ipfragok) + struct sctp_transport *transport) { + struct inet_sock *inet = inet_sk(skb->sk); + SCTP_DEBUG_PRINTK("%s: skb:%p, len:%d, " "src:%u.%u.%u.%u, dst:%u.%u.%u.%u\n", __func__, skb, skb->len, NIPQUAD(skb->rtable->rt_src), NIPQUAD(skb->rtable->rt_dst)); + inet->pmtudisc = transport->param_flags & SPP_PMTUD_ENABLE ? + IP_PMTUDISC_DO : IP_PMTUDISC_DONT; + SCTP_INC_STATS(SCTP_MIB_OUTSCTPPACKS); - return ip_queue_xmit(skb, ipfragok); + return ip_queue_xmit(skb, 0); } static struct sctp_af sctp_af_inet; @@ -1059,6 +1087,7 @@ SCTP_STATIC __init int sctp_init(void) int status = -EINVAL; unsigned long goal; unsigned long limit; + unsigned long nr_pages; int max_share; int order; @@ -1154,8 +1183,9 @@ SCTP_STATIC __init int sctp_init(void) * Note this initalizes the data in sctpv6_prot too * Unabashedly stolen from tcp_init */ - limit = min(num_physpages, 1UL<<(28-PAGE_SHIFT)) >> (20-PAGE_SHIFT); - limit = (limit * (num_physpages >> (20-PAGE_SHIFT))) >> (PAGE_SHIFT-11); + nr_pages = totalram_pages - totalhigh_pages; + limit = min(nr_pages, 1UL<<(28-PAGE_SHIFT)) >> (20-PAGE_SHIFT); + limit = (limit * (nr_pages >> (20-PAGE_SHIFT))) >> (PAGE_SHIFT-11); limit = max(limit, 128UL); sysctl_sctp_mem[0] = limit / 4 * 3; sysctl_sctp_mem[1] = limit; @@ -1165,7 +1195,7 @@ SCTP_STATIC __init int sctp_init(void) limit = (sysctl_sctp_mem[1]) << (PAGE_SHIFT - 7); max_share = min(4UL*1024*1024, limit); - sysctl_sctp_rmem[0] = PAGE_SIZE; /* give each asoc 1 page min */ + sysctl_sctp_rmem[0] = SK_MEM_QUANTUM; /* give each asoc 1 page min */ sysctl_sctp_rmem[1] = (1500 *(sizeof(struct sk_buff) + 1)); sysctl_sctp_rmem[2] = max(sysctl_sctp_rmem[1], max_share); diff --git a/net/sctp/sm_make_chunk.c b/net/sctp/sm_make_chunk.c index bbc7107c86cf..fd8acb48c3f2 100644 --- a/net/sctp/sm_make_chunk.c +++ b/net/sctp/sm_make_chunk.c @@ -702,12 +702,14 @@ struct sctp_chunk *sctp_make_sack(const struct sctp_association *asoc) __u32 ctsn; __u16 num_gabs, num_dup_tsns; struct sctp_tsnmap *map = (struct sctp_tsnmap *)&asoc->peer.tsn_map; + struct sctp_gap_ack_block gabs[SCTP_MAX_GABS]; + memset(gabs, 0, sizeof(gabs)); ctsn = sctp_tsnmap_get_ctsn(map); SCTP_DEBUG_PRINTK("sackCTSNAck sent: 0x%x.\n", ctsn); /* How much room is needed in the chunk? */ - num_gabs = sctp_tsnmap_num_gabs(map); + num_gabs = sctp_tsnmap_num_gabs(map, gabs); num_dup_tsns = sctp_tsnmap_num_dups(map); /* Initialize the SACK header. */ @@ -763,7 +765,7 @@ struct sctp_chunk *sctp_make_sack(const struct sctp_association *asoc) /* Add the gap ack block information. */ if (num_gabs) sctp_addto_chunk(retval, sizeof(__u32) * num_gabs, - sctp_tsnmap_get_gabs(map)); + gabs); /* Add the duplicate TSN information. */ if (num_dup_tsns) @@ -1012,6 +1014,29 @@ end: return retval; } +struct sctp_chunk *sctp_make_violation_paramlen( + const struct sctp_association *asoc, + const struct sctp_chunk *chunk, + struct sctp_paramhdr *param) +{ + struct sctp_chunk *retval; + static const char error[] = "The following parameter had invalid length:"; + size_t payload_len = sizeof(error) + sizeof(sctp_errhdr_t) + + sizeof(sctp_paramhdr_t); + + retval = sctp_make_abort(asoc, chunk, payload_len); + if (!retval) + goto nodata; + + sctp_init_cause(retval, SCTP_ERROR_PROTO_VIOLATION, + sizeof(error) + sizeof(sctp_paramhdr_t)); + sctp_addto_chunk(retval, sizeof(error), error); + sctp_addto_param(retval, sizeof(sctp_paramhdr_t), param); + +nodata: + return retval; +} + /* Make a HEARTBEAT chunk. */ struct sctp_chunk *sctp_make_heartbeat(const struct sctp_association *asoc, const struct sctp_transport *transport, @@ -1188,7 +1213,7 @@ struct sctp_chunk *sctp_chunkify(struct sk_buff *skb, */ retval->tsn_missing_report = 0; retval->tsn_gap_acked = 0; - retval->fast_retransmit = 0; + retval->fast_retransmit = SCTP_CAN_FRTX; /* If this is a fragmented message, track all fragments * of the message (for SEND_FAILED). @@ -1782,11 +1807,6 @@ static int sctp_process_inv_paramlength(const struct sctp_association *asoc, const struct sctp_chunk *chunk, struct sctp_chunk **errp) { - static const char error[] = "The following parameter had invalid length:"; - size_t payload_len = WORD_ROUND(sizeof(error)) + - sizeof(sctp_paramhdr_t); - - /* This is a fatal error. Any accumulated non-fatal errors are * not reported. */ @@ -1794,14 +1814,7 @@ static int sctp_process_inv_paramlength(const struct sctp_association *asoc, sctp_chunk_free(*errp); /* Create an error chunk and fill it in with our payload. */ - *errp = sctp_make_op_error_space(asoc, chunk, payload_len); - - if (*errp) { - sctp_init_cause(*errp, SCTP_ERROR_PROTO_VIOLATION, - sizeof(error) + sizeof(sctp_paramhdr_t)); - sctp_addto_chunk(*errp, sizeof(error), error); - sctp_addto_param(*errp, sizeof(sctp_paramhdr_t), param); - } + *errp = sctp_make_violation_paramlen(asoc, chunk, param); return 0; } @@ -1886,11 +1899,13 @@ static void sctp_process_ext_param(struct sctp_association *asoc, /* if the peer reports AUTH, assume that he * supports AUTH. */ - asoc->peer.auth_capable = 1; + if (sctp_auth_enable) + asoc->peer.auth_capable = 1; break; case SCTP_CID_ASCONF: case SCTP_CID_ASCONF_ACK: - asoc->peer.asconf_capable = 1; + if (sctp_addip_enable) + asoc->peer.asconf_capable = 1; break; default: break; @@ -2275,8 +2290,9 @@ int sctp_process_init(struct sctp_association *asoc, sctp_cid_t cid, } /* Set up the TSN tracking pieces. */ - sctp_tsnmap_init(&asoc->peer.tsn_map, SCTP_TSN_MAP_SIZE, - asoc->peer.i.initial_tsn); + if (!sctp_tsnmap_init(&asoc->peer.tsn_map, SCTP_TSN_MAP_INITIAL, + asoc->peer.i.initial_tsn, gfp)) + goto clean_up; /* RFC 2960 6.5 Stream Identifier and Stream Sequence Number * @@ -2319,12 +2335,10 @@ clean_up: /* Release the transport structures. */ list_for_each_safe(pos, temp, &asoc->peer.transport_addr_list) { transport = list_entry(pos, struct sctp_transport, transports); - list_del_init(pos); - sctp_transport_free(transport); + if (transport->state != SCTP_ACTIVE) + sctp_assoc_rm_peer(asoc, transport); } - asoc->peer.transport_count = 0; - nomem: return 0; } @@ -2364,8 +2378,13 @@ static int sctp_process_param(struct sctp_association *asoc, case SCTP_PARAM_IPV6_ADDRESS: if (PF_INET6 != asoc->base.sk->sk_family) break; - /* Fall through. */ + goto do_addr_param; + case SCTP_PARAM_IPV4_ADDRESS: + /* v4 addresses are not allowed on v6-only socket */ + if (ipv6_only_sock(asoc->base.sk)) + break; +do_addr_param: af = sctp_get_af_specific(param_type2af(param.p->type)); af->from_addr_param(&addr, param.addr, htons(asoc->peer.port), 0); scope = sctp_scope(peer_addr); @@ -2451,10 +2470,13 @@ static int sctp_process_param(struct sctp_association *asoc, break; case SCTP_PARAM_ADAPTATION_LAYER_IND: - asoc->peer.adaptation_ind = param.aind->adaptation_ind; + asoc->peer.adaptation_ind = ntohl(param.aind->adaptation_ind); break; case SCTP_PARAM_SET_PRIMARY: + if (!sctp_addip_enable) + goto fall_through; + addr_param = param.v + sizeof(sctp_addip_param_t); af = sctp_get_af_specific(param_type2af(param.p->type)); diff --git a/net/sctp/sm_sideeffect.c b/net/sctp/sm_sideeffect.c index 23a9f1a95b7d..e1d6076b4f59 100644 --- a/net/sctp/sm_sideeffect.c +++ b/net/sctp/sm_sideeffect.c @@ -190,20 +190,28 @@ static int sctp_gen_sack(struct sctp_association *asoc, int force, * unacknowledged DATA chunk. ... */ if (!asoc->peer.sack_needed) { - /* We will need a SACK for the next packet. */ - asoc->peer.sack_needed = 1; + asoc->peer.sack_cnt++; /* Set the SACK delay timeout based on the * SACK delay for the last transport * data was received from, or the default * for the association. */ - if (trans) + if (trans) { + /* We will need a SACK for the next packet. */ + if (asoc->peer.sack_cnt >= trans->sackfreq - 1) + asoc->peer.sack_needed = 1; + asoc->timeouts[SCTP_EVENT_TIMEOUT_SACK] = trans->sackdelay; - else + } else { + /* We will need a SACK for the next packet. */ + if (asoc->peer.sack_cnt >= asoc->sackfreq - 1) + asoc->peer.sack_needed = 1; + asoc->timeouts[SCTP_EVENT_TIMEOUT_SACK] = asoc->sackdelay; + } /* Restart the SACK timer. */ sctp_add_cmd_sf(commands, SCTP_CMD_TIMER_RESTART, @@ -216,6 +224,7 @@ static int sctp_gen_sack(struct sctp_association *asoc, int force, goto nomem; asoc->peer.sack_needed = 0; + asoc->peer.sack_cnt = 0; sctp_add_cmd_sf(commands, SCTP_CMD_REPLY, SCTP_CHUNK(sack)); @@ -655,7 +664,7 @@ static int sctp_cmd_process_sack(sctp_cmd_seq_t *cmds, struct sctp_association *asoc, struct sctp_sackhdr *sackh) { - int err; + int err = 0; if (sctp_outq_sack(&asoc->outqueue, sackh)) { /* There are no more TSNs awaiting SACK. */ @@ -663,11 +672,6 @@ static int sctp_cmd_process_sack(sctp_cmd_seq_t *cmds, SCTP_ST_OTHER(SCTP_EVENT_NO_PENDING_TSN), asoc->state, asoc->ep, asoc, NULL, GFP_ATOMIC); - } else { - /* Windows may have opened, so we need - * to check if we have DATA to transmit - */ - err = sctp_outq_flush(&asoc->outqueue, 0); } return err; @@ -885,6 +889,35 @@ static void sctp_cmd_adaptation_ind(sctp_cmd_seq_t *commands, sctp_ulpq_tail_event(&asoc->ulpq, ev); } + +static void sctp_cmd_t1_timer_update(struct sctp_association *asoc, + sctp_event_timeout_t timer, + char *name) +{ + struct sctp_transport *t; + + t = asoc->init_last_sent_to; + asoc->init_err_counter++; + + if (t->init_sent_count > (asoc->init_cycle + 1)) { + asoc->timeouts[timer] *= 2; + if (asoc->timeouts[timer] > asoc->max_init_timeo) { + asoc->timeouts[timer] = asoc->max_init_timeo; + } + asoc->init_cycle++; + SCTP_DEBUG_PRINTK( + "T1 %s Timeout adjustment" + " init_err_counter: %d" + " cycle: %d" + " timeout: %ld\n", + name, + asoc->init_err_counter, + asoc->init_cycle, + asoc->timeouts[timer]); + } + +} + /* These three macros allow us to pull the debugging code out of the * main flow of sctp_do_sm() to keep attention focused on the real * functionality there. @@ -1119,7 +1152,8 @@ static int sctp_cmd_interpreter(sctp_event_t event_type, case SCTP_CMD_REPORT_TSN: /* Record the arrival of a TSN. */ - sctp_tsnmap_mark(&asoc->peer.tsn_map, cmd->obj.u32); + error = sctp_tsnmap_mark(&asoc->peer.tsn_map, + cmd->obj.u32); break; case SCTP_CMD_REPORT_FWDTSN: @@ -1192,6 +1226,11 @@ static int sctp_cmd_interpreter(sctp_event_t event_type, sctp_add_cmd_sf(commands, SCTP_CMD_REPLY, SCTP_CHUNK(cmd->obj.ptr)); + if (new_obj->transport) { + new_obj->transport->init_sent_count++; + asoc->init_last_sent_to = new_obj->transport; + } + /* FIXME - Eventually come up with a cleaner way to * enabling COOKIE-ECHO + DATA bundling during * multihoming stale cookie scenarios, the following @@ -1341,26 +1380,9 @@ static int sctp_cmd_interpreter(sctp_event_t event_type, * all transports have been tried at the current * timeout. */ - t = asoc->init_last_sent_to; - asoc->init_err_counter++; - - if (t->init_sent_count > (asoc->init_cycle + 1)) { - asoc->timeouts[SCTP_EVENT_TIMEOUT_T1_INIT] *= 2; - if (asoc->timeouts[SCTP_EVENT_TIMEOUT_T1_INIT] > - asoc->max_init_timeo) { - asoc->timeouts[SCTP_EVENT_TIMEOUT_T1_INIT] = - asoc->max_init_timeo; - } - asoc->init_cycle++; - SCTP_DEBUG_PRINTK( - "T1 INIT Timeout adjustment" - " init_err_counter: %d" - " cycle: %d" - " timeout: %ld\n", - asoc->init_err_counter, - asoc->init_cycle, - asoc->timeouts[SCTP_EVENT_TIMEOUT_T1_INIT]); - } + sctp_cmd_t1_timer_update(asoc, + SCTP_EVENT_TIMEOUT_T1_INIT, + "INIT"); sctp_add_cmd_sf(commands, SCTP_CMD_TIMER_RESTART, SCTP_TO(SCTP_EVENT_TIMEOUT_T1_INIT)); @@ -1373,20 +1395,9 @@ static int sctp_cmd_interpreter(sctp_event_t event_type, * all transports have been tried at the current * timeout. */ - asoc->init_err_counter++; - - asoc->timeouts[SCTP_EVENT_TIMEOUT_T1_COOKIE] *= 2; - if (asoc->timeouts[SCTP_EVENT_TIMEOUT_T1_COOKIE] > - asoc->max_init_timeo) { - asoc->timeouts[SCTP_EVENT_TIMEOUT_T1_COOKIE] = - asoc->max_init_timeo; - } - SCTP_DEBUG_PRINTK( - "T1 COOKIE Timeout adjustment" - " init_err_counter: %d" - " timeout: %ld\n", - asoc->init_err_counter, - asoc->timeouts[SCTP_EVENT_TIMEOUT_T1_COOKIE]); + sctp_cmd_t1_timer_update(asoc, + SCTP_EVENT_TIMEOUT_T1_COOKIE, + "COOKIE"); /* If we've sent any data bundled with * COOKIE-ECHO we need to resend. @@ -1418,6 +1429,10 @@ static int sctp_cmd_interpreter(sctp_event_t event_type, case SCTP_CMD_INIT_COUNTER_RESET: asoc->init_err_counter = 0; asoc->init_cycle = 0; + list_for_each_entry(t, &asoc->peer.transport_addr_list, + transports) { + t->init_sent_count = 0; + } break; case SCTP_CMD_REPORT_DUP: @@ -1472,8 +1487,15 @@ static int sctp_cmd_interpreter(sctp_event_t event_type, break; case SCTP_CMD_DISCARD_PACKET: - /* We need to discard the whole packet. */ + /* We need to discard the whole packet. + * Uncork the queue since there might be + * responses pending + */ chunk->pdiscard = 1; + if (asoc) { + sctp_outq_uncork(&asoc->outqueue); + local_cork = 0; + } break; case SCTP_CMD_RTO_PENDING: @@ -1544,8 +1566,15 @@ static int sctp_cmd_interpreter(sctp_event_t event_type, } out: - if (local_cork) - sctp_outq_uncork(&asoc->outqueue); + /* If this is in response to a received chunk, wait until + * we are done with the packet to open the queue so that we don't + * send multiple packets in response to a single request. + */ + if (asoc && SCTP_EVENT_T_CHUNK == event_type && chunk) { + if (chunk->end_of_packet || chunk->singleton) + sctp_outq_uncork(&asoc->outqueue); + } else if (local_cork) + sctp_outq_uncork(&asoc->outqueue); return error; nomem: error = -ENOMEM; diff --git a/net/sctp/sm_statefuns.c b/net/sctp/sm_statefuns.c index 0c9d5a6950fe..d4c3fbc4671e 100644 --- a/net/sctp/sm_statefuns.c +++ b/net/sctp/sm_statefuns.c @@ -119,7 +119,7 @@ static sctp_disposition_t sctp_sf_violation_paramlen( const struct sctp_endpoint *ep, const struct sctp_association *asoc, const sctp_subtype_t type, - void *arg, + void *arg, void *ext, sctp_cmd_seq_t *commands); static sctp_disposition_t sctp_sf_violation_ctsn( @@ -315,8 +315,10 @@ sctp_disposition_t sctp_sf_do_5_1B_init(const struct sctp_endpoint *ep, /* If the packet is an OOTB packet which is temporarily on the * control endpoint, respond with an ABORT. */ - if (ep == sctp_sk((sctp_get_ctl_sock()))->ep) + if (ep == sctp_sk((sctp_get_ctl_sock()))->ep) { + SCTP_INC_STATS(SCTP_MIB_OUTOFBLUES); return sctp_sf_tabort_8_4_8(ep, asoc, type, arg, commands); + } /* 3.1 A packet containing an INIT chunk MUST have a zero Verification * Tag. @@ -635,8 +637,10 @@ sctp_disposition_t sctp_sf_do_5_1D_ce(const struct sctp_endpoint *ep, /* If the packet is an OOTB packet which is temporarily on the * control endpoint, respond with an ABORT. */ - if (ep == sctp_sk((sctp_get_ctl_sock()))->ep) + if (ep == sctp_sk((sctp_get_ctl_sock()))->ep) { + SCTP_INC_STATS(SCTP_MIB_OUTOFBLUES); return sctp_sf_tabort_8_4_8(ep, asoc, type, arg, commands); + } /* Make sure that the COOKIE_ECHO chunk has a valid length. * In this case, we check that we have enough for at least a @@ -795,8 +799,6 @@ sctp_disposition_t sctp_sf_do_5_1D_ce(const struct sctp_endpoint *ep, sctp_add_cmd_sf(commands, SCTP_CMD_TIMER_START, SCTP_TO(SCTP_EVENT_TIMEOUT_AUTOCLOSE)); - sctp_add_cmd_sf(commands, SCTP_CMD_TRANSMIT, SCTP_NULL()); - /* This will send the COOKIE ACK */ sctp_add_cmd_sf(commands, SCTP_CMD_REPLY, SCTP_CHUNK(repl)); @@ -883,7 +885,6 @@ sctp_disposition_t sctp_sf_do_5_1E_ca(const struct sctp_endpoint *ep, if (asoc->autoclose) sctp_add_cmd_sf(commands, SCTP_CMD_TIMER_START, SCTP_TO(SCTP_EVENT_TIMEOUT_AUTOCLOSE)); - sctp_add_cmd_sf(commands, SCTP_CMD_TRANSMIT, SCTP_NULL()); /* It may also notify its ULP about the successful * establishment of the association with a Communication Up @@ -1781,7 +1782,6 @@ static sctp_disposition_t sctp_sf_do_dupcook_b(const struct sctp_endpoint *ep, goto nomem; sctp_add_cmd_sf(commands, SCTP_CMD_REPLY, SCTP_CHUNK(repl)); - sctp_add_cmd_sf(commands, SCTP_CMD_TRANSMIT, SCTP_NULL()); /* RFC 2960 5.1 Normal Establishment of an Association * @@ -1898,12 +1898,13 @@ static sctp_disposition_t sctp_sf_do_dupcook_d(const struct sctp_endpoint *ep, } } - sctp_add_cmd_sf(commands, SCTP_CMD_TRANSMIT, SCTP_NULL()); repl = sctp_make_cookie_ack(new_asoc, chunk); if (!repl) goto nomem; + sctp_add_cmd_sf(commands, SCTP_CMD_REPLY, SCTP_CHUNK(repl)); + if (ev) sctp_add_cmd_sf(commands, SCTP_CMD_EVENT_ULP, SCTP_ULPEVENT(ev)); @@ -1911,9 +1912,6 @@ static sctp_disposition_t sctp_sf_do_dupcook_d(const struct sctp_endpoint *ep, sctp_add_cmd_sf(commands, SCTP_CMD_EVENT_ULP, SCTP_ULPEVENT(ai_ev)); - sctp_add_cmd_sf(commands, SCTP_CMD_REPLY, SCTP_CHUNK(repl)); - sctp_add_cmd_sf(commands, SCTP_CMD_TRANSMIT, SCTP_NULL()); - return SCTP_DISPOSITION_CONSUME; nomem: @@ -2082,10 +2080,6 @@ sctp_disposition_t sctp_sf_shutdown_pending_abort( sctp_bind_addr_state(&asoc->base.bind_addr, &chunk->dest)) return sctp_sf_discard_chunk(ep, asoc, type, arg, commands); - /* Stop the T5-shutdown guard timer. */ - sctp_add_cmd_sf(commands, SCTP_CMD_TIMER_STOP, - SCTP_TO(SCTP_EVENT_TIMEOUT_T5_SHUTDOWN_GUARD)); - return __sctp_sf_do_9_1_abort(ep, asoc, type, arg, commands); } @@ -3388,6 +3382,8 @@ sctp_disposition_t sctp_sf_do_8_5_1_E_sa(const struct sctp_endpoint *ep, * packet and the state function that handles OOTB SHUTDOWN_ACK is * called with a NULL association. */ + SCTP_INC_STATS(SCTP_MIB_OUTOFBLUES); + return sctp_sf_shut_8_4_5(ep, NULL, type, arg, commands); } @@ -3431,7 +3427,7 @@ sctp_disposition_t sctp_sf_do_asconf(const struct sctp_endpoint *ep, addr_param = (union sctp_addr_param *)hdr->params; length = ntohs(addr_param->p.length); if (length < sizeof(sctp_paramhdr_t)) - return sctp_sf_violation_paramlen(ep, asoc, type, + return sctp_sf_violation_paramlen(ep, asoc, type, arg, (void *)addr_param, commands); /* Verify the ASCONF chunk before processing it. */ @@ -3439,8 +3435,8 @@ sctp_disposition_t sctp_sf_do_asconf(const struct sctp_endpoint *ep, (sctp_paramhdr_t *)((void *)addr_param + length), (void *)chunk->chunk_end, &err_param)) - return sctp_sf_violation_paramlen(ep, asoc, type, - (void *)&err_param, commands); + return sctp_sf_violation_paramlen(ep, asoc, type, arg, + (void *)err_param, commands); /* ADDIP 5.2 E1) Compare the value of the serial number to the value * the endpoint stored in a new association variable @@ -3548,8 +3544,8 @@ sctp_disposition_t sctp_sf_do_asconf_ack(const struct sctp_endpoint *ep, (sctp_paramhdr_t *)addip_hdr->params, (void *)asconf_ack->chunk_end, &err_param)) - return sctp_sf_violation_paramlen(ep, asoc, type, - (void *)&err_param, commands); + return sctp_sf_violation_paramlen(ep, asoc, type, arg, + (void *)err_param, commands); if (last_asconf) { addip_hdr = (sctp_addiphdr_t *)last_asconf->subh.addip_hdr; @@ -3970,9 +3966,6 @@ sctp_disposition_t sctp_sf_unk_chunk(const struct sctp_endpoint *ep, return sctp_sf_pdiscard(ep, asoc, type, arg, commands); break; case SCTP_CID_ACTION_DISCARD_ERR: - /* Discard the packet. */ - sctp_sf_pdiscard(ep, asoc, type, arg, commands); - /* Generate an ERROR chunk as response. */ hdr = unk_chunk->chunk_hdr; err_chunk = sctp_make_op_error(asoc, unk_chunk, @@ -3982,6 +3975,9 @@ sctp_disposition_t sctp_sf_unk_chunk(const struct sctp_endpoint *ep, sctp_add_cmd_sf(commands, SCTP_CMD_REPLY, SCTP_CHUNK(err_chunk)); } + + /* Discard the packet. */ + sctp_sf_pdiscard(ep, asoc, type, arg, commands); return SCTP_DISPOSITION_CONSUME; break; case SCTP_CID_ACTION_SKIP: @@ -4192,11 +4188,10 @@ static sctp_disposition_t sctp_sf_abort_violation( SCTP_INC_STATS(SCTP_MIB_OUTCTRLCHUNKS); } -discard: - sctp_sf_pdiscard(ep, asoc, SCTP_ST_CHUNK(0), arg, commands); - SCTP_INC_STATS(SCTP_MIB_ABORTEDS); +discard: + sctp_sf_pdiscard(ep, asoc, SCTP_ST_CHUNK(0), arg, commands); return SCTP_DISPOSITION_ABORT; nomem_pkt: @@ -4246,12 +4241,36 @@ static sctp_disposition_t sctp_sf_violation_paramlen( const struct sctp_endpoint *ep, const struct sctp_association *asoc, const sctp_subtype_t type, - void *arg, - sctp_cmd_seq_t *commands) { - static const char err_str[] = "The following parameter had invalid length:"; + void *arg, void *ext, + sctp_cmd_seq_t *commands) +{ + struct sctp_chunk *chunk = arg; + struct sctp_paramhdr *param = ext; + struct sctp_chunk *abort = NULL; - return sctp_sf_abort_violation(ep, asoc, arg, commands, err_str, - sizeof(err_str)); + if (sctp_auth_recv_cid(SCTP_CID_ABORT, asoc)) + goto discard; + + /* Make the abort chunk. */ + abort = sctp_make_violation_paramlen(asoc, chunk, param); + if (!abort) + goto nomem; + + sctp_add_cmd_sf(commands, SCTP_CMD_REPLY, SCTP_CHUNK(abort)); + SCTP_INC_STATS(SCTP_MIB_OUTCTRLCHUNKS); + + sctp_add_cmd_sf(commands, SCTP_CMD_SET_SK_ERR, + SCTP_ERROR(ECONNABORTED)); + sctp_add_cmd_sf(commands, SCTP_CMD_ASSOC_FAILED, + SCTP_PERR(SCTP_ERROR_PROTO_VIOLATION)); + SCTP_DEC_STATS(SCTP_MIB_CURRESTAB); + SCTP_INC_STATS(SCTP_MIB_ABORTEDS); + +discard: + sctp_sf_pdiscard(ep, asoc, SCTP_ST_CHUNK(0), arg, commands); + return SCTP_DISPOSITION_ABORT; +nomem: + return SCTP_DISPOSITION_NOMEM; } /* Handle a protocol violation when the peer trying to advance the @@ -4523,13 +4542,6 @@ sctp_disposition_t sctp_sf_do_9_2_prm_shutdown( sctp_add_cmd_sf(commands, SCTP_CMD_NEW_STATE, SCTP_STATE(SCTP_STATE_SHUTDOWN_PENDING)); - /* sctpimpguide-05 Section 2.12.2 - * The sender of the SHUTDOWN MAY also start an overall guard timer - * 'T5-shutdown-guard' to bound the overall time for shutdown sequence. - */ - sctp_add_cmd_sf(commands, SCTP_CMD_TIMER_START, - SCTP_TO(SCTP_EVENT_TIMEOUT_T5_SHUTDOWN_GUARD)); - disposition = SCTP_DISPOSITION_CONSUME; if (sctp_outq_is_empty(&asoc->outqueue)) { disposition = sctp_sf_do_9_2_start_shutdown(ep, asoc, type, @@ -4974,6 +4986,13 @@ sctp_disposition_t sctp_sf_do_9_2_start_shutdown( sctp_add_cmd_sf(commands, SCTP_CMD_TIMER_START, SCTP_TO(SCTP_EVENT_TIMEOUT_T2_SHUTDOWN)); + /* RFC 4960 Section 9.2 + * The sender of the SHUTDOWN MAY also start an overall guard timer + * 'T5-shutdown-guard' to bound the overall time for shutdown sequence. + */ + sctp_add_cmd_sf(commands, SCTP_CMD_TIMER_START, + SCTP_TO(SCTP_EVENT_TIMEOUT_T5_SHUTDOWN_GUARD)); + if (asoc->autoclose) sctp_add_cmd_sf(commands, SCTP_CMD_TIMER_STOP, SCTP_TO(SCTP_EVENT_TIMEOUT_AUTOCLOSE)); @@ -5285,6 +5304,8 @@ sctp_disposition_t sctp_sf_t1_cookie_timer_expire(const struct sctp_endpoint *ep if (!repl) return SCTP_DISPOSITION_NOMEM; + sctp_add_cmd_sf(commands, SCTP_CMD_INIT_CHOOSE_TRANSPORT, + SCTP_CHUNK(repl)); /* Issue a sideeffect to do the needed accounting. */ sctp_add_cmd_sf(commands, SCTP_CMD_COOKIEECHO_RESTART, SCTP_TO(SCTP_EVENT_TIMEOUT_T1_COOKIE)); @@ -5412,7 +5433,7 @@ sctp_disposition_t sctp_sf_t4_timer_expire( sctp_add_cmd_sf(commands, SCTP_CMD_ASSOC_FAILED, SCTP_PERR(SCTP_ERROR_NO_ERROR)); SCTP_INC_STATS(SCTP_MIB_ABORTEDS); - SCTP_INC_STATS(SCTP_MIB_CURRESTAB); + SCTP_DEC_STATS(SCTP_MIB_CURRESTAB); return SCTP_DISPOSITION_ABORT; } @@ -5468,6 +5489,9 @@ sctp_disposition_t sctp_sf_t5_timer_expire(const struct sctp_endpoint *ep, sctp_add_cmd_sf(commands, SCTP_CMD_ASSOC_FAILED, SCTP_PERR(SCTP_ERROR_NO_ERROR)); + SCTP_INC_STATS(SCTP_MIB_ABORTEDS); + SCTP_DEC_STATS(SCTP_MIB_CURRESTAB); + return SCTP_DISPOSITION_DELETE_TCB; nomem: return SCTP_DISPOSITION_NOMEM; @@ -5500,12 +5524,6 @@ sctp_disposition_t sctp_sf_autoclose_timer_expire( sctp_add_cmd_sf(commands, SCTP_CMD_NEW_STATE, SCTP_STATE(SCTP_STATE_SHUTDOWN_PENDING)); - /* sctpimpguide-05 Section 2.12.2 - * The sender of the SHUTDOWN MAY also start an overall guard timer - * 'T5-shutdown-guard' to bound the overall time for shutdown sequence. - */ - sctp_add_cmd_sf(commands, SCTP_CMD_TIMER_START, - SCTP_TO(SCTP_EVENT_TIMEOUT_T5_SHUTDOWN_GUARD)); disposition = SCTP_DISPOSITION_CONSUME; if (sctp_outq_is_empty(&asoc->outqueue)) { disposition = sctp_sf_do_9_2_start_shutdown(ep, asoc, type, @@ -5899,12 +5917,6 @@ static int sctp_eat_data(const struct sctp_association *asoc, return SCTP_IERROR_NO_DATA; } - /* If definately accepting the DATA chunk, record its TSN, otherwise - * wait for renege processing. - */ - if (SCTP_CMD_CHUNK_ULP == deliver) - sctp_add_cmd_sf(commands, SCTP_CMD_REPORT_TSN, SCTP_U32(tsn)); - chunk->data_accepted = 1; /* Note: Some chunks may get overcounted (if we drop) or overcounted @@ -5924,6 +5936,9 @@ static int sctp_eat_data(const struct sctp_association *asoc, * and discard the DATA chunk. */ if (ntohs(data_hdr->stream) >= asoc->c.sinit_max_instreams) { + /* Mark tsn as received even though we drop it */ + sctp_add_cmd_sf(commands, SCTP_CMD_REPORT_TSN, SCTP_U32(tsn)); + err = sctp_make_op_error(asoc, chunk, SCTP_ERROR_INV_STRM, &data_hdr->stream, sizeof(data_hdr->stream)); diff --git a/net/sctp/sm_statetable.c b/net/sctp/sm_statetable.c index d991237fb400..dd4ddc40c0ad 100644 --- a/net/sctp/sm_statetable.c +++ b/net/sctp/sm_statetable.c @@ -897,7 +897,7 @@ static const sctp_sm_table_entry_t other_event_table[SCTP_NUM_OTHER_TYPES][SCTP_ /* SCTP_STATE_ESTABLISHED */ \ TYPE_SCTP_FUNC(sctp_sf_timer_ignore), \ /* SCTP_STATE_SHUTDOWN_PENDING */ \ - TYPE_SCTP_FUNC(sctp_sf_t5_timer_expire), \ + TYPE_SCTP_FUNC(sctp_sf_timer_ignore), \ /* SCTP_STATE_SHUTDOWN_SENT */ \ TYPE_SCTP_FUNC(sctp_sf_t5_timer_expire), \ /* SCTP_STATE_SHUTDOWN_RECEIVED */ \ diff --git a/net/sctp/socket.c b/net/sctp/socket.c index 0dbcde6758ea..a1b904529d5e 100644 --- a/net/sctp/socket.c +++ b/net/sctp/socket.c @@ -116,7 +116,7 @@ static int sctp_memory_pressure; static atomic_t sctp_memory_allocated; static atomic_t sctp_sockets_allocated; -static void sctp_enter_memory_pressure(void) +static void sctp_enter_memory_pressure(struct sock *sk) { sctp_memory_pressure = 1; } @@ -308,9 +308,16 @@ static struct sctp_af *sctp_sockaddr_af(struct sctp_sock *opt, if (len < sizeof (struct sockaddr)) return NULL; - /* Does this PF support this AF? */ - if (!opt->pf->af_supported(addr->sa.sa_family, opt)) - return NULL; + /* V4 mapped address are really of AF_INET family */ + if (addr->sa.sa_family == AF_INET6 && + ipv6_addr_v4mapped(&addr->v6.sin6_addr)) { + if (!opt->pf->af_supported(AF_INET, opt)) + return NULL; + } else { + /* Does this PF support this AF? */ + if (!opt->pf->af_supported(addr->sa.sa_family, opt)) + return NULL; + } /* If we get this far, af is valid. */ af = sctp_get_af_specific(addr->sa.sa_family); @@ -370,18 +377,19 @@ SCTP_STATIC int sctp_do_bind(struct sock *sk, union sctp_addr *addr, int len) if (snum && snum < PROT_SOCK && !capable(CAP_NET_BIND_SERVICE)) return -EACCES; + /* See if the address matches any of the addresses we may have + * already bound before checking against other endpoints. + */ + if (sctp_bind_addr_match(bp, addr, sp)) + return -EINVAL; + /* Make sure we are allowed to bind here. * The function sctp_get_port_local() does duplicate address * detection. */ addr->v4.sin_port = htons(snum); if ((ret = sctp_get_port_local(sk, addr))) { - if (ret == (long) sk) { - /* This endpoint has a conflicting address. */ - return -EINVAL; - } else { - return -EADDRINUSE; - } + return -EADDRINUSE; } /* Refresh ephemeral port. */ @@ -956,7 +964,8 @@ out: */ static int __sctp_connect(struct sock* sk, struct sockaddr *kaddrs, - int addrs_size) + int addrs_size, + sctp_assoc_t *assoc_id) { struct sctp_sock *sp; struct sctp_endpoint *ep; @@ -1111,6 +1120,8 @@ static int __sctp_connect(struct sock* sk, timeo = sock_sndtimeo(sk, f_flags & O_NONBLOCK); err = sctp_wait_for_connect(asoc, &timeo); + if (!err && assoc_id) + *assoc_id = asoc->assoc_id; /* Don't free association on exit. */ asoc = NULL; @@ -1128,7 +1139,8 @@ out_free: /* Helper for tunneling sctp_connectx() requests through sctp_setsockopt() * * API 8.9 - * int sctp_connectx(int sd, struct sockaddr *addrs, int addrcnt); + * int sctp_connectx(int sd, struct sockaddr *addrs, int addrcnt, + * sctp_assoc_t *asoc); * * If sd is an IPv4 socket, the addresses passed must be IPv4 addresses. * If the sd is an IPv6 socket, the addresses passed can either be IPv4 @@ -1144,8 +1156,10 @@ out_free: * representation is termed a "packed array" of addresses). The caller * specifies the number of addresses in the array with addrcnt. * - * On success, sctp_connectx() returns 0. On failure, sctp_connectx() returns - * -1, and sets errno to the appropriate error code. + * On success, sctp_connectx() returns 0. It also sets the assoc_id to + * the association id of the new association. On failure, sctp_connectx() + * returns -1, and sets errno to the appropriate error code. The assoc_id + * is not touched by the kernel. * * For SCTP, the port given in each socket address must be the same, or * sctp_connectx() will fail, setting errno to EINVAL. @@ -1182,11 +1196,12 @@ out_free: * addrs The pointer to the addresses in user land * addrssize Size of the addrs buffer * - * Returns 0 if ok, <0 errno code on error. + * Returns >=0 if ok, <0 errno code on error. */ -SCTP_STATIC int sctp_setsockopt_connectx(struct sock* sk, +SCTP_STATIC int __sctp_setsockopt_connectx(struct sock* sk, struct sockaddr __user *addrs, - int addrs_size) + int addrs_size, + sctp_assoc_t *assoc_id) { int err = 0; struct sockaddr *kaddrs; @@ -1209,13 +1224,46 @@ SCTP_STATIC int sctp_setsockopt_connectx(struct sock* sk, if (__copy_from_user(kaddrs, addrs, addrs_size)) { err = -EFAULT; } else { - err = __sctp_connect(sk, kaddrs, addrs_size); + err = __sctp_connect(sk, kaddrs, addrs_size, assoc_id); } kfree(kaddrs); + return err; } +/* + * This is an older interface. It's kept for backward compatibility + * to the option that doesn't provide association id. + */ +SCTP_STATIC int sctp_setsockopt_connectx_old(struct sock* sk, + struct sockaddr __user *addrs, + int addrs_size) +{ + return __sctp_setsockopt_connectx(sk, addrs, addrs_size, NULL); +} + +/* + * New interface for the API. The since the API is done with a socket + * option, to make it simple we feed back the association id is as a return + * indication to the call. Error is always negative and association id is + * always positive. + */ +SCTP_STATIC int sctp_setsockopt_connectx(struct sock* sk, + struct sockaddr __user *addrs, + int addrs_size) +{ + sctp_assoc_t assoc_id = 0; + int err = 0; + + err = __sctp_setsockopt_connectx(sk, addrs, addrs_size, &assoc_id); + + if (err) + return err; + else + return assoc_id; +} + /* API 3.1.4 close() - UDP Style Syntax * Applications use close() to perform graceful shutdown (as described in * Section 10.1 of [SCTP]) on ALL the associations currently represented @@ -2261,7 +2309,7 @@ static int sctp_setsockopt_peer_addr_params(struct sock *sk, /* If an address other than INADDR_ANY is specified, and * no transport is found, then the request is invalid. */ - if (!sctp_is_any(( union sctp_addr *)¶ms.spp_address)) { + if (!sctp_is_any(sk, ( union sctp_addr *)¶ms.spp_address)) { trans = sctp_addr_id2transport(sk, ¶ms.spp_address, params.spp_assoc_id); if (!trans) @@ -2305,74 +2353,98 @@ static int sctp_setsockopt_peer_addr_params(struct sock *sk, return 0; } -/* 7.1.23. Delayed Ack Timer (SCTP_DELAYED_ACK_TIME) - * - * This options will get or set the delayed ack timer. The time is set - * in milliseconds. If the assoc_id is 0, then this sets or gets the - * endpoints default delayed ack timer value. If the assoc_id field is - * non-zero, then the set or get effects the specified association. - * - * struct sctp_assoc_value { - * sctp_assoc_t assoc_id; - * uint32_t assoc_value; - * }; +/* + * 7.1.23. Get or set delayed ack timer (SCTP_DELAYED_SACK) + * + * This option will effect the way delayed acks are performed. This + * option allows you to get or set the delayed ack time, in + * milliseconds. It also allows changing the delayed ack frequency. + * Changing the frequency to 1 disables the delayed sack algorithm. If + * the assoc_id is 0, then this sets or gets the endpoints default + * values. If the assoc_id field is non-zero, then the set or get + * effects the specified association for the one to many model (the + * assoc_id field is ignored by the one to one model). Note that if + * sack_delay or sack_freq are 0 when setting this option, then the + * current values will remain unchanged. + * + * struct sctp_sack_info { + * sctp_assoc_t sack_assoc_id; + * uint32_t sack_delay; + * uint32_t sack_freq; + * }; * - * assoc_id - This parameter, indicates which association the - * user is preforming an action upon. Note that if - * this field's value is zero then the endpoints - * default value is changed (effecting future - * associations only). + * sack_assoc_id - This parameter, indicates which association the user + * is performing an action upon. Note that if this field's value is + * zero then the endpoints default value is changed (effecting future + * associations only). * - * assoc_value - This parameter contains the number of milliseconds - * that the user is requesting the delayed ACK timer - * be set to. Note that this value is defined in - * the standard to be between 200 and 500 milliseconds. + * sack_delay - This parameter contains the number of milliseconds that + * the user is requesting the delayed ACK timer be set to. Note that + * this value is defined in the standard to be between 200 and 500 + * milliseconds. * - * Note: a value of zero will leave the value alone, - * but disable SACK delay. A non-zero value will also - * enable SACK delay. + * sack_freq - This parameter contains the number of packets that must + * be received before a sack is sent without waiting for the delay + * timer to expire. The default value for this is 2, setting this + * value to 1 will disable the delayed sack algorithm. */ -static int sctp_setsockopt_delayed_ack_time(struct sock *sk, +static int sctp_setsockopt_delayed_ack(struct sock *sk, char __user *optval, int optlen) { - struct sctp_assoc_value params; + struct sctp_sack_info params; struct sctp_transport *trans = NULL; struct sctp_association *asoc = NULL; struct sctp_sock *sp = sctp_sk(sk); - if (optlen != sizeof(struct sctp_assoc_value)) - return - EINVAL; + if (optlen == sizeof(struct sctp_sack_info)) { + if (copy_from_user(¶ms, optval, optlen)) + return -EFAULT; - if (copy_from_user(¶ms, optval, optlen)) - return -EFAULT; + if (params.sack_delay == 0 && params.sack_freq == 0) + return 0; + } else if (optlen == sizeof(struct sctp_assoc_value)) { + printk(KERN_WARNING "SCTP: Use of struct sctp_sack_info " + "in delayed_ack socket option deprecated\n"); + printk(KERN_WARNING "SCTP: struct sctp_sack_info instead\n"); + if (copy_from_user(¶ms, optval, optlen)) + return -EFAULT; + + if (params.sack_delay == 0) + params.sack_freq = 1; + else + params.sack_freq = 0; + } else + return - EINVAL; /* Validate value parameter. */ - if (params.assoc_value > 500) + if (params.sack_delay > 500) return -EINVAL; - /* Get association, if assoc_id != 0 and the socket is a one + /* Get association, if sack_assoc_id != 0 and the socket is a one * to many style socket, and an association was not found, then * the id was invalid. */ - asoc = sctp_id2assoc(sk, params.assoc_id); - if (!asoc && params.assoc_id && sctp_style(sk, UDP)) + asoc = sctp_id2assoc(sk, params.sack_assoc_id); + if (!asoc && params.sack_assoc_id && sctp_style(sk, UDP)) return -EINVAL; - if (params.assoc_value) { + if (params.sack_delay) { if (asoc) { asoc->sackdelay = - msecs_to_jiffies(params.assoc_value); + msecs_to_jiffies(params.sack_delay); asoc->param_flags = (asoc->param_flags & ~SPP_SACKDELAY) | SPP_SACKDELAY_ENABLE; } else { - sp->sackdelay = params.assoc_value; + sp->sackdelay = params.sack_delay; sp->param_flags = (sp->param_flags & ~SPP_SACKDELAY) | SPP_SACKDELAY_ENABLE; } - } else { + } + + if (params.sack_freq == 1) { if (asoc) { asoc->param_flags = (asoc->param_flags & ~SPP_SACKDELAY) | @@ -2382,22 +2454,40 @@ static int sctp_setsockopt_delayed_ack_time(struct sock *sk, (sp->param_flags & ~SPP_SACKDELAY) | SPP_SACKDELAY_DISABLE; } + } else if (params.sack_freq > 1) { + if (asoc) { + asoc->sackfreq = params.sack_freq; + asoc->param_flags = + (asoc->param_flags & ~SPP_SACKDELAY) | + SPP_SACKDELAY_ENABLE; + } else { + sp->sackfreq = params.sack_freq; + sp->param_flags = + (sp->param_flags & ~SPP_SACKDELAY) | + SPP_SACKDELAY_ENABLE; + } } /* If change is for association, also apply to each transport. */ if (asoc) { list_for_each_entry(trans, &asoc->peer.transport_addr_list, transports) { - if (params.assoc_value) { + if (params.sack_delay) { trans->sackdelay = - msecs_to_jiffies(params.assoc_value); + msecs_to_jiffies(params.sack_delay); trans->param_flags = (trans->param_flags & ~SPP_SACKDELAY) | SPP_SACKDELAY_ENABLE; - } else { + } + if (params.sack_freq == 1) { trans->param_flags = (trans->param_flags & ~SPP_SACKDELAY) | SPP_SACKDELAY_DISABLE; + } else if (params.sack_freq > 1) { + trans->sackfreq = params.sack_freq; + trans->param_flags = + (trans->param_flags & ~SPP_SACKDELAY) | + SPP_SACKDELAY_ENABLE; } } } @@ -2965,6 +3055,9 @@ static int sctp_setsockopt_auth_chunk(struct sock *sk, { struct sctp_authchunk val; + if (!sctp_auth_enable) + return -EACCES; + if (optlen != sizeof(struct sctp_authchunk)) return -EINVAL; if (copy_from_user(&val, optval, optlen)) @@ -2993,8 +3086,12 @@ static int sctp_setsockopt_hmac_ident(struct sock *sk, int optlen) { struct sctp_hmacalgo *hmacs; + u32 idents; int err; + if (!sctp_auth_enable) + return -EACCES; + if (optlen < sizeof(struct sctp_hmacalgo)) return -EINVAL; @@ -3007,8 +3104,9 @@ static int sctp_setsockopt_hmac_ident(struct sock *sk, goto out; } - if (hmacs->shmac_num_idents == 0 || - hmacs->shmac_num_idents > SCTP_AUTH_NUM_HMACS) { + idents = hmacs->shmac_num_idents; + if (idents == 0 || idents > SCTP_AUTH_NUM_HMACS || + (idents * sizeof(u16)) > (optlen - sizeof(struct sctp_hmacalgo))) { err = -EINVAL; goto out; } @@ -3033,6 +3131,9 @@ static int sctp_setsockopt_auth_key(struct sock *sk, struct sctp_association *asoc; int ret; + if (!sctp_auth_enable) + return -EACCES; + if (optlen <= sizeof(struct sctp_authkey)) return -EINVAL; @@ -3045,6 +3146,11 @@ static int sctp_setsockopt_auth_key(struct sock *sk, goto out; } + if (authkey->sca_keylength > optlen - sizeof(struct sctp_authkey)) { + ret = -EINVAL; + goto out; + } + asoc = sctp_id2assoc(sk, authkey->sca_assoc_id); if (!asoc && authkey->sca_assoc_id && sctp_style(sk, UDP)) { ret = -EINVAL; @@ -3070,6 +3176,9 @@ static int sctp_setsockopt_active_key(struct sock *sk, struct sctp_authkeyid val; struct sctp_association *asoc; + if (!sctp_auth_enable) + return -EACCES; + if (optlen != sizeof(struct sctp_authkeyid)) return -EINVAL; if (copy_from_user(&val, optval, optlen)) @@ -3095,6 +3204,9 @@ static int sctp_setsockopt_del_key(struct sock *sk, struct sctp_authkeyid val; struct sctp_association *asoc; + if (!sctp_auth_enable) + return -EACCES; + if (optlen != sizeof(struct sctp_authkeyid)) return -EINVAL; if (copy_from_user(&val, optval, optlen)) @@ -3164,10 +3276,18 @@ SCTP_STATIC int sctp_setsockopt(struct sock *sk, int level, int optname, optlen, SCTP_BINDX_REM_ADDR); break; + case SCTP_SOCKOPT_CONNECTX_OLD: + /* 'optlen' is the size of the addresses buffer. */ + retval = sctp_setsockopt_connectx_old(sk, + (struct sockaddr __user *)optval, + optlen); + break; + case SCTP_SOCKOPT_CONNECTX: /* 'optlen' is the size of the addresses buffer. */ - retval = sctp_setsockopt_connectx(sk, (struct sockaddr __user *)optval, - optlen); + retval = sctp_setsockopt_connectx(sk, + (struct sockaddr __user *)optval, + optlen); break; case SCTP_DISABLE_FRAGMENTS: @@ -3186,8 +3306,8 @@ SCTP_STATIC int sctp_setsockopt(struct sock *sk, int level, int optname, retval = sctp_setsockopt_peer_addr_params(sk, optval, optlen); break; - case SCTP_DELAYED_ACK_TIME: - retval = sctp_setsockopt_delayed_ack_time(sk, optval, optlen); + case SCTP_DELAYED_ACK: + retval = sctp_setsockopt_delayed_ack(sk, optval, optlen); break; case SCTP_PARTIAL_DELIVERY_POINT: retval = sctp_setsockopt_partial_delivery_point(sk, optval, optlen); @@ -3294,7 +3414,7 @@ SCTP_STATIC int sctp_connect(struct sock *sk, struct sockaddr *addr, /* Pass correct addr len to common routine (so it knows there * is only one address being passed. */ - err = __sctp_connect(sk, addr, af->sockaddr_len); + err = __sctp_connect(sk, addr, af->sockaddr_len, NULL); } sctp_release_sock(sk); @@ -3446,6 +3566,7 @@ SCTP_STATIC int sctp_init_sock(struct sock *sk) sp->pathmaxrxt = sctp_max_retrans_path; sp->pathmtu = 0; // allow default discovery sp->sackdelay = sctp_sack_timeout; + sp->sackfreq = 2; sp->param_flags = SPP_HB_ENABLE | SPP_PMTUD_ENABLE | SPP_SACKDELAY_ENABLE; @@ -3497,7 +3618,7 @@ SCTP_STATIC int sctp_init_sock(struct sock *sk) } /* Cleanup any SCTP per socket resources. */ -SCTP_STATIC int sctp_destroy_sock(struct sock *sk) +SCTP_STATIC void sctp_destroy_sock(struct sock *sk) { struct sctp_endpoint *ep; @@ -3507,7 +3628,6 @@ SCTP_STATIC int sctp_destroy_sock(struct sock *sk) ep = sctp_sk(sk)->ep; sctp_endpoint_free(ep); atomic_dec(&sctp_sockets_allocated); - return 0; } /* API 4.1.7 shutdown() - TCP Style Syntax @@ -3812,7 +3932,7 @@ static int sctp_getsockopt_peeloff(struct sock *sk, int len, char __user *optval goto out; /* Map the socket to an unused fd that can be returned to the user. */ - retval = sock_map_fd(newsock); + retval = sock_map_fd(newsock, 0); if (retval < 0) { sock_release(newsock); goto out; @@ -3942,7 +4062,7 @@ static int sctp_getsockopt_peer_addr_params(struct sock *sk, int len, /* If an address other than INADDR_ANY is specified, and * no transport is found, then the request is invalid. */ - if (!sctp_is_any(( union sctp_addr *)¶ms.spp_address)) { + if (!sctp_is_any(sk, ( union sctp_addr *)¶ms.spp_address)) { trans = sctp_addr_id2transport(sk, ¶ms.spp_address, params.spp_assoc_id); if (!trans) { @@ -3999,70 +4119,91 @@ static int sctp_getsockopt_peer_addr_params(struct sock *sk, int len, return 0; } -/* 7.1.23. Delayed Ack Timer (SCTP_DELAYED_ACK_TIME) - * - * This options will get or set the delayed ack timer. The time is set - * in milliseconds. If the assoc_id is 0, then this sets or gets the - * endpoints default delayed ack timer value. If the assoc_id field is - * non-zero, then the set or get effects the specified association. - * - * struct sctp_assoc_value { - * sctp_assoc_t assoc_id; - * uint32_t assoc_value; - * }; +/* + * 7.1.23. Get or set delayed ack timer (SCTP_DELAYED_SACK) + * + * This option will effect the way delayed acks are performed. This + * option allows you to get or set the delayed ack time, in + * milliseconds. It also allows changing the delayed ack frequency. + * Changing the frequency to 1 disables the delayed sack algorithm. If + * the assoc_id is 0, then this sets or gets the endpoints default + * values. If the assoc_id field is non-zero, then the set or get + * effects the specified association for the one to many model (the + * assoc_id field is ignored by the one to one model). Note that if + * sack_delay or sack_freq are 0 when setting this option, then the + * current values will remain unchanged. + * + * struct sctp_sack_info { + * sctp_assoc_t sack_assoc_id; + * uint32_t sack_delay; + * uint32_t sack_freq; + * }; * - * assoc_id - This parameter, indicates which association the - * user is preforming an action upon. Note that if - * this field's value is zero then the endpoints - * default value is changed (effecting future - * associations only). + * sack_assoc_id - This parameter, indicates which association the user + * is performing an action upon. Note that if this field's value is + * zero then the endpoints default value is changed (effecting future + * associations only). * - * assoc_value - This parameter contains the number of milliseconds - * that the user is requesting the delayed ACK timer - * be set to. Note that this value is defined in - * the standard to be between 200 and 500 milliseconds. + * sack_delay - This parameter contains the number of milliseconds that + * the user is requesting the delayed ACK timer be set to. Note that + * this value is defined in the standard to be between 200 and 500 + * milliseconds. * - * Note: a value of zero will leave the value alone, - * but disable SACK delay. A non-zero value will also - * enable SACK delay. + * sack_freq - This parameter contains the number of packets that must + * be received before a sack is sent without waiting for the delay + * timer to expire. The default value for this is 2, setting this + * value to 1 will disable the delayed sack algorithm. */ -static int sctp_getsockopt_delayed_ack_time(struct sock *sk, int len, +static int sctp_getsockopt_delayed_ack(struct sock *sk, int len, char __user *optval, int __user *optlen) { - struct sctp_assoc_value params; + struct sctp_sack_info params; struct sctp_association *asoc = NULL; struct sctp_sock *sp = sctp_sk(sk); - if (len < sizeof(struct sctp_assoc_value)) - return - EINVAL; + if (len >= sizeof(struct sctp_sack_info)) { + len = sizeof(struct sctp_sack_info); - len = sizeof(struct sctp_assoc_value); - - if (copy_from_user(¶ms, optval, len)) - return -EFAULT; + if (copy_from_user(¶ms, optval, len)) + return -EFAULT; + } else if (len == sizeof(struct sctp_assoc_value)) { + printk(KERN_WARNING "SCTP: Use of struct sctp_sack_info " + "in delayed_ack socket option deprecated\n"); + printk(KERN_WARNING "SCTP: struct sctp_sack_info instead\n"); + if (copy_from_user(¶ms, optval, len)) + return -EFAULT; + } else + return - EINVAL; - /* Get association, if assoc_id != 0 and the socket is a one + /* Get association, if sack_assoc_id != 0 and the socket is a one * to many style socket, and an association was not found, then * the id was invalid. */ - asoc = sctp_id2assoc(sk, params.assoc_id); - if (!asoc && params.assoc_id && sctp_style(sk, UDP)) + asoc = sctp_id2assoc(sk, params.sack_assoc_id); + if (!asoc && params.sack_assoc_id && sctp_style(sk, UDP)) return -EINVAL; if (asoc) { /* Fetch association values. */ - if (asoc->param_flags & SPP_SACKDELAY_ENABLE) - params.assoc_value = jiffies_to_msecs( + if (asoc->param_flags & SPP_SACKDELAY_ENABLE) { + params.sack_delay = jiffies_to_msecs( asoc->sackdelay); - else - params.assoc_value = 0; + params.sack_freq = asoc->sackfreq; + + } else { + params.sack_delay = 0; + params.sack_freq = 1; + } } else { /* Fetch socket values. */ - if (sp->param_flags & SPP_SACKDELAY_ENABLE) - params.assoc_value = sp->sackdelay; - else - params.assoc_value = 0; + if (sp->param_flags & SPP_SACKDELAY_ENABLE) { + params.sack_delay = sp->sackdelay; + params.sack_freq = sp->sackfreq; + } else { + params.sack_delay = 0; + params.sack_freq = 1; + } } if (copy_to_user(optval, ¶ms, len)) @@ -4112,6 +4253,8 @@ static int sctp_getsockopt_peer_addrs_num_old(struct sock *sk, int len, if (copy_from_user(&id, optval, sizeof(sctp_assoc_t))) return -EFAULT; + printk(KERN_WARNING "SCTP: Use of SCTP_GET_PEER_ADDRS_NUM_OLD " + "socket option deprecated\n"); /* For UDP-style sockets, id specifies the association to query. */ asoc = sctp_id2assoc(sk, id); if (!asoc) @@ -4151,6 +4294,9 @@ static int sctp_getsockopt_peer_addrs_old(struct sock *sk, int len, if (getaddrs.addr_num <= 0) return -EINVAL; + printk(KERN_WARNING "SCTP: Use of SCTP_GET_PEER_ADDRS_OLD " + "socket option deprecated\n"); + /* For UDP-style sockets, id specifies the association to query. */ asoc = sctp_id2assoc(sk, getaddrs.assoc_id); if (!asoc) @@ -4244,6 +4390,9 @@ static int sctp_getsockopt_local_addrs_num_old(struct sock *sk, int len, if (copy_from_user(&id, optval, sizeof(sctp_assoc_t))) return -EFAULT; + printk(KERN_WARNING "SCTP: Use of SCTP_GET_LOCAL_ADDRS_NUM_OLD " + "socket option deprecated\n"); + /* * For UDP-style sockets, id specifies the association to query. * If the id field is set to the value '0' then the locally bound @@ -4265,7 +4414,7 @@ static int sctp_getsockopt_local_addrs_num_old(struct sock *sk, int len, if (sctp_list_single_entry(&bp->address_list)) { addr = list_entry(bp->address_list.next, struct sctp_sockaddr_entry, list); - if (sctp_is_any(&addr->a)) { + if (sctp_is_any(sk, &addr->a)) { rcu_read_lock(); list_for_each_entry_rcu(addr, &sctp_local_addr_list, list) { @@ -4276,6 +4425,11 @@ static int sctp_getsockopt_local_addrs_num_old(struct sock *sk, int len, (AF_INET6 == addr->a.sa.sa_family)) continue; + if ((PF_INET6 == sk->sk_family) && + inet_v6_ipv6only(sk) && + (AF_INET == addr->a.sa.sa_family)) + continue; + cnt++; } rcu_read_unlock(); @@ -4316,6 +4470,10 @@ static int sctp_copy_laddrs_old(struct sock *sk, __u16 port, if ((PF_INET == sk->sk_family) && (AF_INET6 == addr->a.sa.sa_family)) continue; + if ((PF_INET6 == sk->sk_family) && + inet_v6_ipv6only(sk) && + (AF_INET == addr->a.sa.sa_family)) + continue; memcpy(&temp, &addr->a, sizeof(temp)); if (!temp.v4.sin_port) temp.v4.sin_port = htons(port); @@ -4351,6 +4509,10 @@ static int sctp_copy_laddrs(struct sock *sk, __u16 port, void *to, if ((PF_INET == sk->sk_family) && (AF_INET6 == addr->a.sa.sa_family)) continue; + if ((PF_INET6 == sk->sk_family) && + inet_v6_ipv6only(sk) && + (AF_INET == addr->a.sa.sa_family)) + continue; memcpy(&temp, &addr->a, sizeof(temp)); if (!temp.v4.sin_port) temp.v4.sin_port = htons(port); @@ -4404,6 +4566,10 @@ static int sctp_getsockopt_local_addrs_old(struct sock *sk, int len, if (getaddrs.addr_num <= 0 || getaddrs.addr_num >= (INT_MAX / sizeof(union sctp_addr))) return -EINVAL; + + printk(KERN_WARNING "SCTP: Use of SCTP_GET_LOCAL_ADDRS_OLD " + "socket option deprecated\n"); + /* * For UDP-style sockets, id specifies the association to query. * If the id field is set to the value '0' then the locally bound @@ -4436,7 +4602,7 @@ static int sctp_getsockopt_local_addrs_old(struct sock *sk, int len, if (sctp_list_single_entry(&bp->address_list)) { addr = list_entry(bp->address_list.next, struct sctp_sockaddr_entry, list); - if (sctp_is_any(&addr->a)) { + if (sctp_is_any(sk, &addr->a)) { cnt = sctp_copy_laddrs_old(sk, bp->port, getaddrs.addr_num, addrs, &bytes_copied); @@ -4529,7 +4695,7 @@ static int sctp_getsockopt_local_addrs(struct sock *sk, int len, if (sctp_list_single_entry(&bp->address_list)) { addr = list_entry(bp->address_list.next, struct sctp_sockaddr_entry, list); - if (sctp_is_any(&addr->a)) { + if (sctp_is_any(sk, &addr->a)) { cnt = sctp_copy_laddrs(sk, bp->port, addrs, space_left, &bytes_copied); if (cnt < 0) { @@ -5053,19 +5219,29 @@ static int sctp_getsockopt_maxburst(struct sock *sk, int len, static int sctp_getsockopt_hmac_ident(struct sock *sk, int len, char __user *optval, int __user *optlen) { + struct sctp_hmacalgo __user *p = (void __user *)optval; struct sctp_hmac_algo_param *hmacs; - __u16 param_len; + __u16 data_len = 0; + u32 num_idents; + + if (!sctp_auth_enable) + return -EACCES; hmacs = sctp_sk(sk)->ep->auth_hmacs_list; - param_len = ntohs(hmacs->param_hdr.length); + data_len = ntohs(hmacs->param_hdr.length) - sizeof(sctp_paramhdr_t); - if (len < param_len) + if (len < sizeof(struct sctp_hmacalgo) + data_len) return -EINVAL; + + len = sizeof(struct sctp_hmacalgo) + data_len; + num_idents = data_len / sizeof(u16); + if (put_user(len, optlen)) return -EFAULT; - if (copy_to_user(optval, hmacs->hmac_ids, len)) + if (put_user(num_idents, &p->shmac_num_idents)) + return -EFAULT; + if (copy_to_user(p->shmac_idents, hmacs->hmac_ids, data_len)) return -EFAULT; - return 0; } @@ -5075,6 +5251,9 @@ static int sctp_getsockopt_active_key(struct sock *sk, int len, struct sctp_authkeyid val; struct sctp_association *asoc; + if (!sctp_auth_enable) + return -EACCES; + if (len < sizeof(struct sctp_authkeyid)) return -EINVAL; if (copy_from_user(&val, optval, sizeof(struct sctp_authkeyid))) @@ -5089,6 +5268,12 @@ static int sctp_getsockopt_active_key(struct sock *sk, int len, else val.scact_keynumber = sctp_sk(sk)->ep->active_key_id; + len = sizeof(struct sctp_authkeyid); + if (put_user(len, optlen)) + return -EFAULT; + if (copy_to_user(optval, &val, len)) + return -EFAULT; + return 0; } @@ -5099,13 +5284,16 @@ static int sctp_getsockopt_peer_auth_chunks(struct sock *sk, int len, struct sctp_authchunks val; struct sctp_association *asoc; struct sctp_chunks_param *ch; - u32 num_chunks; + u32 num_chunks = 0; char __user *to; - if (len <= sizeof(struct sctp_authchunks)) + if (!sctp_auth_enable) + return -EACCES; + + if (len < sizeof(struct sctp_authchunks)) return -EINVAL; - if (copy_from_user(&val, p, sizeof(struct sctp_authchunks))) + if (copy_from_user(&val, optval, sizeof(struct sctp_authchunks))) return -EFAULT; to = p->gauth_chunks; @@ -5114,20 +5302,21 @@ static int sctp_getsockopt_peer_auth_chunks(struct sock *sk, int len, return -EINVAL; ch = asoc->peer.peer_chunks; + if (!ch) + goto num; /* See if the user provided enough room for all the data */ num_chunks = ntohs(ch->param_hdr.length) - sizeof(sctp_paramhdr_t); if (len < num_chunks) return -EINVAL; - len = num_chunks; - if (put_user(len, optlen)) + if (copy_to_user(to, ch->chunks, num_chunks)) return -EFAULT; +num: + len = sizeof(struct sctp_authchunks) + num_chunks; + if (put_user(len, optlen)) return -EFAULT; if (put_user(num_chunks, &p->gauth_number_of_chunks)) return -EFAULT; - if (copy_to_user(to, ch->chunks, len)) - return -EFAULT; - return 0; } @@ -5138,13 +5327,16 @@ static int sctp_getsockopt_local_auth_chunks(struct sock *sk, int len, struct sctp_authchunks val; struct sctp_association *asoc; struct sctp_chunks_param *ch; - u32 num_chunks; + u32 num_chunks = 0; char __user *to; - if (len <= sizeof(struct sctp_authchunks)) + if (!sctp_auth_enable) + return -EACCES; + + if (len < sizeof(struct sctp_authchunks)) return -EINVAL; - if (copy_from_user(&val, p, sizeof(struct sctp_authchunks))) + if (copy_from_user(&val, optval, sizeof(struct sctp_authchunks))) return -EFAULT; to = p->gauth_chunks; @@ -5157,17 +5349,21 @@ static int sctp_getsockopt_local_auth_chunks(struct sock *sk, int len, else ch = sctp_sk(sk)->ep->auth_chunk_list; + if (!ch) + goto num; + num_chunks = ntohs(ch->param_hdr.length) - sizeof(sctp_paramhdr_t); - if (len < num_chunks) + if (len < sizeof(struct sctp_authchunks) + num_chunks) return -EINVAL; - len = num_chunks; + if (copy_to_user(to, ch->chunks, num_chunks)) + return -EFAULT; +num: + len = sizeof(struct sctp_authchunks) + num_chunks; if (put_user(len, optlen)) return -EFAULT; if (put_user(num_chunks, &p->gauth_number_of_chunks)) return -EFAULT; - if (copy_to_user(to, ch->chunks, len)) - return -EFAULT; return 0; } @@ -5220,8 +5416,8 @@ SCTP_STATIC int sctp_getsockopt(struct sock *sk, int level, int optname, retval = sctp_getsockopt_peer_addr_params(sk, len, optval, optlen); break; - case SCTP_DELAYED_ACK_TIME: - retval = sctp_getsockopt_delayed_ack_time(sk, len, optval, + case SCTP_DELAYED_ACK: + retval = sctp_getsockopt_delayed_ack(sk, len, optval, optlen); break; case SCTP_INITMSG: @@ -5441,12 +5637,13 @@ pp_found: struct sctp_endpoint *ep2; ep2 = sctp_sk(sk2)->ep; - if (reuse && sk2->sk_reuse && - sk2->sk_state != SCTP_SS_LISTENING) + if (sk == sk2 || + (reuse && sk2->sk_reuse && + sk2->sk_state != SCTP_SS_LISTENING)) continue; - if (sctp_bind_addr_match(&ep2->base.bind_addr, addr, - sctp_sk(sk))) { + if (sctp_bind_addr_conflict(&ep2->base.bind_addr, addr, + sctp_sk(sk2), sctp_sk(sk))) { ret = (long)sk2; goto fail_unlock; } @@ -5559,8 +5756,13 @@ SCTP_STATIC int sctp_seqpacket_listen(struct sock *sk, int backlog) if (!ep->base.bind_addr.port) { if (sctp_autobind(sk)) return -EAGAIN; - } else + } else { + if (sctp_get_port(sk, inet_sk(sk)->num)) { + sk->sk_state = SCTP_SS_CLOSED; + return -EADDRINUSE; + } sctp_sk(sk)->bind_hash->fastreuse = 0; + } sctp_hash_endpoint(ep); return 0; @@ -5630,7 +5832,7 @@ int sctp_inet_listen(struct socket *sock, int backlog) goto out; /* Allocate HMAC for generating cookie. */ - if (sctp_hmac_alg) { + if (!sctp_sk(sk)->hmac && sctp_hmac_alg) { tfm = crypto_alloc_hash(sctp_hmac_alg, 0, CRYPTO_ALG_ASYNC); if (IS_ERR(tfm)) { if (net_ratelimit()) { @@ -5658,7 +5860,8 @@ int sctp_inet_listen(struct socket *sock, int backlog) goto cleanup; /* Store away the transform reference. */ - sctp_sk(sk)->hmac = tfm; + if (!sctp_sk(sk)->hmac) + sctp_sk(sk)->hmac = tfm; out: sctp_release_sock(sk); return err; diff --git a/net/sctp/transport.c b/net/sctp/transport.c index 3f34f61221ec..e745c118f239 100644 --- a/net/sctp/transport.c +++ b/net/sctp/transport.c @@ -100,6 +100,9 @@ static struct sctp_transport *sctp_transport_init(struct sctp_transport *peer, INIT_LIST_HEAD(&peer->send_ready); INIT_LIST_HEAD(&peer->transports); + peer->T3_rtx_timer.expires = 0; + peer->hb_timer.expires = 0; + setup_timer(&peer->T3_rtx_timer, sctp_generate_t3_rtx_event, (unsigned long)peer); setup_timer(&peer->hb_timer, sctp_generate_heartbeat_event, diff --git a/net/sctp/tsnmap.c b/net/sctp/tsnmap.c index f3e58b275905..35c73e82553a 100644 --- a/net/sctp/tsnmap.c +++ b/net/sctp/tsnmap.c @@ -43,37 +43,44 @@ */ #include <linux/types.h> +#include <linux/bitmap.h> #include <net/sctp/sctp.h> #include <net/sctp/sm.h> static void sctp_tsnmap_update(struct sctp_tsnmap *map); -static void sctp_tsnmap_find_gap_ack(__u8 *map, __u16 off, - __u16 len, __u16 base, - int *started, __u16 *start, - int *ended, __u16 *end); +static void sctp_tsnmap_find_gap_ack(unsigned long *map, __u16 off, + __u16 len, __u16 *start, __u16 *end); +static int sctp_tsnmap_grow(struct sctp_tsnmap *map, u16 gap); /* Initialize a block of memory as a tsnmap. */ struct sctp_tsnmap *sctp_tsnmap_init(struct sctp_tsnmap *map, __u16 len, - __u32 initial_tsn) + __u32 initial_tsn, gfp_t gfp) { - map->tsn_map = map->raw_map; - map->overflow_map = map->tsn_map + len; - map->len = len; - - /* Clear out a TSN ack status. */ - memset(map->tsn_map, 0x00, map->len + map->len); + if (!map->tsn_map) { + map->tsn_map = kzalloc(len>>3, gfp); + if (map->tsn_map == NULL) + return NULL; + + map->len = len; + } else { + bitmap_zero(map->tsn_map, map->len); + } /* Keep track of TSNs represented by tsn_map. */ map->base_tsn = initial_tsn; - map->overflow_tsn = initial_tsn + map->len; map->cumulative_tsn_ack_point = initial_tsn - 1; map->max_tsn_seen = map->cumulative_tsn_ack_point; - map->malloced = 0; map->num_dup_tsns = 0; return map; } +void sctp_tsnmap_free(struct sctp_tsnmap *map) +{ + map->len = 0; + kfree(map->tsn_map); +} + /* Test the tracking state of this TSN. * Returns: * 0 if the TSN has not yet been seen @@ -82,66 +89,69 @@ struct sctp_tsnmap *sctp_tsnmap_init(struct sctp_tsnmap *map, __u16 len, */ int sctp_tsnmap_check(const struct sctp_tsnmap *map, __u32 tsn) { - __s32 gap; - int dup; + u32 gap; + + /* Check to see if this is an old TSN */ + if (TSN_lte(tsn, map->cumulative_tsn_ack_point)) + return 1; + + /* Verify that we can hold this TSN and that it will not + * overlfow our map + */ + if (!TSN_lt(tsn, map->base_tsn + SCTP_TSN_MAP_SIZE)) + return -1; /* Calculate the index into the mapping arrays. */ gap = tsn - map->base_tsn; - /* Verify that we can hold this TSN. */ - if (gap >= (/* base */ map->len + /* overflow */ map->len)) { - dup = -1; - goto out; - } - - /* Honk if we've already seen this TSN. - * We have three cases: - * 1. The TSN is ancient or belongs to a previous tsn_map. - * 2. The TSN is already marked in the tsn_map. - * 3. The TSN is already marked in the tsn_map_overflow. - */ - if (gap < 0 || - (gap < map->len && map->tsn_map[gap]) || - (gap >= map->len && map->overflow_map[gap - map->len])) - dup = 1; + /* Check to see if TSN has already been recorded. */ + if (gap < map->len && test_bit(gap, map->tsn_map)) + return 1; else - dup = 0; - -out: - return dup; + return 0; } /* Mark this TSN as seen. */ -void sctp_tsnmap_mark(struct sctp_tsnmap *map, __u32 tsn) +int sctp_tsnmap_mark(struct sctp_tsnmap *map, __u32 tsn) { - __s32 gap; + u16 gap; - /* Vacuously mark any TSN which precedes the map base or - * exceeds the end of the map. - */ if (TSN_lt(tsn, map->base_tsn)) - return; - if (!TSN_lt(tsn, map->base_tsn + map->len + map->len)) - return; - - /* Bump the max. */ - if (TSN_lt(map->max_tsn_seen, tsn)) - map->max_tsn_seen = tsn; + return 0; - /* Assert: TSN is in range. */ gap = tsn - map->base_tsn; - /* Mark the TSN as received. */ - if (gap < map->len) - map->tsn_map[gap]++; - else - map->overflow_map[gap - map->len]++; + if (gap >= map->len && !sctp_tsnmap_grow(map, gap)) + return -ENOMEM; - /* Go fixup any internal TSN mapping variables including - * cumulative_tsn_ack_point. - */ - sctp_tsnmap_update(map); + if (!sctp_tsnmap_has_gap(map) && gap == 0) { + /* In this case the map has no gaps and the tsn we are + * recording is the next expected tsn. We don't touch + * the map but simply bump the values. + */ + map->max_tsn_seen++; + map->cumulative_tsn_ack_point++; + map->base_tsn++; + } else { + /* Either we already have a gap, or about to record a gap, so + * have work to do. + * + * Bump the max. + */ + if (TSN_lt(map->max_tsn_seen, tsn)) + map->max_tsn_seen = tsn; + + /* Mark the TSN as received. */ + set_bit(gap, map->tsn_map); + + /* Go fixup any internal TSN mapping variables including + * cumulative_tsn_ack_point. + */ + sctp_tsnmap_update(map); + } + + return 0; } @@ -160,66 +170,34 @@ SCTP_STATIC int sctp_tsnmap_next_gap_ack(const struct sctp_tsnmap *map, struct sctp_tsnmap_iter *iter, __u16 *start, __u16 *end) { - int started, ended; - __u16 start_, end_, offset; - - /* We haven't found a gap yet. */ - started = ended = 0; + int ended = 0; + __u16 start_ = 0, end_ = 0, offset; /* If there are no more gap acks possible, get out fast. */ if (TSN_lte(map->max_tsn_seen, iter->start)) return 0; - /* Search the first mapping array. */ - if (iter->start - map->base_tsn < map->len) { - - offset = iter->start - map->base_tsn; - sctp_tsnmap_find_gap_ack(map->tsn_map, offset, map->len, 0, - &started, &start_, &ended, &end_); - } - - /* Do we need to check the overflow map? */ - if (!ended) { - /* Fix up where we'd like to start searching in the - * overflow map. - */ - if (iter->start - map->base_tsn < map->len) - offset = 0; - else - offset = iter->start - map->base_tsn - map->len; - - /* Search the overflow map. */ - sctp_tsnmap_find_gap_ack(map->overflow_map, - offset, - map->len, - map->len, - &started, &start_, - &ended, &end_); - } + offset = iter->start - map->base_tsn; + sctp_tsnmap_find_gap_ack(map->tsn_map, offset, map->len, + &start_, &end_); - /* The Gap Ack Block happens to end at the end of the - * overflow map. - */ - if (started && !ended) { - ended++; - end_ = map->len + map->len - 1; - } + /* The Gap Ack Block happens to end at the end of the map. */ + if (start_ && !end_) + end_ = map->len - 1; /* If we found a Gap Ack Block, return the start and end and * bump the iterator forward. */ - if (ended) { + if (end_) { /* Fix up the start and end based on the - * Cumulative TSN Ack offset into the map. + * Cumulative TSN Ack which is always 1 behind base. */ - int gap = map->cumulative_tsn_ack_point - - map->base_tsn; - - *start = start_ - gap; - *end = end_ - gap; + *start = start_ + 1; + *end = end_ + 1; /* Move the iterator forward. */ iter->start = map->cumulative_tsn_ack_point + *end + 1; + ended = 1; } return ended; @@ -228,35 +206,33 @@ SCTP_STATIC int sctp_tsnmap_next_gap_ack(const struct sctp_tsnmap *map, /* Mark this and any lower TSN as seen. */ void sctp_tsnmap_skip(struct sctp_tsnmap *map, __u32 tsn) { - __s32 gap; + u32 gap; - /* Vacuously mark any TSN which precedes the map base or - * exceeds the end of the map. - */ if (TSN_lt(tsn, map->base_tsn)) return; - if (!TSN_lt(tsn, map->base_tsn + map->len + map->len)) + if (!TSN_lt(tsn, map->base_tsn + SCTP_TSN_MAP_SIZE)) return; /* Bump the max. */ if (TSN_lt(map->max_tsn_seen, tsn)) map->max_tsn_seen = tsn; - /* Assert: TSN is in range. */ gap = tsn - map->base_tsn + 1; - /* Mark the TSNs as received. */ - if (gap <= map->len) - memset(map->tsn_map, 0x01, gap); - else { - memset(map->tsn_map, 0x01, map->len); - memset(map->overflow_map, 0x01, (gap - map->len)); + map->base_tsn += gap; + map->cumulative_tsn_ack_point += gap; + if (gap >= map->len) { + /* If our gap is larger then the map size, just + * zero out the map. + */ + bitmap_zero(map->tsn_map, map->len); + } else { + /* If the gap is smaller then the map size, + * shift the map by 'gap' bits and update further. + */ + bitmap_shift_right(map->tsn_map, map->tsn_map, gap, map->len); + sctp_tsnmap_update(map); } - - /* Go fixup any internal TSN mapping variables including - * cumulative_tsn_ack_point. - */ - sctp_tsnmap_update(map); } /******************************************************************** @@ -268,27 +244,19 @@ void sctp_tsnmap_skip(struct sctp_tsnmap *map, __u32 tsn) */ static void sctp_tsnmap_update(struct sctp_tsnmap *map) { - __u32 ctsn; - - ctsn = map->cumulative_tsn_ack_point; - do { - ctsn++; - if (ctsn == map->overflow_tsn) { - /* Now tsn_map must have been all '1's, - * so we swap the map and check the overflow table - */ - __u8 *tmp = map->tsn_map; - memset(tmp, 0, map->len); - map->tsn_map = map->overflow_map; - map->overflow_map = tmp; - - /* Update the tsn_map boundaries. */ - map->base_tsn += map->len; - map->overflow_tsn += map->len; - } - } while (map->tsn_map[ctsn - map->base_tsn]); + u16 len; + unsigned long zero_bit; + + + len = map->max_tsn_seen - map->cumulative_tsn_ack_point; + zero_bit = find_first_zero_bit(map->tsn_map, len); + if (!zero_bit) + return; /* The first 0-bit is bit 0. nothing to do */ + + map->base_tsn += zero_bit; + map->cumulative_tsn_ack_point += zero_bit; - map->cumulative_tsn_ack_point = ctsn - 1; /* Back up one. */ + bitmap_shift_right(map->tsn_map, map->tsn_map, zero_bit, map->len); } /* How many data chunks are we missing from our peer? @@ -299,31 +267,19 @@ __u16 sctp_tsnmap_pending(struct sctp_tsnmap *map) __u32 max_tsn = map->max_tsn_seen; __u32 base_tsn = map->base_tsn; __u16 pending_data; - __s32 gap, start, end, i; + u32 gap, i; pending_data = max_tsn - cum_tsn; gap = max_tsn - base_tsn; - if (gap <= 0 || gap >= (map->len + map->len)) + if (gap == 0 || gap >= map->len) goto out; - start = ((cum_tsn >= base_tsn) ? (cum_tsn - base_tsn + 1) : 0); - end = ((gap > map->len ) ? map->len : gap + 1); - - for (i = start; i < end; i++) { - if (map->tsn_map[i]) + for (i = 0; i < gap+1; i++) { + if (test_bit(i, map->tsn_map)) pending_data--; } - if (gap >= map->len) { - start = 0; - end = gap - map->len + 1; - for (i = start; i < end; i++) { - if (map->overflow_map[i]) - pending_data--; - } - } - out: return pending_data; } @@ -334,10 +290,8 @@ out: * The flags "started" and "ended" tell is if we found the beginning * or (respectively) the end of a Gap Ack Block. */ -static void sctp_tsnmap_find_gap_ack(__u8 *map, __u16 off, - __u16 len, __u16 base, - int *started, __u16 *start, - int *ended, __u16 *end) +static void sctp_tsnmap_find_gap_ack(unsigned long *map, __u16 off, + __u16 len, __u16 *start, __u16 *end) { int i = off; @@ -348,56 +302,44 @@ static void sctp_tsnmap_find_gap_ack(__u8 *map, __u16 off, /* Also, stop looking past the maximum TSN seen. */ /* Look for the start. */ - if (!(*started)) { - for (; i < len; i++) { - if (map[i]) { - (*started)++; - *start = base + i; - break; - } - } - } + i = find_next_bit(map, len, off); + if (i < len) + *start = i; /* Look for the end. */ - if (*started) { + if (*start) { /* We have found the start, let's find the * end. If we find the end, break out. */ - for (; i < len; i++) { - if (!map[i]) { - (*ended)++; - *end = base + i - 1; - break; - } - } + i = find_next_zero_bit(map, len, i); + if (i < len) + *end = i - 1; } } /* Renege that we have seen a TSN. */ void sctp_tsnmap_renege(struct sctp_tsnmap *map, __u32 tsn) { - __s32 gap; + u32 gap; if (TSN_lt(tsn, map->base_tsn)) return; - if (!TSN_lt(tsn, map->base_tsn + map->len + map->len)) + /* Assert: TSN is in range. */ + if (!TSN_lt(tsn, map->base_tsn + map->len)) return; - /* Assert: TSN is in range. */ gap = tsn - map->base_tsn; /* Pretend we never saw the TSN. */ - if (gap < map->len) - map->tsn_map[gap] = 0; - else - map->overflow_map[gap - map->len] = 0; + clear_bit(gap, map->tsn_map); } /* How many gap ack blocks do we have recorded? */ -__u16 sctp_tsnmap_num_gabs(struct sctp_tsnmap *map) +__u16 sctp_tsnmap_num_gabs(struct sctp_tsnmap *map, + struct sctp_gap_ack_block *gabs) { struct sctp_tsnmap_iter iter; - int gabs = 0; + int ngaps = 0; /* Refresh the gap ack information. */ if (sctp_tsnmap_has_gap(map)) { @@ -407,12 +349,36 @@ __u16 sctp_tsnmap_num_gabs(struct sctp_tsnmap *map) &start, &end)) { - map->gabs[gabs].start = htons(start); - map->gabs[gabs].end = htons(end); - gabs++; - if (gabs >= SCTP_MAX_GABS) + gabs[ngaps].start = htons(start); + gabs[ngaps].end = htons(end); + ngaps++; + if (ngaps >= SCTP_MAX_GABS) break; } } - return gabs; + return ngaps; +} + +static int sctp_tsnmap_grow(struct sctp_tsnmap *map, u16 gap) +{ + unsigned long *new; + unsigned long inc; + u16 len; + + if (gap >= SCTP_TSN_MAP_SIZE) + return 0; + + inc = ALIGN((gap - map->len),BITS_PER_LONG) + SCTP_TSN_MAP_INCREMENT; + len = min_t(u16, map->len + inc, SCTP_TSN_MAP_SIZE); + + new = kzalloc(len>>3, GFP_ATOMIC); + if (!new) + return 0; + + bitmap_copy(new, map->tsn_map, map->max_tsn_seen - map->base_tsn); + kfree(map->tsn_map); + map->tsn_map = new; + map->len = len; + + return 1; } diff --git a/net/sctp/ulpevent.c b/net/sctp/ulpevent.c index ce6cda6b6994..5f186ca550d7 100644 --- a/net/sctp/ulpevent.c +++ b/net/sctp/ulpevent.c @@ -710,6 +710,13 @@ struct sctp_ulpevent *sctp_ulpevent_make_rcvmsg(struct sctp_association *asoc, if (!skb) goto fail; + /* Now that all memory allocations for this chunk succeeded, we + * can mark it as received so the tsn_map is updated correctly. + */ + if (sctp_tsnmap_mark(&asoc->peer.tsn_map, + ntohl(chunk->subh.data_hdr->tsn))) + goto fail_mark; + /* First calculate the padding, so we don't inadvertently * pass up the wrong length to the user. * @@ -750,8 +757,12 @@ struct sctp_ulpevent *sctp_ulpevent_make_rcvmsg(struct sctp_association *asoc, event->msg_flags |= chunk->chunk_hdr->flags; event->iif = sctp_chunk_iif(chunk); -fail: return event; + +fail_mark: + kfree_skb(skb); +fail: + return NULL; } /* Create a partial delivery related event. diff --git a/net/sctp/ulpqueue.c b/net/sctp/ulpqueue.c index 5061a26c5028..7b23803343cc 100644 --- a/net/sctp/ulpqueue.c +++ b/net/sctp/ulpqueue.c @@ -317,7 +317,7 @@ static void sctp_ulpq_store_reasm(struct sctp_ulpq *ulpq, } /* Insert before pos. */ - __skb_insert(sctp_event2skb(event), pos->prev, pos, &ulpq->reasm); + __skb_queue_before(&ulpq->reasm, pos, sctp_event2skb(event)); } @@ -825,8 +825,7 @@ static void sctp_ulpq_store_ordered(struct sctp_ulpq *ulpq, /* Insert before pos. */ - __skb_insert(sctp_event2skb(event), pos->prev, pos, &ulpq->lobby); - + __skb_queue_before(&ulpq->lobby, pos, sctp_event2skb(event)); } static struct sctp_ulpevent *sctp_ulpq_order(struct sctp_ulpq *ulpq, diff --git a/net/socket.c b/net/socket.c index 66c4a8cf6db9..3e8d4e35c08f 100644 --- a/net/socket.c +++ b/net/socket.c @@ -63,11 +63,13 @@ #include <linux/file.h> #include <linux/net.h> #include <linux/interrupt.h> +#include <linux/thread_info.h> #include <linux/rcupdate.h> #include <linux/netdevice.h> #include <linux/proc_fs.h> #include <linux/seq_file.h> #include <linux/mutex.h> +#include <linux/thread_info.h> #include <linux/wanrouter.h> #include <linux/if_bridge.h> #include <linux/if_frad.h> @@ -90,6 +92,7 @@ #include <asm/unistd.h> #include <net/compat.h> +#include <net/wext.h> #include <net/sock.h> #include <linux/netfilter.h> @@ -179,9 +182,9 @@ static DEFINE_PER_CPU(int, sockets_in_use) = 0; * invalid addresses -EFAULT is returned. On a success 0 is returned. */ -int move_addr_to_kernel(void __user *uaddr, int ulen, void *kaddr) +int move_addr_to_kernel(void __user *uaddr, int ulen, struct sockaddr *kaddr) { - if (ulen < 0 || ulen > MAX_SOCK_ADDR) + if (ulen < 0 || ulen > sizeof(struct sockaddr_storage)) return -EINVAL; if (ulen == 0) return 0; @@ -207,7 +210,7 @@ int move_addr_to_kernel(void __user *uaddr, int ulen, void *kaddr) * specified. Zero is returned for a success. */ -int move_addr_to_user(void *kaddr, int klen, void __user *uaddr, +int move_addr_to_user(struct sockaddr *kaddr, int klen, void __user *uaddr, int __user *ulen) { int err; @@ -218,7 +221,7 @@ int move_addr_to_user(void *kaddr, int klen, void __user *uaddr, return err; if (len > klen) len = klen; - if (len < 0 || len > MAX_SOCK_ADDR) + if (len < 0 || len > sizeof(struct sockaddr_storage)) return -EINVAL; if (len) { if (audit_sockaddr(klen, kaddr)) @@ -262,7 +265,7 @@ static void sock_destroy_inode(struct inode *inode) container_of(inode, struct socket_alloc, vfs_inode)); } -static void init_once(struct kmem_cache *cachep, void *foo) +static void init_once(void *foo) { struct socket_alloc *ei = (struct socket_alloc *)foo; @@ -348,11 +351,11 @@ static struct dentry_operations sockfs_dentry_operations = { * but we take care of internal coherence yet. */ -static int sock_alloc_fd(struct file **filep) +static int sock_alloc_fd(struct file **filep, int flags) { int fd; - fd = get_unused_fd(); + fd = get_unused_fd_flags(flags); if (likely(fd >= 0)) { struct file *file = get_empty_filp(); @@ -366,7 +369,7 @@ static int sock_alloc_fd(struct file **filep) return fd; } -static int sock_attach_fd(struct socket *sock, struct file *file) +static int sock_attach_fd(struct socket *sock, struct file *file, int flags) { struct dentry *dentry; struct qstr name = { .name = "" }; @@ -388,20 +391,20 @@ static int sock_attach_fd(struct socket *sock, struct file *file) init_file(file, sock_mnt, dentry, FMODE_READ | FMODE_WRITE, &socket_file_ops); SOCK_INODE(sock)->i_fop = &socket_file_ops; - file->f_flags = O_RDWR; + file->f_flags = O_RDWR | (flags & O_NONBLOCK); file->f_pos = 0; file->private_data = sock; return 0; } -int sock_map_fd(struct socket *sock) +int sock_map_fd(struct socket *sock, int flags) { struct file *newfile; - int fd = sock_alloc_fd(&newfile); + int fd = sock_alloc_fd(&newfile, flags); if (likely(fd >= 0)) { - int err = sock_attach_fd(sock, newfile); + int err = sock_attach_fd(sock, newfile, flags); if (unlikely(err < 0)) { put_filp(newfile); @@ -1217,12 +1220,27 @@ asmlinkage long sys_socket(int family, int type, int protocol) { int retval; struct socket *sock; + int flags; + + /* Check the SOCK_* constants for consistency. */ + BUILD_BUG_ON(SOCK_CLOEXEC != O_CLOEXEC); + BUILD_BUG_ON((SOCK_MAX | SOCK_TYPE_MASK) != SOCK_TYPE_MASK); + BUILD_BUG_ON(SOCK_CLOEXEC & SOCK_TYPE_MASK); + BUILD_BUG_ON(SOCK_NONBLOCK & SOCK_TYPE_MASK); + + flags = type & ~SOCK_TYPE_MASK; + if (flags & ~(SOCK_CLOEXEC | SOCK_NONBLOCK)) + return -EINVAL; + type &= SOCK_TYPE_MASK; + + if (SOCK_NONBLOCK != O_NONBLOCK && (flags & SOCK_NONBLOCK)) + flags = (flags & ~SOCK_NONBLOCK) | O_NONBLOCK; retval = sock_create(family, type, protocol, &sock); if (retval < 0) goto out; - retval = sock_map_fd(sock); + retval = sock_map_fd(sock, flags & (O_CLOEXEC | O_NONBLOCK)); if (retval < 0) goto out_release; @@ -1245,6 +1263,15 @@ asmlinkage long sys_socketpair(int family, int type, int protocol, struct socket *sock1, *sock2; int fd1, fd2, err; struct file *newfile1, *newfile2; + int flags; + + flags = type & ~SOCK_TYPE_MASK; + if (flags & ~(SOCK_CLOEXEC | SOCK_NONBLOCK)) + return -EINVAL; + type &= SOCK_TYPE_MASK; + + if (SOCK_NONBLOCK != O_NONBLOCK && (flags & SOCK_NONBLOCK)) + flags = (flags & ~SOCK_NONBLOCK) | O_NONBLOCK; /* * Obtain the first socket and check if the underlying protocol @@ -1263,13 +1290,13 @@ asmlinkage long sys_socketpair(int family, int type, int protocol, if (err < 0) goto out_release_both; - fd1 = sock_alloc_fd(&newfile1); + fd1 = sock_alloc_fd(&newfile1, flags & O_CLOEXEC); if (unlikely(fd1 < 0)) { err = fd1; goto out_release_both; } - fd2 = sock_alloc_fd(&newfile2); + fd2 = sock_alloc_fd(&newfile2, flags & O_CLOEXEC); if (unlikely(fd2 < 0)) { err = fd2; put_filp(newfile1); @@ -1277,12 +1304,12 @@ asmlinkage long sys_socketpair(int family, int type, int protocol, goto out_release_both; } - err = sock_attach_fd(sock1, newfile1); + err = sock_attach_fd(sock1, newfile1, flags & O_NONBLOCK); if (unlikely(err < 0)) { goto out_fd2; } - err = sock_attach_fd(sock2, newfile2); + err = sock_attach_fd(sock2, newfile2, flags & O_NONBLOCK); if (unlikely(err < 0)) { fput(newfile1); goto out_fd1; @@ -1341,20 +1368,20 @@ out_fd: asmlinkage long sys_bind(int fd, struct sockaddr __user *umyaddr, int addrlen) { struct socket *sock; - char address[MAX_SOCK_ADDR]; + struct sockaddr_storage address; int err, fput_needed; sock = sockfd_lookup_light(fd, &err, &fput_needed); if (sock) { - err = move_addr_to_kernel(umyaddr, addrlen, address); + err = move_addr_to_kernel(umyaddr, addrlen, (struct sockaddr *)&address); if (err >= 0) { err = security_socket_bind(sock, - (struct sockaddr *)address, + (struct sockaddr *)&address, addrlen); if (!err) err = sock->ops->bind(sock, (struct sockaddr *) - address, addrlen); + &address, addrlen); } fput_light(sock->file, fput_needed); } @@ -1400,13 +1427,19 @@ asmlinkage long sys_listen(int fd, int backlog) * clean when we restucture accept also. */ -asmlinkage long sys_accept(int fd, struct sockaddr __user *upeer_sockaddr, - int __user *upeer_addrlen) +long do_accept(int fd, struct sockaddr __user *upeer_sockaddr, + int __user *upeer_addrlen, int flags) { struct socket *sock, *newsock; struct file *newfile; int err, len, newfd, fput_needed; - char address[MAX_SOCK_ADDR]; + struct sockaddr_storage address; + + if (flags & ~(SOCK_CLOEXEC | SOCK_NONBLOCK)) + return -EINVAL; + + if (SOCK_NONBLOCK != O_NONBLOCK && (flags & SOCK_NONBLOCK)) + flags = (flags & ~SOCK_NONBLOCK) | O_NONBLOCK; sock = sockfd_lookup_light(fd, &err, &fput_needed); if (!sock) @@ -1425,14 +1458,14 @@ asmlinkage long sys_accept(int fd, struct sockaddr __user *upeer_sockaddr, */ __module_get(newsock->ops->owner); - newfd = sock_alloc_fd(&newfile); + newfd = sock_alloc_fd(&newfile, flags & O_CLOEXEC); if (unlikely(newfd < 0)) { err = newfd; sock_release(newsock); goto out_put; } - err = sock_attach_fd(newsock, newfile); + err = sock_attach_fd(newsock, newfile, flags & O_NONBLOCK); if (err < 0) goto out_fd_simple; @@ -1445,13 +1478,13 @@ asmlinkage long sys_accept(int fd, struct sockaddr __user *upeer_sockaddr, goto out_fd; if (upeer_sockaddr) { - if (newsock->ops->getname(newsock, (struct sockaddr *)address, + if (newsock->ops->getname(newsock, (struct sockaddr *)&address, &len, 2) < 0) { err = -ECONNABORTED; goto out_fd; } - err = move_addr_to_user(address, len, upeer_sockaddr, - upeer_addrlen); + err = move_addr_to_user((struct sockaddr *)&address, + len, upeer_sockaddr, upeer_addrlen); if (err < 0) goto out_fd; } @@ -1478,6 +1511,68 @@ out_fd: goto out_put; } +#if 0 +#ifdef HAVE_SET_RESTORE_SIGMASK +asmlinkage long sys_paccept(int fd, struct sockaddr __user *upeer_sockaddr, + int __user *upeer_addrlen, + const sigset_t __user *sigmask, + size_t sigsetsize, int flags) +{ + sigset_t ksigmask, sigsaved; + int ret; + + if (sigmask) { + /* XXX: Don't preclude handling different sized sigset_t's. */ + if (sigsetsize != sizeof(sigset_t)) + return -EINVAL; + if (copy_from_user(&ksigmask, sigmask, sizeof(ksigmask))) + return -EFAULT; + + sigdelsetmask(&ksigmask, sigmask(SIGKILL)|sigmask(SIGSTOP)); + sigprocmask(SIG_SETMASK, &ksigmask, &sigsaved); + } + + ret = do_accept(fd, upeer_sockaddr, upeer_addrlen, flags); + + if (ret < 0 && signal_pending(current)) { + /* + * Don't restore the signal mask yet. Let do_signal() deliver + * the signal on the way back to userspace, before the signal + * mask is restored. + */ + if (sigmask) { + memcpy(¤t->saved_sigmask, &sigsaved, + sizeof(sigsaved)); + set_restore_sigmask(); + } + } else if (sigmask) + sigprocmask(SIG_SETMASK, &sigsaved, NULL); + + return ret; +} +#else +asmlinkage long sys_paccept(int fd, struct sockaddr __user *upeer_sockaddr, + int __user *upeer_addrlen, + const sigset_t __user *sigmask, + size_t sigsetsize, int flags) +{ + /* The platform does not support restoring the signal mask in the + * return path. So we do not allow using paccept() with a signal + * mask. */ + if (sigmask) + return -EINVAL; + + return do_accept(fd, upeer_sockaddr, upeer_addrlen, flags); +} +#endif +#endif + +asmlinkage long sys_accept(int fd, struct sockaddr __user *upeer_sockaddr, + int __user *upeer_addrlen) +{ + return do_accept(fd, upeer_sockaddr, upeer_addrlen, 0); +} + /* * Attempt to connect to a socket with the server address. The address * is in user space so we verify it is OK and move it to kernel space. @@ -1494,22 +1589,22 @@ asmlinkage long sys_connect(int fd, struct sockaddr __user *uservaddr, int addrlen) { struct socket *sock; - char address[MAX_SOCK_ADDR]; + struct sockaddr_storage address; int err, fput_needed; sock = sockfd_lookup_light(fd, &err, &fput_needed); if (!sock) goto out; - err = move_addr_to_kernel(uservaddr, addrlen, address); + err = move_addr_to_kernel(uservaddr, addrlen, (struct sockaddr *)&address); if (err < 0) goto out_put; err = - security_socket_connect(sock, (struct sockaddr *)address, addrlen); + security_socket_connect(sock, (struct sockaddr *)&address, addrlen); if (err) goto out_put; - err = sock->ops->connect(sock, (struct sockaddr *)address, addrlen, + err = sock->ops->connect(sock, (struct sockaddr *)&address, addrlen, sock->file->f_flags); out_put: fput_light(sock->file, fput_needed); @@ -1526,7 +1621,7 @@ asmlinkage long sys_getsockname(int fd, struct sockaddr __user *usockaddr, int __user *usockaddr_len) { struct socket *sock; - char address[MAX_SOCK_ADDR]; + struct sockaddr_storage address; int len, err, fput_needed; sock = sockfd_lookup_light(fd, &err, &fput_needed); @@ -1537,10 +1632,10 @@ asmlinkage long sys_getsockname(int fd, struct sockaddr __user *usockaddr, if (err) goto out_put; - err = sock->ops->getname(sock, (struct sockaddr *)address, &len, 0); + err = sock->ops->getname(sock, (struct sockaddr *)&address, &len, 0); if (err) goto out_put; - err = move_addr_to_user(address, len, usockaddr, usockaddr_len); + err = move_addr_to_user((struct sockaddr *)&address, len, usockaddr, usockaddr_len); out_put: fput_light(sock->file, fput_needed); @@ -1557,7 +1652,7 @@ asmlinkage long sys_getpeername(int fd, struct sockaddr __user *usockaddr, int __user *usockaddr_len) { struct socket *sock; - char address[MAX_SOCK_ADDR]; + struct sockaddr_storage address; int len, err, fput_needed; sock = sockfd_lookup_light(fd, &err, &fput_needed); @@ -1569,10 +1664,10 @@ asmlinkage long sys_getpeername(int fd, struct sockaddr __user *usockaddr, } err = - sock->ops->getname(sock, (struct sockaddr *)address, &len, + sock->ops->getname(sock, (struct sockaddr *)&address, &len, 1); if (!err) - err = move_addr_to_user(address, len, usockaddr, + err = move_addr_to_user((struct sockaddr *)&address, len, usockaddr, usockaddr_len); fput_light(sock->file, fput_needed); } @@ -1590,7 +1685,7 @@ asmlinkage long sys_sendto(int fd, void __user *buff, size_t len, int addr_len) { struct socket *sock; - char address[MAX_SOCK_ADDR]; + struct sockaddr_storage address; int err; struct msghdr msg; struct iovec iov; @@ -1609,10 +1704,10 @@ asmlinkage long sys_sendto(int fd, void __user *buff, size_t len, msg.msg_controllen = 0; msg.msg_namelen = 0; if (addr) { - err = move_addr_to_kernel(addr, addr_len, address); + err = move_addr_to_kernel(addr, addr_len, (struct sockaddr *)&address); if (err < 0) goto out_put; - msg.msg_name = address; + msg.msg_name = (struct sockaddr *)&address; msg.msg_namelen = addr_len; } if (sock->file->f_flags & O_NONBLOCK) @@ -1648,7 +1743,7 @@ asmlinkage long sys_recvfrom(int fd, void __user *ubuf, size_t size, struct socket *sock; struct iovec iov; struct msghdr msg; - char address[MAX_SOCK_ADDR]; + struct sockaddr_storage address; int err, err2; int fput_needed; @@ -1662,14 +1757,15 @@ asmlinkage long sys_recvfrom(int fd, void __user *ubuf, size_t size, msg.msg_iov = &iov; iov.iov_len = size; iov.iov_base = ubuf; - msg.msg_name = address; - msg.msg_namelen = MAX_SOCK_ADDR; + msg.msg_name = (struct sockaddr *)&address; + msg.msg_namelen = sizeof(address); if (sock->file->f_flags & O_NONBLOCK) flags |= MSG_DONTWAIT; err = sock_recvmsg(sock, &msg, size, flags); if (err >= 0 && addr != NULL) { - err2 = move_addr_to_user(address, msg.msg_namelen, addr, addr_len); + err2 = move_addr_to_user((struct sockaddr *)&address, + msg.msg_namelen, addr, addr_len); if (err2 < 0) err = err2; } @@ -1789,7 +1885,7 @@ asmlinkage long sys_sendmsg(int fd, struct msghdr __user *msg, unsigned flags) struct compat_msghdr __user *msg_compat = (struct compat_msghdr __user *)msg; struct socket *sock; - char address[MAX_SOCK_ADDR]; + struct sockaddr_storage address; struct iovec iovstack[UIO_FASTIOV], *iov = iovstack; unsigned char ctl[sizeof(struct cmsghdr) + 20] __attribute__ ((aligned(sizeof(__kernel_size_t)))); @@ -1827,9 +1923,13 @@ asmlinkage long sys_sendmsg(int fd, struct msghdr __user *msg, unsigned flags) /* This will also move the address data into kernel space */ if (MSG_CMSG_COMPAT & flags) { - err = verify_compat_iovec(&msg_sys, iov, address, VERIFY_READ); + err = verify_compat_iovec(&msg_sys, iov, + (struct sockaddr *)&address, + VERIFY_READ); } else - err = verify_iovec(&msg_sys, iov, address, VERIFY_READ); + err = verify_iovec(&msg_sys, iov, + (struct sockaddr *)&address, + VERIFY_READ); if (err < 0) goto out_freeiov; total_len = err; @@ -1900,7 +2000,7 @@ asmlinkage long sys_recvmsg(int fd, struct msghdr __user *msg, int fput_needed; /* kernel mode address */ - char addr[MAX_SOCK_ADDR]; + struct sockaddr_storage addr; /* user mode address pointers */ struct sockaddr __user *uaddr; @@ -1938,9 +2038,13 @@ asmlinkage long sys_recvmsg(int fd, struct msghdr __user *msg, uaddr = (__force void __user *)msg_sys.msg_name; uaddr_len = COMPAT_NAMELEN(msg); if (MSG_CMSG_COMPAT & flags) { - err = verify_compat_iovec(&msg_sys, iov, addr, VERIFY_WRITE); + err = verify_compat_iovec(&msg_sys, iov, + (struct sockaddr *)&addr, + VERIFY_WRITE); } else - err = verify_iovec(&msg_sys, iov, addr, VERIFY_WRITE); + err = verify_iovec(&msg_sys, iov, + (struct sockaddr *)&addr, + VERIFY_WRITE); if (err < 0) goto out_freeiov; total_len = err; @@ -1956,7 +2060,8 @@ asmlinkage long sys_recvmsg(int fd, struct msghdr __user *msg, len = err; if (uaddr != NULL) { - err = move_addr_to_user(addr, msg_sys.msg_namelen, uaddr, + err = move_addr_to_user((struct sockaddr *)&addr, + msg_sys.msg_namelen, uaddr, uaddr_len); if (err < 0) goto out_freeiov; @@ -1988,10 +2093,11 @@ out: /* Argument list sizes for sys_socketcall */ #define AL(x) ((x) * sizeof(unsigned long)) -static const unsigned char nargs[18]={ +static const unsigned char nargs[19]={ AL(0),AL(3),AL(3),AL(3),AL(2),AL(3), AL(3),AL(3),AL(4),AL(4),AL(4),AL(6), - AL(6),AL(2),AL(5),AL(5),AL(3),AL(3) + AL(6),AL(2),AL(5),AL(5),AL(3),AL(3), + AL(6) }; #undef AL @@ -2010,7 +2116,7 @@ asmlinkage long sys_socketcall(int call, unsigned long __user *args) unsigned long a0, a1; int err; - if (call < 1 || call > SYS_RECVMSG) + if (call < 1 || call > SYS_PACCEPT) return -EINVAL; /* copy_from_user should be SMP safe. */ @@ -2039,8 +2145,8 @@ asmlinkage long sys_socketcall(int call, unsigned long __user *args) break; case SYS_ACCEPT: err = - sys_accept(a0, (struct sockaddr __user *)a1, - (int __user *)a[2]); + do_accept(a0, (struct sockaddr __user *)a1, + (int __user *)a[2], 0); break; case SYS_GETSOCKNAME: err = @@ -2087,6 +2193,13 @@ asmlinkage long sys_socketcall(int call, unsigned long __user *args) case SYS_RECVMSG: err = sys_recvmsg(a0, (struct msghdr __user *)a1, a[2]); break; + case SYS_PACCEPT: + err = + sys_paccept(a0, (struct sockaddr __user *)a1, + (int __user *)a[2], + (const sigset_t __user *) a[3], + a[4], a[5]); + break; default: err = -EINVAL; break; @@ -2210,10 +2323,19 @@ static long compat_sock_ioctl(struct file *file, unsigned cmd, { struct socket *sock = file->private_data; int ret = -ENOIOCTLCMD; + struct sock *sk; + struct net *net; + + sk = sock->sk; + net = sock_net(sk); if (sock->ops->compat_ioctl) ret = sock->ops->compat_ioctl(sock, cmd, arg); + if (ret == -ENOIOCTLCMD && + (cmd >= SIOCIWFIRST && cmd <= SIOCIWLAST)) + ret = compat_wext_handle_ioctl(net, cmd, arg); + return ret; } #endif diff --git a/net/sunrpc/auth_gss/Makefile b/net/sunrpc/auth_gss/Makefile index f3431a7e33da..4de8bcf26fa7 100644 --- a/net/sunrpc/auth_gss/Makefile +++ b/net/sunrpc/auth_gss/Makefile @@ -5,12 +5,12 @@ obj-$(CONFIG_SUNRPC_GSS) += auth_rpcgss.o auth_rpcgss-objs := auth_gss.o gss_generic_token.o \ - gss_mech_switch.o svcauth_gss.o gss_krb5_crypto.o + gss_mech_switch.o svcauth_gss.o obj-$(CONFIG_RPCSEC_GSS_KRB5) += rpcsec_gss_krb5.o rpcsec_gss_krb5-objs := gss_krb5_mech.o gss_krb5_seal.o gss_krb5_unseal.o \ - gss_krb5_seqnum.o gss_krb5_wrap.o + gss_krb5_seqnum.o gss_krb5_wrap.o gss_krb5_crypto.o obj-$(CONFIG_RPCSEC_GSS_SPKM3) += rpcsec_gss_spkm3.o diff --git a/net/sunrpc/auth_gss/auth_gss.c b/net/sunrpc/auth_gss/auth_gss.c index cc12d5f5d5da..853a4142cea1 100644 --- a/net/sunrpc/auth_gss/auth_gss.c +++ b/net/sunrpc/auth_gss/auth_gss.c @@ -33,8 +33,6 @@ * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - * - * $Id$ */ @@ -63,22 +61,11 @@ static const struct rpc_credops gss_nullops; # define RPCDBG_FACILITY RPCDBG_AUTH #endif -#define NFS_NGROUPS 16 - -#define GSS_CRED_SLACK 1024 /* XXX: unused */ +#define GSS_CRED_SLACK 1024 /* length of a krb5 verifier (48), plus data added before arguments when * using integrity (two 4-byte integers): */ #define GSS_VERF_SLACK 100 -/* XXX this define must match the gssd define -* as it is passed to gssd to signal the use of -* machine creds should be part of the shared rpc interface */ - -#define CA_RUN_AS_MACHINE 0x00000200 - -/* dump the buffer in `emacs-hexl' style */ -#define isprint(c) ((c > 0x1f) && (c < 0x7f)) - struct gss_auth { struct kref kref; struct rpc_auth rpc_auth; @@ -146,7 +133,7 @@ simple_get_netobj(const void *p, const void *end, struct xdr_netobj *dest) q = (const void *)((const char *)p + len); if (unlikely(q > end || q < p)) return ERR_PTR(-EFAULT); - dest->data = kmemdup(p, len, GFP_KERNEL); + dest->data = kmemdup(p, len, GFP_NOFS); if (unlikely(dest->data == NULL)) return ERR_PTR(-ENOMEM); dest->len = len; @@ -171,7 +158,7 @@ gss_alloc_context(void) { struct gss_cl_ctx *ctx; - ctx = kzalloc(sizeof(*ctx), GFP_KERNEL); + ctx = kzalloc(sizeof(*ctx), GFP_NOFS); if (ctx != NULL) { ctx->gc_proc = RPC_GSS_PROC_DATA; ctx->gc_seq = 1; /* NetApp 6.4R1 doesn't accept seq. no. 0 */ @@ -272,7 +259,7 @@ __gss_find_upcall(struct rpc_inode *rpci, uid_t uid) return NULL; } -/* Try to add a upcall to the pipefs queue. +/* Try to add an upcall to the pipefs queue. * If an upcall owned by our uid already exists, then we return a reference * to that upcall instead of adding the new upcall. */ @@ -341,7 +328,7 @@ gss_alloc_msg(struct gss_auth *gss_auth, uid_t uid) { struct gss_upcall_msg *gss_msg; - gss_msg = kzalloc(sizeof(*gss_msg), GFP_KERNEL); + gss_msg = kzalloc(sizeof(*gss_msg), GFP_NOFS); if (gss_msg != NULL) { INIT_LIST_HEAD(&gss_msg->list); rpc_init_wait_queue(&gss_msg->rpc_waitqueue, "RPCSEC_GSS upcall waitq"); @@ -493,7 +480,6 @@ gss_pipe_downcall(struct file *filp, const char __user *src, size_t mlen) { const void *p, *end; void *buf; - struct rpc_clnt *clnt; struct gss_upcall_msg *gss_msg; struct inode *inode = filp->f_path.dentry->d_inode; struct gss_cl_ctx *ctx; @@ -503,11 +489,10 @@ gss_pipe_downcall(struct file *filp, const char __user *src, size_t mlen) if (mlen > MSG_BUF_MAXSIZE) goto out; err = -ENOMEM; - buf = kmalloc(mlen, GFP_KERNEL); + buf = kmalloc(mlen, GFP_NOFS); if (!buf) goto out; - clnt = RPC_I(inode)->private; err = -EFAULT; if (copy_from_user(buf, src, mlen)) goto err; @@ -806,7 +791,7 @@ gss_create_cred(struct rpc_auth *auth, struct auth_cred *acred, int flags) dprintk("RPC: gss_create_cred for uid %d, flavor %d\n", acred->uid, auth->au_flavor); - if (!(cred = kzalloc(sizeof(*cred), GFP_KERNEL))) + if (!(cred = kzalloc(sizeof(*cred), GFP_NOFS))) goto out_err; rpcauth_init_cred(&cred->gc_base, acred, auth, &gss_credops); diff --git a/net/sunrpc/auth_gss/gss_krb5_crypto.c b/net/sunrpc/auth_gss/gss_krb5_crypto.c index 1d52308ca324..c93fca204558 100644 --- a/net/sunrpc/auth_gss/gss_krb5_crypto.c +++ b/net/sunrpc/auth_gss/gss_krb5_crypto.c @@ -83,8 +83,6 @@ out: return ret; } -EXPORT_SYMBOL(krb5_encrypt); - u32 krb5_decrypt( struct crypto_blkcipher *tfm, @@ -118,8 +116,6 @@ out: return ret; } -EXPORT_SYMBOL(krb5_decrypt); - static int checksummer(struct scatterlist *sg, void *data) { @@ -161,8 +157,6 @@ out: return err ? GSS_S_FAILURE : 0; } -EXPORT_SYMBOL(make_checksum); - struct encryptor_desc { u8 iv[8]; /* XXX hard-coded blocksize */ struct blkcipher_desc desc; @@ -262,8 +256,6 @@ gss_encrypt_xdr_buf(struct crypto_blkcipher *tfm, struct xdr_buf *buf, return ret; } -EXPORT_SYMBOL(gss_encrypt_xdr_buf); - struct decryptor_desc { u8 iv[8]; /* XXX hard-coded blocksize */ struct blkcipher_desc desc; @@ -334,5 +326,3 @@ gss_decrypt_xdr_buf(struct crypto_blkcipher *tfm, struct xdr_buf *buf, return xdr_process_buf(buf, offset, buf->len - offset, decryptor, &desc); } - -EXPORT_SYMBOL(gss_decrypt_xdr_buf); diff --git a/net/sunrpc/auth_gss/gss_krb5_mech.c b/net/sunrpc/auth_gss/gss_krb5_mech.c index 60c3dba545d7..ef45eba22485 100644 --- a/net/sunrpc/auth_gss/gss_krb5_mech.c +++ b/net/sunrpc/auth_gss/gss_krb5_mech.c @@ -70,7 +70,7 @@ simple_get_netobj(const void *p, const void *end, struct xdr_netobj *res) q = (const void *)((const char *)p + len); if (unlikely(q > end || q < p)) return ERR_PTR(-EFAULT); - res->data = kmemdup(p, len, GFP_KERNEL); + res->data = kmemdup(p, len, GFP_NOFS); if (unlikely(res->data == NULL)) return ERR_PTR(-ENOMEM); res->len = len; @@ -131,7 +131,7 @@ gss_import_sec_context_kerberos(const void *p, struct krb5_ctx *ctx; int tmp; - if (!(ctx = kzalloc(sizeof(*ctx), GFP_KERNEL))) + if (!(ctx = kzalloc(sizeof(*ctx), GFP_NOFS))) goto out_err; p = simple_get_bytes(p, end, &ctx->initiate, sizeof(ctx->initiate)); diff --git a/net/sunrpc/auth_gss/gss_krb5_seal.c b/net/sunrpc/auth_gss/gss_krb5_seal.c index 5f1d36dfbcf7..b8f42ef7178e 100644 --- a/net/sunrpc/auth_gss/gss_krb5_seal.c +++ b/net/sunrpc/auth_gss/gss_krb5_seal.c @@ -78,7 +78,7 @@ gss_get_mic_kerberos(struct gss_ctx *gss_ctx, struct xdr_buf *text, struct krb5_ctx *ctx = gss_ctx->internal_ctx_id; char cksumdata[16]; struct xdr_netobj md5cksum = {.len = 0, .data = cksumdata}; - unsigned char *ptr, *krb5_hdr, *msg_start; + unsigned char *ptr, *msg_start; s32 now; u32 seq_send; @@ -87,36 +87,36 @@ gss_get_mic_kerberos(struct gss_ctx *gss_ctx, struct xdr_buf *text, now = get_seconds(); - token->len = g_token_size(&ctx->mech_used, 24); + token->len = g_token_size(&ctx->mech_used, GSS_KRB5_TOK_HDR_LEN + 8); ptr = token->data; - g_make_token_header(&ctx->mech_used, 24, &ptr); + g_make_token_header(&ctx->mech_used, GSS_KRB5_TOK_HDR_LEN + 8, &ptr); - *ptr++ = (unsigned char) ((KG_TOK_MIC_MSG>>8)&0xff); - *ptr++ = (unsigned char) (KG_TOK_MIC_MSG&0xff); + /* ptr now at header described in rfc 1964, section 1.2.1: */ + ptr[0] = (unsigned char) ((KG_TOK_MIC_MSG >> 8) & 0xff); + ptr[1] = (unsigned char) (KG_TOK_MIC_MSG & 0xff); - /* ptr now at byte 2 of header described in rfc 1964, section 1.2.1: */ - krb5_hdr = ptr - 2; - msg_start = krb5_hdr + 24; + msg_start = ptr + GSS_KRB5_TOK_HDR_LEN + 8; - *(__be16 *)(krb5_hdr + 2) = htons(SGN_ALG_DES_MAC_MD5); - memset(krb5_hdr + 4, 0xff, 4); + *(__be16 *)(ptr + 2) = htons(SGN_ALG_DES_MAC_MD5); + memset(ptr + 4, 0xff, 4); - if (make_checksum("md5", krb5_hdr, 8, text, 0, &md5cksum)) + if (make_checksum("md5", ptr, 8, text, 0, &md5cksum)) return GSS_S_FAILURE; if (krb5_encrypt(ctx->seq, NULL, md5cksum.data, md5cksum.data, md5cksum.len)) return GSS_S_FAILURE; - memcpy(krb5_hdr + 16, md5cksum.data + md5cksum.len - 8, 8); + memcpy(ptr + GSS_KRB5_TOK_HDR_LEN, md5cksum.data + md5cksum.len - 8, 8); spin_lock(&krb5_seq_lock); seq_send = ctx->seq_send++; spin_unlock(&krb5_seq_lock); if (krb5_make_seq_num(ctx->seq, ctx->initiate ? 0 : 0xff, - seq_send, krb5_hdr + 16, krb5_hdr + 8)) + seq_send, ptr + GSS_KRB5_TOK_HDR_LEN, + ptr + 8)) return GSS_S_FAILURE; return (ctx->endtime < now) ? GSS_S_CONTEXT_EXPIRED : GSS_S_COMPLETE; diff --git a/net/sunrpc/auth_gss/gss_krb5_unseal.c b/net/sunrpc/auth_gss/gss_krb5_unseal.c index d91a5d004803..066ec73c84d6 100644 --- a/net/sunrpc/auth_gss/gss_krb5_unseal.c +++ b/net/sunrpc/auth_gss/gss_krb5_unseal.c @@ -92,30 +92,30 @@ gss_verify_mic_kerberos(struct gss_ctx *gss_ctx, read_token->len)) return GSS_S_DEFECTIVE_TOKEN; - if ((*ptr++ != ((KG_TOK_MIC_MSG>>8)&0xff)) || - (*ptr++ != ( KG_TOK_MIC_MSG &0xff)) ) + if ((ptr[0] != ((KG_TOK_MIC_MSG >> 8) & 0xff)) || + (ptr[1] != (KG_TOK_MIC_MSG & 0xff))) return GSS_S_DEFECTIVE_TOKEN; /* XXX sanity-check bodysize?? */ - signalg = ptr[0] + (ptr[1] << 8); + signalg = ptr[2] + (ptr[3] << 8); if (signalg != SGN_ALG_DES_MAC_MD5) return GSS_S_DEFECTIVE_TOKEN; - sealalg = ptr[2] + (ptr[3] << 8); + sealalg = ptr[4] + (ptr[5] << 8); if (sealalg != SEAL_ALG_NONE) return GSS_S_DEFECTIVE_TOKEN; - if ((ptr[4] != 0xff) || (ptr[5] != 0xff)) + if ((ptr[6] != 0xff) || (ptr[7] != 0xff)) return GSS_S_DEFECTIVE_TOKEN; - if (make_checksum("md5", ptr - 2, 8, message_buffer, 0, &md5cksum)) + if (make_checksum("md5", ptr, 8, message_buffer, 0, &md5cksum)) return GSS_S_FAILURE; if (krb5_encrypt(ctx->seq, NULL, md5cksum.data, md5cksum.data, 16)) return GSS_S_FAILURE; - if (memcmp(md5cksum.data + 8, ptr + 14, 8)) + if (memcmp(md5cksum.data + 8, ptr + GSS_KRB5_TOK_HDR_LEN, 8)) return GSS_S_BAD_SIG; /* it got through unscathed. Make sure the context is unexpired */ @@ -127,7 +127,7 @@ gss_verify_mic_kerberos(struct gss_ctx *gss_ctx, /* do sequencing checks */ - if (krb5_get_seq_num(ctx->seq, ptr + 14, ptr + 6, &direction, &seqnum)) + if (krb5_get_seq_num(ctx->seq, ptr + GSS_KRB5_TOK_HDR_LEN, ptr + 8, &direction, &seqnum)) return GSS_S_FAILURE; if ((ctx->initiate && direction != 0xff) || diff --git a/net/sunrpc/auth_gss/gss_krb5_wrap.c b/net/sunrpc/auth_gss/gss_krb5_wrap.c index b00b1b426301..ae8e69b59c4c 100644 --- a/net/sunrpc/auth_gss/gss_krb5_wrap.c +++ b/net/sunrpc/auth_gss/gss_krb5_wrap.c @@ -87,8 +87,8 @@ out: return 0; } -static inline void -make_confounder(char *p, int blocksize) +static void +make_confounder(char *p, u32 conflen) { static u64 i = 0; u64 *q = (u64 *)p; @@ -102,8 +102,22 @@ make_confounder(char *p, int blocksize) * uniqueness would mean worrying about atomicity and rollover, and I * don't care enough. */ - BUG_ON(blocksize != 8); - *q = i++; + /* initialize to random value */ + if (i == 0) { + i = random32(); + i = (i << 32) | random32(); + } + + switch (conflen) { + case 16: + *q++ = i++; + /* fall through */ + case 8: + *q++ = i++; + break; + default: + BUG(); + } } /* Assumptions: the head and tail of inbuf are ours to play with. @@ -122,7 +136,7 @@ gss_wrap_kerberos(struct gss_ctx *ctx, int offset, char cksumdata[16]; struct xdr_netobj md5cksum = {.len = 0, .data = cksumdata}; int blocksize = 0, plainlen; - unsigned char *ptr, *krb5_hdr, *msg_start; + unsigned char *ptr, *msg_start; s32 now; int headlen; struct page **tmp_pages; @@ -149,26 +163,26 @@ gss_wrap_kerberos(struct gss_ctx *ctx, int offset, buf->len += headlen; BUG_ON((buf->len - offset - headlen) % blocksize); - g_make_token_header(&kctx->mech_used, 24 + plainlen, &ptr); + g_make_token_header(&kctx->mech_used, + GSS_KRB5_TOK_HDR_LEN + 8 + plainlen, &ptr); - *ptr++ = (unsigned char) ((KG_TOK_WRAP_MSG>>8)&0xff); - *ptr++ = (unsigned char) (KG_TOK_WRAP_MSG&0xff); + /* ptr now at header described in rfc 1964, section 1.2.1: */ + ptr[0] = (unsigned char) ((KG_TOK_WRAP_MSG >> 8) & 0xff); + ptr[1] = (unsigned char) (KG_TOK_WRAP_MSG & 0xff); - /* ptr now at byte 2 of header described in rfc 1964, section 1.2.1: */ - krb5_hdr = ptr - 2; - msg_start = krb5_hdr + 24; + msg_start = ptr + 24; - *(__be16 *)(krb5_hdr + 2) = htons(SGN_ALG_DES_MAC_MD5); - memset(krb5_hdr + 4, 0xff, 4); - *(__be16 *)(krb5_hdr + 4) = htons(SEAL_ALG_DES); + *(__be16 *)(ptr + 2) = htons(SGN_ALG_DES_MAC_MD5); + memset(ptr + 4, 0xff, 4); + *(__be16 *)(ptr + 4) = htons(SEAL_ALG_DES); make_confounder(msg_start, blocksize); /* XXXJBF: UGH!: */ tmp_pages = buf->pages; buf->pages = pages; - if (make_checksum("md5", krb5_hdr, 8, buf, + if (make_checksum("md5", ptr, 8, buf, offset + headlen - blocksize, &md5cksum)) return GSS_S_FAILURE; buf->pages = tmp_pages; @@ -176,7 +190,7 @@ gss_wrap_kerberos(struct gss_ctx *ctx, int offset, if (krb5_encrypt(kctx->seq, NULL, md5cksum.data, md5cksum.data, md5cksum.len)) return GSS_S_FAILURE; - memcpy(krb5_hdr + 16, md5cksum.data + md5cksum.len - 8, 8); + memcpy(ptr + GSS_KRB5_TOK_HDR_LEN, md5cksum.data + md5cksum.len - 8, 8); spin_lock(&krb5_seq_lock); seq_send = kctx->seq_send++; @@ -185,7 +199,7 @@ gss_wrap_kerberos(struct gss_ctx *ctx, int offset, /* XXX would probably be more efficient to compute checksum * and encrypt at the same time: */ if ((krb5_make_seq_num(kctx->seq, kctx->initiate ? 0 : 0xff, - seq_send, krb5_hdr + 16, krb5_hdr + 8))) + seq_send, ptr + GSS_KRB5_TOK_HDR_LEN, ptr + 8))) return GSS_S_FAILURE; if (gss_encrypt_xdr_buf(kctx->enc, buf, offset + headlen - blocksize, @@ -219,38 +233,38 @@ gss_unwrap_kerberos(struct gss_ctx *ctx, int offset, struct xdr_buf *buf) buf->len - offset)) return GSS_S_DEFECTIVE_TOKEN; - if ((*ptr++ != ((KG_TOK_WRAP_MSG>>8)&0xff)) || - (*ptr++ != (KG_TOK_WRAP_MSG &0xff)) ) + if ((ptr[0] != ((KG_TOK_WRAP_MSG >> 8) & 0xff)) || + (ptr[1] != (KG_TOK_WRAP_MSG & 0xff))) return GSS_S_DEFECTIVE_TOKEN; /* XXX sanity-check bodysize?? */ /* get the sign and seal algorithms */ - signalg = ptr[0] + (ptr[1] << 8); + signalg = ptr[2] + (ptr[3] << 8); if (signalg != SGN_ALG_DES_MAC_MD5) return GSS_S_DEFECTIVE_TOKEN; - sealalg = ptr[2] + (ptr[3] << 8); + sealalg = ptr[4] + (ptr[5] << 8); if (sealalg != SEAL_ALG_DES) return GSS_S_DEFECTIVE_TOKEN; - if ((ptr[4] != 0xff) || (ptr[5] != 0xff)) + if ((ptr[6] != 0xff) || (ptr[7] != 0xff)) return GSS_S_DEFECTIVE_TOKEN; if (gss_decrypt_xdr_buf(kctx->enc, buf, - ptr + 22 - (unsigned char *)buf->head[0].iov_base)) + ptr + GSS_KRB5_TOK_HDR_LEN + 8 - (unsigned char *)buf->head[0].iov_base)) return GSS_S_DEFECTIVE_TOKEN; - if (make_checksum("md5", ptr - 2, 8, buf, - ptr + 22 - (unsigned char *)buf->head[0].iov_base, &md5cksum)) + if (make_checksum("md5", ptr, 8, buf, + ptr + GSS_KRB5_TOK_HDR_LEN + 8 - (unsigned char *)buf->head[0].iov_base, &md5cksum)) return GSS_S_FAILURE; if (krb5_encrypt(kctx->seq, NULL, md5cksum.data, md5cksum.data, md5cksum.len)) return GSS_S_FAILURE; - if (memcmp(md5cksum.data + 8, ptr + 14, 8)) + if (memcmp(md5cksum.data + 8, ptr + GSS_KRB5_TOK_HDR_LEN, 8)) return GSS_S_BAD_SIG; /* it got through unscathed. Make sure the context is unexpired */ @@ -262,8 +276,8 @@ gss_unwrap_kerberos(struct gss_ctx *ctx, int offset, struct xdr_buf *buf) /* do sequencing checks */ - if (krb5_get_seq_num(kctx->seq, ptr + 14, ptr + 6, &direction, - &seqnum)) + if (krb5_get_seq_num(kctx->seq, ptr + GSS_KRB5_TOK_HDR_LEN, ptr + 8, + &direction, &seqnum)) return GSS_S_BAD_SIG; if ((kctx->initiate && direction != 0xff) || @@ -274,7 +288,7 @@ gss_unwrap_kerberos(struct gss_ctx *ctx, int offset, struct xdr_buf *buf) * better to copy and encrypt at the same time. */ blocksize = crypto_blkcipher_blocksize(kctx->enc); - data_start = ptr + 22 + blocksize; + data_start = ptr + GSS_KRB5_TOK_HDR_LEN + 8 + blocksize; orig_start = buf->head[0].iov_base + offset; data_len = (buf->head[0].iov_base + buf->head[0].iov_len) - data_start; memmove(orig_start, data_start, data_len); diff --git a/net/sunrpc/auth_gss/gss_spkm3_mech.c b/net/sunrpc/auth_gss/gss_spkm3_mech.c index 5deb4b6e4514..035e1dd6af1b 100644 --- a/net/sunrpc/auth_gss/gss_spkm3_mech.c +++ b/net/sunrpc/auth_gss/gss_spkm3_mech.c @@ -76,7 +76,7 @@ simple_get_netobj(const void *p, const void *end, struct xdr_netobj *res) q = (const void *)((const char *)p + len); if (unlikely(q > end || q < p)) return ERR_PTR(-EFAULT); - res->data = kmemdup(p, len, GFP_KERNEL); + res->data = kmemdup(p, len, GFP_NOFS); if (unlikely(res->data == NULL)) return ERR_PTR(-ENOMEM); return q; @@ -90,7 +90,7 @@ gss_import_sec_context_spkm3(const void *p, size_t len, struct spkm3_ctx *ctx; int version; - if (!(ctx = kzalloc(sizeof(*ctx), GFP_KERNEL))) + if (!(ctx = kzalloc(sizeof(*ctx), GFP_NOFS))) goto out_err; p = simple_get_bytes(p, end, &version, sizeof(version)); diff --git a/net/sunrpc/auth_gss/gss_spkm3_token.c b/net/sunrpc/auth_gss/gss_spkm3_token.c index 6cdd241ad267..3308157436d2 100644 --- a/net/sunrpc/auth_gss/gss_spkm3_token.c +++ b/net/sunrpc/auth_gss/gss_spkm3_token.c @@ -90,7 +90,7 @@ asn1_bitstring_len(struct xdr_netobj *in, int *enclen, int *zerobits) int decode_asn1_bitstring(struct xdr_netobj *out, char *in, int enclen, int explen) { - if (!(out->data = kzalloc(explen,GFP_KERNEL))) + if (!(out->data = kzalloc(explen,GFP_NOFS))) return 0; out->len = explen; memcpy(out->data, in, enclen); diff --git a/net/sunrpc/auth_gss/svcauth_gss.c b/net/sunrpc/auth_gss/svcauth_gss.c index 5905d56737d6..81ae3d62a0cc 100644 --- a/net/sunrpc/auth_gss/svcauth_gss.c +++ b/net/sunrpc/auth_gss/svcauth_gss.c @@ -1144,20 +1144,20 @@ svcauth_gss_accept(struct svc_rqst *rqstp, __be32 *authp) case RPC_GSS_SVC_NONE: break; case RPC_GSS_SVC_INTEGRITY: + /* placeholders for length and seq. number: */ + svc_putnl(resv, 0); + svc_putnl(resv, 0); if (unwrap_integ_data(&rqstp->rq_arg, gc->gc_seq, rsci->mechctx)) goto garbage_args; + break; + case RPC_GSS_SVC_PRIVACY: /* placeholders for length and seq. number: */ svc_putnl(resv, 0); svc_putnl(resv, 0); - break; - case RPC_GSS_SVC_PRIVACY: if (unwrap_priv_data(rqstp, &rqstp->rq_arg, gc->gc_seq, rsci->mechctx)) goto garbage_args; - /* placeholders for length and seq. number: */ - svc_putnl(resv, 0); - svc_putnl(resv, 0); break; default: goto auth_err; @@ -1170,8 +1170,6 @@ svcauth_gss_accept(struct svc_rqst *rqstp, __be32 *authp) goto out; } garbage_args: - /* Restore write pointer to its original value: */ - xdr_ressize_check(rqstp, reject_stat); ret = SVC_GARBAGE; goto out; auth_err: diff --git a/net/sunrpc/auth_unix.c b/net/sunrpc/auth_unix.c index 44920b90bdc4..46b2647c5bd2 100644 --- a/net/sunrpc/auth_unix.c +++ b/net/sunrpc/auth_unix.c @@ -66,7 +66,7 @@ unx_create_cred(struct rpc_auth *auth, struct auth_cred *acred, int flags) dprintk("RPC: allocating UNIX cred for uid %d gid %d\n", acred->uid, acred->gid); - if (!(cred = kmalloc(sizeof(*cred), GFP_KERNEL))) + if (!(cred = kmalloc(sizeof(*cred), GFP_NOFS))) return ERR_PTR(-ENOMEM); rpcauth_init_cred(&cred->uc_base, acred, auth, &unix_credops); diff --git a/net/sunrpc/clnt.c b/net/sunrpc/clnt.c index 8945307556ec..da0789fa1b88 100644 --- a/net/sunrpc/clnt.c +++ b/net/sunrpc/clnt.c @@ -25,6 +25,7 @@ #include <linux/module.h> #include <linux/types.h> +#include <linux/kallsyms.h> #include <linux/mm.h> #include <linux/slab.h> #include <linux/smp_lock.h> @@ -58,7 +59,6 @@ static void call_start(struct rpc_task *task); static void call_reserve(struct rpc_task *task); static void call_reserveresult(struct rpc_task *task); static void call_allocate(struct rpc_task *task); -static void call_encode(struct rpc_task *task); static void call_decode(struct rpc_task *task); static void call_bind(struct rpc_task *task); static void call_bind_status(struct rpc_task *task); @@ -70,9 +70,9 @@ static void call_refreshresult(struct rpc_task *task); static void call_timeout(struct rpc_task *task); static void call_connect(struct rpc_task *task); static void call_connect_status(struct rpc_task *task); -static __be32 * call_header(struct rpc_task *task); -static __be32 * call_verify(struct rpc_task *task); +static __be32 *rpc_encode_header(struct rpc_task *task); +static __be32 *rpc_verify_header(struct rpc_task *task); static int rpc_ping(struct rpc_clnt *clnt, int flags); static void rpc_register_client(struct rpc_clnt *clnt) @@ -174,7 +174,7 @@ static struct rpc_clnt * rpc_new_client(const struct rpc_create_args *args, stru clnt->cl_procinfo = version->procs; clnt->cl_maxproc = version->nrprocs; clnt->cl_protname = program->name; - clnt->cl_prog = program->number; + clnt->cl_prog = args->prognumber ? : program->number; clnt->cl_vers = version->number; clnt->cl_stats = program->stats; clnt->cl_metrics = rpc_alloc_iostats(clnt); @@ -324,6 +324,8 @@ struct rpc_clnt *rpc_create(struct rpc_create_args *args) clnt->cl_autobind = 1; if (args->flags & RPC_CLNT_CREATE_DISCRTRY) clnt->cl_discrtry = 1; + if (!(args->flags & RPC_CLNT_CREATE_QUIET)) + clnt->cl_chatty = 1; return clnt; } @@ -690,6 +692,21 @@ rpc_restart_call(struct rpc_task *task) } EXPORT_SYMBOL_GPL(rpc_restart_call); +#ifdef RPC_DEBUG +static const char *rpc_proc_name(const struct rpc_task *task) +{ + const struct rpc_procinfo *proc = task->tk_msg.rpc_proc; + + if (proc) { + if (proc->p_name) + return proc->p_name; + else + return "NULL"; + } else + return "no proc"; +} +#endif + /* * 0. Initial state * @@ -701,9 +718,9 @@ call_start(struct rpc_task *task) { struct rpc_clnt *clnt = task->tk_client; - dprintk("RPC: %5u call_start %s%d proc %d (%s)\n", task->tk_pid, + dprintk("RPC: %5u call_start %s%d proc %s (%s)\n", task->tk_pid, clnt->cl_protname, clnt->cl_vers, - task->tk_msg.rpc_proc->p_proc, + rpc_proc_name(task), (RPC_IS_ASYNC(task) ? "async" : "sync")); /* Increment call count */ @@ -861,7 +878,7 @@ rpc_xdr_buf_init(struct xdr_buf *buf, void *start, size_t len) * 3. Encode arguments of an RPC call */ static void -call_encode(struct rpc_task *task) +rpc_xdr_encode(struct rpc_task *task) { struct rpc_rqst *req = task->tk_rqstp; kxdrproc_t encode; @@ -876,23 +893,19 @@ call_encode(struct rpc_task *task) (char *)req->rq_buffer + req->rq_callsize, req->rq_rcvsize); - /* Encode header and provided arguments */ - encode = task->tk_msg.rpc_proc->p_encode; - if (!(p = call_header(task))) { - printk(KERN_INFO "RPC: call_header failed, exit EIO\n"); + p = rpc_encode_header(task); + if (p == NULL) { + printk(KERN_INFO "RPC: couldn't encode RPC header, exit EIO\n"); rpc_exit(task, -EIO); return; } + + encode = task->tk_msg.rpc_proc->p_encode; if (encode == NULL) return; task->tk_status = rpcauth_wrap_req(task, encode, req, p, task->tk_msg.rpc_argp); - if (task->tk_status == -ENOMEM) { - /* XXX: Is this sane? */ - rpc_delay(task, 3*HZ); - task->tk_status = -EAGAIN; - } } /* @@ -929,11 +942,9 @@ call_bind_status(struct rpc_task *task) } switch (task->tk_status) { - case -EAGAIN: - dprintk("RPC: %5u rpcbind waiting for another request " - "to finish\n", task->tk_pid); - /* avoid busy-waiting here -- could be a network outage. */ - rpc_delay(task, 5*HZ); + case -ENOMEM: + dprintk("RPC: %5u rpcbind out of memory\n", task->tk_pid); + rpc_delay(task, HZ >> 2); goto retry_timeout; case -EACCES: dprintk("RPC: %5u remote rpcbind: RPC program/version " @@ -1046,10 +1057,16 @@ call_transmit(struct rpc_task *task) /* Encode here so that rpcsec_gss can use correct sequence number. */ if (rpc_task_need_encode(task)) { BUG_ON(task->tk_rqstp->rq_bytes_sent != 0); - call_encode(task); + rpc_xdr_encode(task); /* Did the encode result in an error condition? */ - if (task->tk_status != 0) + if (task->tk_status != 0) { + /* Was the error nonfatal? */ + if (task->tk_status == -EAGAIN) + rpc_delay(task, HZ >> 4); + else + rpc_exit(task, task->tk_status); return; + } } xprt_transmit(task); if (task->tk_status < 0) @@ -1132,7 +1149,8 @@ call_status(struct rpc_task *task) rpc_exit(task, status); break; default: - printk("%s: RPC call returned error %d\n", + if (clnt->cl_chatty) + printk("%s: RPC call returned error %d\n", clnt->cl_protname, -status); rpc_exit(task, status); } @@ -1157,7 +1175,8 @@ call_timeout(struct rpc_task *task) task->tk_timeouts++; if (RPC_IS_SOFT(task)) { - printk(KERN_NOTICE "%s: server %s not responding, timed out\n", + if (clnt->cl_chatty) + printk(KERN_NOTICE "%s: server %s not responding, timed out\n", clnt->cl_protname, clnt->cl_server); rpc_exit(task, -EIO); return; @@ -1165,7 +1184,8 @@ call_timeout(struct rpc_task *task) if (!(task->tk_flags & RPC_CALL_MAJORSEEN)) { task->tk_flags |= RPC_CALL_MAJORSEEN; - printk(KERN_NOTICE "%s: server %s not responding, still trying\n", + if (clnt->cl_chatty) + printk(KERN_NOTICE "%s: server %s not responding, still trying\n", clnt->cl_protname, clnt->cl_server); } rpc_force_rebind(clnt); @@ -1196,8 +1216,9 @@ call_decode(struct rpc_task *task) task->tk_pid, task->tk_status); if (task->tk_flags & RPC_CALL_MAJORSEEN) { - printk(KERN_NOTICE "%s: server %s OK\n", - clnt->cl_protname, clnt->cl_server); + if (clnt->cl_chatty) + printk(KERN_NOTICE "%s: server %s OK\n", + clnt->cl_protname, clnt->cl_server); task->tk_flags &= ~RPC_CALL_MAJORSEEN; } @@ -1224,8 +1245,7 @@ call_decode(struct rpc_task *task) goto out_retry; } - /* Verify the RPC header */ - p = call_verify(task); + p = rpc_verify_header(task); if (IS_ERR(p)) { if (p == ERR_PTR(-EAGAIN)) goto out_retry; @@ -1243,7 +1263,7 @@ call_decode(struct rpc_task *task) return; out_retry: task->tk_status = 0; - /* Note: call_verify() may have freed the RPC slot */ + /* Note: rpc_verify_header() may have freed the RPC slot */ if (task->tk_rqstp == req) { req->rq_received = req->rq_rcv_buf.len = 0; if (task->tk_client->cl_discrtry) @@ -1290,11 +1310,8 @@ call_refreshresult(struct rpc_task *task) return; } -/* - * Call header serialization - */ static __be32 * -call_header(struct rpc_task *task) +rpc_encode_header(struct rpc_task *task) { struct rpc_clnt *clnt = task->tk_client; struct rpc_rqst *req = task->tk_rqstp; @@ -1314,11 +1331,8 @@ call_header(struct rpc_task *task) return p; } -/* - * Reply header verification - */ static __be32 * -call_verify(struct rpc_task *task) +rpc_verify_header(struct rpc_task *task) { struct kvec *iov = &task->tk_rqstp->rq_rcv_buf.head[0]; int len = task->tk_rqstp->rq_rcv_buf.len >> 2; @@ -1392,7 +1406,7 @@ call_verify(struct rpc_task *task) task->tk_action = call_bind; goto out_retry; case RPC_AUTH_TOOWEAK: - printk(KERN_NOTICE "call_verify: server %s requires stronger " + printk(KERN_NOTICE "RPC: server %s requires stronger " "authentication.\n", task->tk_client->cl_server); break; default: @@ -1431,10 +1445,10 @@ call_verify(struct rpc_task *task) error = -EPROTONOSUPPORT; goto out_err; case RPC_PROC_UNAVAIL: - dprintk("RPC: %5u %s: proc %p unsupported by program %u, " + dprintk("RPC: %5u %s: proc %s unsupported by program %u, " "version %u on server %s\n", task->tk_pid, __func__, - task->tk_msg.rpc_proc, + rpc_proc_name(task), task->tk_client->cl_prog, task->tk_client->cl_vers, task->tk_client->cl_server); @@ -1517,44 +1531,53 @@ struct rpc_task *rpc_call_null(struct rpc_clnt *clnt, struct rpc_cred *cred, int EXPORT_SYMBOL_GPL(rpc_call_null); #ifdef RPC_DEBUG +static void rpc_show_header(void) +{ + printk(KERN_INFO "-pid- flgs status -client- --rqstp- " + "-timeout ---ops--\n"); +} + +static void rpc_show_task(const struct rpc_clnt *clnt, + const struct rpc_task *task) +{ + const char *rpc_waitq = "none"; + char *p, action[KSYM_SYMBOL_LEN]; + + if (RPC_IS_QUEUED(task)) + rpc_waitq = rpc_qname(task->tk_waitqueue); + + /* map tk_action pointer to a function name; then trim off + * the "+0x0 [sunrpc]" */ + sprint_symbol(action, (unsigned long)task->tk_action); + p = strchr(action, '+'); + if (p) + *p = '\0'; + + printk(KERN_INFO "%5u %04x %6d %8p %8p %8ld %8p %sv%u %s a:%s q:%s\n", + task->tk_pid, task->tk_flags, task->tk_status, + clnt, task->tk_rqstp, task->tk_timeout, task->tk_ops, + clnt->cl_protname, clnt->cl_vers, rpc_proc_name(task), + action, rpc_waitq); +} + void rpc_show_tasks(void) { struct rpc_clnt *clnt; - struct rpc_task *t; + struct rpc_task *task; + int header = 0; spin_lock(&rpc_client_lock); - if (list_empty(&all_clients)) - goto out; - printk("-pid- proc flgs status -client- -prog- --rqstp- -timeout " - "-rpcwait -action- ---ops--\n"); list_for_each_entry(clnt, &all_clients, cl_clients) { - if (list_empty(&clnt->cl_tasks)) - continue; spin_lock(&clnt->cl_lock); - list_for_each_entry(t, &clnt->cl_tasks, tk_task) { - const char *rpc_waitq = "none"; - int proc; - - if (t->tk_msg.rpc_proc) - proc = t->tk_msg.rpc_proc->p_proc; - else - proc = -1; - - if (RPC_IS_QUEUED(t)) - rpc_waitq = rpc_qname(t->tk_waitqueue); - - printk("%5u %04d %04x %6d %8p %6d %8p %8ld %8s %8p %8p\n", - t->tk_pid, proc, - t->tk_flags, t->tk_status, - t->tk_client, - (t->tk_client ? t->tk_client->cl_prog : 0), - t->tk_rqstp, t->tk_timeout, - rpc_waitq, - t->tk_action, t->tk_ops); + list_for_each_entry(task, &clnt->cl_tasks, tk_task) { + if (!header) { + rpc_show_header(); + header++; + } + rpc_show_task(clnt, task); } spin_unlock(&clnt->cl_lock); } -out: spin_unlock(&rpc_client_lock); } #endif diff --git a/net/sunrpc/rpc_pipe.c b/net/sunrpc/rpc_pipe.c index 5a9b0e7828cd..23a2b8f6dc49 100644 --- a/net/sunrpc/rpc_pipe.c +++ b/net/sunrpc/rpc_pipe.c @@ -897,7 +897,7 @@ static struct file_system_type rpc_pipe_fs_type = { }; static void -init_once(struct kmem_cache * cachep, void *foo) +init_once(void *foo) { struct rpc_inode *rpci = (struct rpc_inode *) foo; diff --git a/net/sunrpc/rpcb_clnt.c b/net/sunrpc/rpcb_clnt.c index 0517967a68bf..34abc91058d8 100644 --- a/net/sunrpc/rpcb_clnt.c +++ b/net/sunrpc/rpcb_clnt.c @@ -20,6 +20,7 @@ #include <linux/in6.h> #include <linux/kernel.h> #include <linux/errno.h> +#include <net/ipv6.h> #include <linux/sunrpc/clnt.h> #include <linux/sunrpc/sched.h> @@ -32,6 +33,10 @@ #define RPCBIND_PROGRAM (100000u) #define RPCBIND_PORT (111u) +#define RPCBVERS_2 (2u) +#define RPCBVERS_3 (3u) +#define RPCBVERS_4 (4u) + enum { RPCBPROC_NULL, RPCBPROC_SET, @@ -64,6 +69,7 @@ enum { #define RPCB_MAXOWNERLEN sizeof(RPCB_OWNER_STRING) static void rpcb_getport_done(struct rpc_task *, void *); +static void rpcb_map_release(void *data); static struct rpc_program rpcb_program; struct rpcbind_args { @@ -76,41 +82,73 @@ struct rpcbind_args { const char * r_netid; const char * r_addr; const char * r_owner; + + int r_status; }; static struct rpc_procinfo rpcb_procedures2[]; static struct rpc_procinfo rpcb_procedures3[]; +static struct rpc_procinfo rpcb_procedures4[]; struct rpcb_info { - int rpc_vers; + u32 rpc_vers; struct rpc_procinfo * rpc_proc; }; static struct rpcb_info rpcb_next_version[]; static struct rpcb_info rpcb_next_version6[]; +static const struct rpc_call_ops rpcb_getport_ops = { + .rpc_call_done = rpcb_getport_done, + .rpc_release = rpcb_map_release, +}; + +static void rpcb_wake_rpcbind_waiters(struct rpc_xprt *xprt, int status) +{ + xprt_clear_binding(xprt); + rpc_wake_up_status(&xprt->binding, status); +} + static void rpcb_map_release(void *data) { struct rpcbind_args *map = data; + rpcb_wake_rpcbind_waiters(map->r_xprt, map->r_status); xprt_put(map->r_xprt); kfree(map); } -static const struct rpc_call_ops rpcb_getport_ops = { - .rpc_call_done = rpcb_getport_done, - .rpc_release = rpcb_map_release, +static const struct sockaddr_in rpcb_inaddr_loopback = { + .sin_family = AF_INET, + .sin_addr.s_addr = htonl(INADDR_LOOPBACK), + .sin_port = htons(RPCBIND_PORT), }; -static void rpcb_wake_rpcbind_waiters(struct rpc_xprt *xprt, int status) +static const struct sockaddr_in6 rpcb_in6addr_loopback = { + .sin6_family = AF_INET6, + .sin6_addr = IN6ADDR_LOOPBACK_INIT, + .sin6_port = htons(RPCBIND_PORT), +}; + +static struct rpc_clnt *rpcb_create_local(struct sockaddr *addr, + size_t addrlen, u32 version) { - xprt_clear_binding(xprt); - rpc_wake_up_status(&xprt->binding, status); + struct rpc_create_args args = { + .protocol = XPRT_TRANSPORT_UDP, + .address = addr, + .addrsize = addrlen, + .servername = "localhost", + .program = &rpcb_program, + .version = version, + .authflavor = RPC_AUTH_UNIX, + .flags = RPC_CLNT_CREATE_NOPING, + }; + + return rpc_create(&args); } static struct rpc_clnt *rpcb_create(char *hostname, struct sockaddr *srvaddr, - size_t salen, int proto, u32 version, - int privileged) + size_t salen, int proto, u32 version) { struct rpc_create_args args = { .protocol = proto, @@ -120,7 +158,8 @@ static struct rpc_clnt *rpcb_create(char *hostname, struct sockaddr *srvaddr, .program = &rpcb_program, .version = version, .authflavor = RPC_AUTH_UNIX, - .flags = RPC_CLNT_CREATE_NOPING, + .flags = (RPC_CLNT_CREATE_NOPING | + RPC_CLNT_CREATE_NONPRIVPORT), }; switch (srvaddr->sa_family) { @@ -134,29 +173,69 @@ static struct rpc_clnt *rpcb_create(char *hostname, struct sockaddr *srvaddr, return NULL; } - if (!privileged) - args.flags |= RPC_CLNT_CREATE_NONPRIVPORT; return rpc_create(&args); } +static int rpcb_register_call(struct sockaddr *addr, size_t addrlen, + u32 version, struct rpc_message *msg) +{ + struct rpc_clnt *rpcb_clnt; + int result, error = 0; + + msg->rpc_resp = &result; + + rpcb_clnt = rpcb_create_local(addr, addrlen, version); + if (!IS_ERR(rpcb_clnt)) { + error = rpc_call_sync(rpcb_clnt, msg, 0); + rpc_shutdown_client(rpcb_clnt); + } else + error = PTR_ERR(rpcb_clnt); + + if (error < 0) { + printk(KERN_WARNING "RPC: failed to contact local rpcbind " + "server (errno %d).\n", -error); + return error; + } + + if (!result) + return -EACCES; + return 0; +} + /** * rpcb_register - set or unset a port registration with the local rpcbind svc * @prog: RPC program number to bind * @vers: RPC version number to bind - * @prot: transport protocol to use to make this request + * @prot: transport protocol to register * @port: port value to register - * @okay: result code * - * port == 0 means unregister, port != 0 means register. + * Returns zero if the registration request was dispatched successfully + * and the rpcbind daemon returned success. Otherwise, returns an errno + * value that reflects the nature of the error (request could not be + * dispatched, timed out, or rpcbind returned an error). + * + * RPC services invoke this function to advertise their contact + * information via the system's rpcbind daemon. RPC services + * invoke this function once for each [program, version, transport] + * tuple they wish to advertise. * - * This routine supports only rpcbind version 2. + * Callers may also unregister RPC services that are no longer + * available by setting the passed-in port to zero. This removes + * all registered transports for [program, version] from the local + * rpcbind database. + * + * This function uses rpcbind protocol version 2 to contact the + * local rpcbind daemon. + * + * Registration works over both AF_INET and AF_INET6, and services + * registered via this function are advertised as available for any + * address. If the local rpcbind daemon is listening on AF_INET6, + * services registered via this function will be advertised on + * IN6ADDR_ANY (ie available for all AF_INET and AF_INET6 + * addresses). */ -int rpcb_register(u32 prog, u32 vers, int prot, unsigned short port, int *okay) +int rpcb_register(u32 prog, u32 vers, int prot, unsigned short port) { - struct sockaddr_in sin = { - .sin_family = AF_INET, - .sin_addr.s_addr = htonl(INADDR_LOOPBACK), - }; struct rpcbind_args map = { .r_prog = prog, .r_vers = vers, @@ -164,32 +243,152 @@ int rpcb_register(u32 prog, u32 vers, int prot, unsigned short port, int *okay) .r_port = port, }; struct rpc_message msg = { - .rpc_proc = &rpcb_procedures2[port ? - RPCBPROC_SET : RPCBPROC_UNSET], .rpc_argp = &map, - .rpc_resp = okay, }; - struct rpc_clnt *rpcb_clnt; - int error = 0; dprintk("RPC: %sregistering (%u, %u, %d, %u) with local " "rpcbind\n", (port ? "" : "un"), prog, vers, prot, port); - rpcb_clnt = rpcb_create("localhost", (struct sockaddr *) &sin, - sizeof(sin), XPRT_TRANSPORT_UDP, 2, 1); - if (IS_ERR(rpcb_clnt)) - return PTR_ERR(rpcb_clnt); + msg.rpc_proc = &rpcb_procedures2[RPCBPROC_UNSET]; + if (port) + msg.rpc_proc = &rpcb_procedures2[RPCBPROC_SET]; - error = rpc_call_sync(rpcb_clnt, &msg, 0); + return rpcb_register_call((struct sockaddr *)&rpcb_inaddr_loopback, + sizeof(rpcb_inaddr_loopback), + RPCBVERS_2, &msg); +} - rpc_shutdown_client(rpcb_clnt); - if (error < 0) - printk(KERN_WARNING "RPC: failed to contact local rpcbind " - "server (errno %d).\n", -error); - dprintk("RPC: registration status %d/%d\n", error, *okay); +/* + * Fill in AF_INET family-specific arguments to register + */ +static int rpcb_register_netid4(struct sockaddr_in *address_to_register, + struct rpc_message *msg) +{ + struct rpcbind_args *map = msg->rpc_argp; + unsigned short port = ntohs(address_to_register->sin_port); + char buf[32]; + + /* Construct AF_INET universal address */ + snprintf(buf, sizeof(buf), + NIPQUAD_FMT".%u.%u", + NIPQUAD(address_to_register->sin_addr.s_addr), + port >> 8, port & 0xff); + map->r_addr = buf; + + dprintk("RPC: %sregistering [%u, %u, %s, '%s'] with " + "local rpcbind\n", (port ? "" : "un"), + map->r_prog, map->r_vers, + map->r_addr, map->r_netid); + + msg->rpc_proc = &rpcb_procedures4[RPCBPROC_UNSET]; + if (port) + msg->rpc_proc = &rpcb_procedures4[RPCBPROC_SET]; + + return rpcb_register_call((struct sockaddr *)&rpcb_inaddr_loopback, + sizeof(rpcb_inaddr_loopback), + RPCBVERS_4, msg); +} + +/* + * Fill in AF_INET6 family-specific arguments to register + */ +static int rpcb_register_netid6(struct sockaddr_in6 *address_to_register, + struct rpc_message *msg) +{ + struct rpcbind_args *map = msg->rpc_argp; + unsigned short port = ntohs(address_to_register->sin6_port); + char buf[64]; + + /* Construct AF_INET6 universal address */ + if (ipv6_addr_any(&address_to_register->sin6_addr)) + snprintf(buf, sizeof(buf), "::.%u.%u", + port >> 8, port & 0xff); + else + snprintf(buf, sizeof(buf), NIP6_FMT".%u.%u", + NIP6(address_to_register->sin6_addr), + port >> 8, port & 0xff); + map->r_addr = buf; + + dprintk("RPC: %sregistering [%u, %u, %s, '%s'] with " + "local rpcbind\n", (port ? "" : "un"), + map->r_prog, map->r_vers, + map->r_addr, map->r_netid); + + msg->rpc_proc = &rpcb_procedures4[RPCBPROC_UNSET]; + if (port) + msg->rpc_proc = &rpcb_procedures4[RPCBPROC_SET]; + + return rpcb_register_call((struct sockaddr *)&rpcb_in6addr_loopback, + sizeof(rpcb_in6addr_loopback), + RPCBVERS_4, msg); +} + +/** + * rpcb_v4_register - set or unset a port registration with the local rpcbind + * @program: RPC program number of service to (un)register + * @version: RPC version number of service to (un)register + * @address: address family, IP address, and port to (un)register + * @netid: netid of transport protocol to (un)register + * + * Returns zero if the registration request was dispatched successfully + * and the rpcbind daemon returned success. Otherwise, returns an errno + * value that reflects the nature of the error (request could not be + * dispatched, timed out, or rpcbind returned an error). + * + * RPC services invoke this function to advertise their contact + * information via the system's rpcbind daemon. RPC services + * invoke this function once for each [program, version, address, + * netid] tuple they wish to advertise. + * + * Callers may also unregister RPC services that are no longer + * available by setting the port number in the passed-in address + * to zero. Callers pass a netid of "" to unregister all + * transport netids associated with [program, version, address]. + * + * This function uses rpcbind protocol version 4 to contact the + * local rpcbind daemon. The local rpcbind daemon must support + * version 4 of the rpcbind protocol in order for these functions + * to register a service successfully. + * + * Supported netids include "udp" and "tcp" for UDP and TCP over + * IPv4, and "udp6" and "tcp6" for UDP and TCP over IPv6, + * respectively. + * + * The contents of @address determine the address family and the + * port to be registered. The usual practice is to pass INADDR_ANY + * as the raw address, but specifying a non-zero address is also + * supported by this API if the caller wishes to advertise an RPC + * service on a specific network interface. + * + * Note that passing in INADDR_ANY does not create the same service + * registration as IN6ADDR_ANY. The former advertises an RPC + * service on any IPv4 address, but not on IPv6. The latter + * advertises the service on all IPv4 and IPv6 addresses. + */ +int rpcb_v4_register(const u32 program, const u32 version, + const struct sockaddr *address, const char *netid) +{ + struct rpcbind_args map = { + .r_prog = program, + .r_vers = version, + .r_netid = netid, + .r_owner = RPCB_OWNER_STRING, + }; + struct rpc_message msg = { + .rpc_argp = &map, + }; - return error; + switch (address->sa_family) { + case AF_INET: + return rpcb_register_netid4((struct sockaddr_in *)address, + &msg); + case AF_INET6: + return rpcb_register_netid6((struct sockaddr_in6 *)address, + &msg); + } + + return -EAFNOSUPPORT; } /** @@ -227,7 +426,7 @@ int rpcb_getport_sync(struct sockaddr_in *sin, u32 prog, u32 vers, int prot) __func__, NIPQUAD(sin->sin_addr.s_addr), prog, vers, prot); rpcb_clnt = rpcb_create(NULL, (struct sockaddr *)sin, - sizeof(*sin), prot, 2, 0); + sizeof(*sin), prot, RPCBVERS_2); if (IS_ERR(rpcb_clnt)) return PTR_ERR(rpcb_clnt); @@ -243,10 +442,10 @@ int rpcb_getport_sync(struct sockaddr_in *sin, u32 prog, u32 vers, int prot) } EXPORT_SYMBOL_GPL(rpcb_getport_sync); -static struct rpc_task *rpcb_call_async(struct rpc_clnt *rpcb_clnt, struct rpcbind_args *map, int version) +static struct rpc_task *rpcb_call_async(struct rpc_clnt *rpcb_clnt, struct rpcbind_args *map, struct rpc_procinfo *proc) { struct rpc_message msg = { - .rpc_proc = rpcb_next_version[version].rpc_proc, + .rpc_proc = proc, .rpc_argp = map, .rpc_resp = &map->r_port, }; @@ -271,6 +470,7 @@ static struct rpc_task *rpcb_call_async(struct rpc_clnt *rpcb_clnt, struct rpcbi void rpcb_getport_async(struct rpc_task *task) { struct rpc_clnt *clnt = task->tk_client; + struct rpc_procinfo *proc; u32 bind_version; struct rpc_xprt *xprt = task->tk_xprt; struct rpc_clnt *rpcb_clnt; @@ -280,7 +480,6 @@ void rpcb_getport_async(struct rpc_task *task) struct sockaddr *sap = (struct sockaddr *)&addr; size_t salen; int status; - struct rpcb_info *info; dprintk("RPC: %5u %s(%s, %u, %u, %d)\n", task->tk_pid, __func__, @@ -289,17 +488,16 @@ void rpcb_getport_async(struct rpc_task *task) /* Autobind on cloned rpc clients is discouraged */ BUG_ON(clnt->cl_parent != clnt); + /* Put self on the wait queue to ensure we get notified if + * some other task is already attempting to bind the port */ + rpc_sleep_on(&xprt->binding, task, NULL); + if (xprt_test_and_set_binding(xprt)) { - status = -EAGAIN; /* tell caller to check again */ dprintk("RPC: %5u %s: waiting for another binder\n", task->tk_pid, __func__); - goto bailout_nowake; + return; } - /* Put self on queue before sending rpcbind request, in case - * rpcb_getport_done completes before we return from rpc_run_task */ - rpc_sleep_on(&xprt->binding, task, NULL); - /* Someone else may have bound if we slept */ if (xprt_bound(xprt)) { status = 0; @@ -313,10 +511,12 @@ void rpcb_getport_async(struct rpc_task *task) /* Don't ever use rpcbind v2 for AF_INET6 requests */ switch (sap->sa_family) { case AF_INET: - info = rpcb_next_version; + proc = rpcb_next_version[xprt->bind_index].rpc_proc; + bind_version = rpcb_next_version[xprt->bind_index].rpc_vers; break; case AF_INET6: - info = rpcb_next_version6; + proc = rpcb_next_version6[xprt->bind_index].rpc_proc; + bind_version = rpcb_next_version6[xprt->bind_index].rpc_vers; break; default: status = -EAFNOSUPPORT; @@ -324,20 +524,19 @@ void rpcb_getport_async(struct rpc_task *task) task->tk_pid, __func__); goto bailout_nofree; } - if (info[xprt->bind_index].rpc_proc == NULL) { + if (proc == NULL) { xprt->bind_index = 0; status = -EPFNOSUPPORT; dprintk("RPC: %5u %s: no more getport versions available\n", task->tk_pid, __func__); goto bailout_nofree; } - bind_version = info[xprt->bind_index].rpc_vers; dprintk("RPC: %5u %s: trying rpcbind version %u\n", task->tk_pid, __func__, bind_version); rpcb_clnt = rpcb_create(clnt->cl_server, sap, salen, xprt->prot, - bind_version, 0); + bind_version); if (IS_ERR(rpcb_clnt)) { status = PTR_ERR(rpcb_clnt); dprintk("RPC: %5u %s: rpcb_create failed, error %ld\n", @@ -360,26 +559,23 @@ void rpcb_getport_async(struct rpc_task *task) map->r_netid = rpc_peeraddr2str(clnt, RPC_DISPLAY_NETID); map->r_addr = rpc_peeraddr2str(rpcb_clnt, RPC_DISPLAY_UNIVERSAL_ADDR); map->r_owner = RPCB_OWNER_STRING; /* ignored for GETADDR */ + map->r_status = -EIO; - child = rpcb_call_async(rpcb_clnt, map, xprt->bind_index); + child = rpcb_call_async(rpcb_clnt, map, proc); rpc_release_client(rpcb_clnt); if (IS_ERR(child)) { - status = -EIO; + /* rpcb_map_release() has freed the arguments */ dprintk("RPC: %5u %s: rpc_run_task failed\n", task->tk_pid, __func__); - goto bailout; + return; } rpc_put_task(child); task->tk_xprt->stat.bind_count++; return; -bailout: - kfree(map); - xprt_put(xprt); bailout_nofree: rpcb_wake_rpcbind_waiters(xprt, status); -bailout_nowake: task->tk_status = status; } EXPORT_SYMBOL_GPL(rpcb_getport_async); @@ -418,13 +614,17 @@ static void rpcb_getport_done(struct rpc_task *child, void *data) dprintk("RPC: %5u rpcb_getport_done(status %d, port %u)\n", child->tk_pid, status, map->r_port); - rpcb_wake_rpcbind_waiters(xprt, status); + map->r_status = status; } +/* + * XDR functions for rpcbind + */ + static int rpcb_encode_mapping(struct rpc_rqst *req, __be32 *p, struct rpcbind_args *rpcb) { - dprintk("RPC: rpcb_encode_mapping(%u, %u, %d, %u)\n", + dprintk("RPC: encoding rpcb request (%u, %u, %d, %u)\n", rpcb->r_prog, rpcb->r_vers, rpcb->r_prot, rpcb->r_port); *p++ = htonl(rpcb->r_prog); *p++ = htonl(rpcb->r_vers); @@ -439,7 +639,7 @@ static int rpcb_decode_getport(struct rpc_rqst *req, __be32 *p, unsigned short *portp) { *portp = (unsigned short) ntohl(*p++); - dprintk("RPC: rpcb_decode_getport result %u\n", + dprintk("RPC: rpcb getport result: %u\n", *portp); return 0; } @@ -448,15 +648,15 @@ static int rpcb_decode_set(struct rpc_rqst *req, __be32 *p, unsigned int *boolp) { *boolp = (unsigned int) ntohl(*p++); - dprintk("RPC: rpcb_decode_set result %u\n", - *boolp); + dprintk("RPC: rpcb set/unset call %s\n", + (*boolp ? "succeeded" : "failed")); return 0; } static int rpcb_encode_getaddr(struct rpc_rqst *req, __be32 *p, struct rpcbind_args *rpcb) { - dprintk("RPC: rpcb_encode_getaddr(%u, %u, %s)\n", + dprintk("RPC: encoding rpcb request (%u, %u, %s)\n", rpcb->r_prog, rpcb->r_vers, rpcb->r_addr); *p++ = htonl(rpcb->r_prog); *p++ = htonl(rpcb->r_vers); @@ -572,52 +772,60 @@ out_err: static struct rpc_procinfo rpcb_procedures2[] = { PROC(SET, mapping, set), PROC(UNSET, mapping, set), - PROC(GETADDR, mapping, getport), + PROC(GETPORT, mapping, getport), }; static struct rpc_procinfo rpcb_procedures3[] = { - PROC(SET, mapping, set), - PROC(UNSET, mapping, set), + PROC(SET, getaddr, set), + PROC(UNSET, getaddr, set), PROC(GETADDR, getaddr, getaddr), }; static struct rpc_procinfo rpcb_procedures4[] = { - PROC(SET, mapping, set), - PROC(UNSET, mapping, set), + PROC(SET, getaddr, set), + PROC(UNSET, getaddr, set), + PROC(GETADDR, getaddr, getaddr), PROC(GETVERSADDR, getaddr, getaddr), }; static struct rpcb_info rpcb_next_version[] = { -#ifdef CONFIG_SUNRPC_BIND34 - { 4, &rpcb_procedures4[RPCBPROC_GETVERSADDR] }, - { 3, &rpcb_procedures3[RPCBPROC_GETADDR] }, -#endif - { 2, &rpcb_procedures2[RPCBPROC_GETPORT] }, - { 0, NULL }, + { + .rpc_vers = RPCBVERS_2, + .rpc_proc = &rpcb_procedures2[RPCBPROC_GETPORT], + }, + { + .rpc_proc = NULL, + }, }; static struct rpcb_info rpcb_next_version6[] = { -#ifdef CONFIG_SUNRPC_BIND34 - { 4, &rpcb_procedures4[RPCBPROC_GETVERSADDR] }, - { 3, &rpcb_procedures3[RPCBPROC_GETADDR] }, -#endif - { 0, NULL }, + { + .rpc_vers = RPCBVERS_4, + .rpc_proc = &rpcb_procedures4[RPCBPROC_GETADDR], + }, + { + .rpc_vers = RPCBVERS_3, + .rpc_proc = &rpcb_procedures3[RPCBPROC_GETADDR], + }, + { + .rpc_proc = NULL, + }, }; static struct rpc_version rpcb_version2 = { - .number = 2, + .number = RPCBVERS_2, .nrprocs = RPCB_HIGHPROC_2, .procs = rpcb_procedures2 }; static struct rpc_version rpcb_version3 = { - .number = 3, + .number = RPCBVERS_3, .nrprocs = RPCB_HIGHPROC_3, .procs = rpcb_procedures3 }; static struct rpc_version rpcb_version4 = { - .number = 4, + .number = RPCBVERS_4, .nrprocs = RPCB_HIGHPROC_4, .procs = rpcb_procedures4 }; diff --git a/net/sunrpc/sched.c b/net/sunrpc/sched.c index 6eab9bf94baf..385f427bedad 100644 --- a/net/sunrpc/sched.c +++ b/net/sunrpc/sched.c @@ -576,9 +576,7 @@ EXPORT_SYMBOL_GPL(rpc_delay); */ static void rpc_prepare_task(struct rpc_task *task) { - lock_kernel(); task->tk_ops->rpc_call_prepare(task, task->tk_calldata); - unlock_kernel(); } /* @@ -588,9 +586,7 @@ void rpc_exit_task(struct rpc_task *task) { task->tk_action = NULL; if (task->tk_ops->rpc_call_done != NULL) { - lock_kernel(); task->tk_ops->rpc_call_done(task, task->tk_calldata); - unlock_kernel(); if (task->tk_action != NULL) { WARN_ON(RPC_ASSASSINATED(task)); /* Always release the RPC slot and buffer memory */ @@ -602,11 +598,8 @@ EXPORT_SYMBOL_GPL(rpc_exit_task); void rpc_release_calldata(const struct rpc_call_ops *ops, void *calldata) { - if (ops->rpc_release != NULL) { - lock_kernel(); + if (ops->rpc_release != NULL) ops->rpc_release(calldata); - unlock_kernel(); - } } /* @@ -626,19 +619,15 @@ static void __rpc_execute(struct rpc_task *task) /* * Execute any pending callback. */ - if (RPC_DO_CALLBACK(task)) { - /* Define a callback save pointer */ + if (task->tk_callback) { void (*save_callback)(struct rpc_task *); /* - * If a callback exists, save it, reset it, - * call it. - * The save is needed to stop from resetting - * another callback set within the callback handler - * - Dave + * We set tk_callback to NULL before calling it, + * in case it sets the tk_callback field itself: */ - save_callback=task->tk_callback; - task->tk_callback=NULL; + save_callback = task->tk_callback; + task->tk_callback = NULL; save_callback(task); } diff --git a/net/sunrpc/svc.c b/net/sunrpc/svc.c index 01c7e311b904..54c98d876847 100644 --- a/net/sunrpc/svc.c +++ b/net/sunrpc/svc.c @@ -18,6 +18,7 @@ #include <linux/mm.h> #include <linux/interrupt.h> #include <linux/module.h> +#include <linux/kthread.h> #include <linux/sunrpc/types.h> #include <linux/sunrpc/xdr.h> @@ -27,6 +28,8 @@ #define RPCDBG_FACILITY RPCDBG_SVCDSP +static void svc_unregister(const struct svc_serv *serv); + #define svc_serv_is_pooled(serv) ((serv)->sv_function) /* @@ -291,15 +294,14 @@ svc_pool_map_put(void) /* - * Set the current thread's cpus_allowed mask so that it + * Set the given thread's cpus_allowed mask so that it * will only run on cpus in the given pool. - * - * Returns 1 and fills in oldmask iff a cpumask was applied. */ -static inline int -svc_pool_map_set_cpumask(unsigned int pidx, cpumask_t *oldmask) +static inline void +svc_pool_map_set_cpumask(struct task_struct *task, unsigned int pidx) { struct svc_pool_map *m = &svc_pool_map; + unsigned int node = m->pool_to[pidx]; /* * The caller checks for sv_nrpools > 1, which @@ -307,26 +309,17 @@ svc_pool_map_set_cpumask(unsigned int pidx, cpumask_t *oldmask) */ BUG_ON(m->count == 0); - switch (m->mode) - { - default: - return 0; + switch (m->mode) { case SVC_POOL_PERCPU: { - unsigned int cpu = m->pool_to[pidx]; - - *oldmask = current->cpus_allowed; - set_cpus_allowed_ptr(current, &cpumask_of_cpu(cpu)); - return 1; + set_cpus_allowed_ptr(task, &cpumask_of_cpu(node)); + break; } case SVC_POOL_PERNODE: { - unsigned int node = m->pool_to[pidx]; node_to_cpumask_ptr(nodecpumask, node); - - *oldmask = current->cpus_allowed; - set_cpus_allowed_ptr(current, nodecpumask); - return 1; + set_cpus_allowed_ptr(task, nodecpumask); + break; } } } @@ -366,7 +359,7 @@ svc_pool_for_cpu(struct svc_serv *serv, int cpu) */ static struct svc_serv * __svc_create(struct svc_program *prog, unsigned int bufsize, int npools, - void (*shutdown)(struct svc_serv *serv)) + sa_family_t family, void (*shutdown)(struct svc_serv *serv)) { struct svc_serv *serv; unsigned int vers; @@ -375,6 +368,7 @@ __svc_create(struct svc_program *prog, unsigned int bufsize, int npools, if (!(serv = kzalloc(sizeof(*serv), GFP_KERNEL))) return NULL; + serv->sv_family = family; serv->sv_name = prog->pg_name; serv->sv_program = prog; serv->sv_nrthreads = 1; @@ -425,34 +419,32 @@ __svc_create(struct svc_program *prog, unsigned int bufsize, int npools, spin_lock_init(&pool->sp_lock); } - /* Remove any stale portmap registrations */ - svc_register(serv, 0, 0); + svc_unregister(serv); return serv; } struct svc_serv * svc_create(struct svc_program *prog, unsigned int bufsize, - void (*shutdown)(struct svc_serv *serv)) + sa_family_t family, void (*shutdown)(struct svc_serv *serv)) { - return __svc_create(prog, bufsize, /*npools*/1, shutdown); + return __svc_create(prog, bufsize, /*npools*/1, family, shutdown); } EXPORT_SYMBOL(svc_create); struct svc_serv * svc_create_pooled(struct svc_program *prog, unsigned int bufsize, - void (*shutdown)(struct svc_serv *serv), - svc_thread_fn func, int sig, struct module *mod) + sa_family_t family, void (*shutdown)(struct svc_serv *serv), + svc_thread_fn func, struct module *mod) { struct svc_serv *serv; unsigned int npools = svc_pool_map_get(); - serv = __svc_create(prog, bufsize, npools, shutdown); + serv = __svc_create(prog, bufsize, npools, family, shutdown); if (serv != NULL) { serv->sv_function = func; - serv->sv_kill_signal = sig; serv->sv_module = mod; } @@ -461,7 +453,8 @@ svc_create_pooled(struct svc_program *prog, unsigned int bufsize, EXPORT_SYMBOL(svc_create_pooled); /* - * Destroy an RPC service. Should be called with the BKL held + * Destroy an RPC service. Should be called with appropriate locking to + * protect the sv_nrthreads, sv_permsocks and sv_tempsocks. */ void svc_destroy(struct svc_serv *serv) @@ -495,8 +488,7 @@ svc_destroy(struct svc_serv *serv) if (svc_serv_is_pooled(serv)) svc_pool_map_put(); - /* Unregister service with the portmapper */ - svc_register(serv, 0, 0); + svc_unregister(serv); kfree(serv->sv_pools); kfree(serv); } @@ -578,46 +570,6 @@ out_enomem: EXPORT_SYMBOL(svc_prepare_thread); /* - * Create a thread in the given pool. Caller must hold BKL. - * On a NUMA or SMP machine, with a multi-pool serv, the thread - * will be restricted to run on the cpus belonging to the pool. - */ -static int -__svc_create_thread(svc_thread_fn func, struct svc_serv *serv, - struct svc_pool *pool) -{ - struct svc_rqst *rqstp; - int error = -ENOMEM; - int have_oldmask = 0; - cpumask_t uninitialized_var(oldmask); - - rqstp = svc_prepare_thread(serv, pool); - if (IS_ERR(rqstp)) { - error = PTR_ERR(rqstp); - goto out; - } - - if (serv->sv_nrpools > 1) - have_oldmask = svc_pool_map_set_cpumask(pool->sp_id, &oldmask); - - error = kernel_thread((int (*)(void *)) func, rqstp, 0); - - if (have_oldmask) - set_cpus_allowed(current, oldmask); - - if (error < 0) - goto out_thread; - svc_sock_update_bufs(serv); - error = 0; -out: - return error; - -out_thread: - svc_exit_thread(rqstp); - goto out; -} - -/* * Choose a pool in which to create a new thread, for svc_set_num_threads */ static inline struct svc_pool * @@ -674,7 +626,7 @@ found_pool: * of threads the given number. If `pool' is non-NULL, applies * only to threads in that pool, otherwise round-robins between * all pools. Must be called with a svc_get() reference and - * the BKL held. + * the BKL or another lock to protect access to svc_serv fields. * * Destroying threads relies on the service threads filling in * rqstp->rq_task, which only the nfs ones do. Assumes the serv @@ -686,7 +638,9 @@ found_pool: int svc_set_num_threads(struct svc_serv *serv, struct svc_pool *pool, int nrservs) { - struct task_struct *victim; + struct svc_rqst *rqstp; + struct task_struct *task; + struct svc_pool *chosen_pool; int error = 0; unsigned int state = serv->sv_nrthreads-1; @@ -702,18 +656,34 @@ svc_set_num_threads(struct svc_serv *serv, struct svc_pool *pool, int nrservs) /* create new threads */ while (nrservs > 0) { nrservs--; + chosen_pool = choose_pool(serv, pool, &state); + + rqstp = svc_prepare_thread(serv, chosen_pool); + if (IS_ERR(rqstp)) { + error = PTR_ERR(rqstp); + break; + } + __module_get(serv->sv_module); - error = __svc_create_thread(serv->sv_function, serv, - choose_pool(serv, pool, &state)); - if (error < 0) { + task = kthread_create(serv->sv_function, rqstp, serv->sv_name); + if (IS_ERR(task)) { + error = PTR_ERR(task); module_put(serv->sv_module); + svc_exit_thread(rqstp); break; } + + rqstp->rq_task = task; + if (serv->sv_nrpools > 1) + svc_pool_map_set_cpumask(task, chosen_pool->sp_id); + + svc_sock_update_bufs(serv); + wake_up_process(task); } /* destroy old threads */ while (nrservs < 0 && - (victim = choose_victim(serv, pool, &state)) != NULL) { - send_sig(serv->sv_kill_signal, victim, 1); + (task = choose_victim(serv, pool, &state)) != NULL) { + send_sig(SIGINT, task, 1); nrservs++; } @@ -722,7 +692,8 @@ svc_set_num_threads(struct svc_serv *serv, struct svc_pool *pool, int nrservs) EXPORT_SYMBOL(svc_set_num_threads); /* - * Called from a server thread as it's exiting. Caller must hold BKL. + * Called from a server thread as it's exiting. Caller must hold the BKL or + * the "service mutex", whichever is appropriate for the service. */ void svc_exit_thread(struct svc_rqst *rqstp) @@ -748,55 +719,245 @@ svc_exit_thread(struct svc_rqst *rqstp) } EXPORT_SYMBOL(svc_exit_thread); +#ifdef CONFIG_SUNRPC_REGISTER_V4 + /* - * Register an RPC service with the local portmapper. - * To unregister a service, call this routine with - * proto and port == 0. + * Register an "inet" protocol family netid with the local + * rpcbind daemon via an rpcbind v4 SET request. + * + * No netconfig infrastructure is available in the kernel, so + * we map IP_ protocol numbers to netids by hand. + * + * Returns zero on success; a negative errno value is returned + * if any error occurs. */ -int -svc_register(struct svc_serv *serv, int proto, unsigned short port) +static int __svc_rpcb_register4(const u32 program, const u32 version, + const unsigned short protocol, + const unsigned short port) +{ + struct sockaddr_in sin = { + .sin_family = AF_INET, + .sin_addr.s_addr = htonl(INADDR_ANY), + .sin_port = htons(port), + }; + char *netid; + + switch (protocol) { + case IPPROTO_UDP: + netid = RPCBIND_NETID_UDP; + break; + case IPPROTO_TCP: + netid = RPCBIND_NETID_TCP; + break; + default: + return -EPROTONOSUPPORT; + } + + return rpcb_v4_register(program, version, + (struct sockaddr *)&sin, netid); +} + +/* + * Register an "inet6" protocol family netid with the local + * rpcbind daemon via an rpcbind v4 SET request. + * + * No netconfig infrastructure is available in the kernel, so + * we map IP_ protocol numbers to netids by hand. + * + * Returns zero on success; a negative errno value is returned + * if any error occurs. + */ +static int __svc_rpcb_register6(const u32 program, const u32 version, + const unsigned short protocol, + const unsigned short port) +{ + struct sockaddr_in6 sin6 = { + .sin6_family = AF_INET6, + .sin6_addr = IN6ADDR_ANY_INIT, + .sin6_port = htons(port), + }; + char *netid; + + switch (protocol) { + case IPPROTO_UDP: + netid = RPCBIND_NETID_UDP6; + break; + case IPPROTO_TCP: + netid = RPCBIND_NETID_TCP6; + break; + default: + return -EPROTONOSUPPORT; + } + + return rpcb_v4_register(program, version, + (struct sockaddr *)&sin6, netid); +} + +/* + * Register a kernel RPC service via rpcbind version 4. + * + * Returns zero on success; a negative errno value is returned + * if any error occurs. + */ +static int __svc_register(const u32 program, const u32 version, + const sa_family_t family, + const unsigned short protocol, + const unsigned short port) +{ + int error; + + switch (family) { + case AF_INET: + return __svc_rpcb_register4(program, version, + protocol, port); + case AF_INET6: + error = __svc_rpcb_register6(program, version, + protocol, port); + if (error < 0) + return error; + + /* + * Work around bug in some versions of Linux rpcbind + * which don't allow registration of both inet and + * inet6 netids. + * + * Error return ignored for now. + */ + __svc_rpcb_register4(program, version, + protocol, port); + return 0; + } + + return -EAFNOSUPPORT; +} + +#else /* CONFIG_SUNRPC_REGISTER_V4 */ + +/* + * Register a kernel RPC service via rpcbind version 2. + * + * Returns zero on success; a negative errno value is returned + * if any error occurs. + */ +static int __svc_register(const u32 program, const u32 version, + sa_family_t family, + const unsigned short protocol, + const unsigned short port) +{ + if (family != AF_INET) + return -EAFNOSUPPORT; + + return rpcb_register(program, version, protocol, port); +} + +#endif /* CONFIG_SUNRPC_REGISTER_V4 */ + +/** + * svc_register - register an RPC service with the local portmapper + * @serv: svc_serv struct for the service to register + * @proto: transport protocol number to advertise + * @port: port to advertise + * + * Service is registered for any address in serv's address family + */ +int svc_register(const struct svc_serv *serv, const unsigned short proto, + const unsigned short port) { struct svc_program *progp; - unsigned long flags; unsigned int i; - int error = 0, dummy; + int error = 0; - if (!port) - clear_thread_flag(TIF_SIGPENDING); + BUG_ON(proto == 0 && port == 0); for (progp = serv->sv_program; progp; progp = progp->pg_next) { for (i = 0; i < progp->pg_nvers; i++) { if (progp->pg_vers[i] == NULL) continue; - dprintk("svc: svc_register(%s, %s, %d, %d)%s\n", + dprintk("svc: svc_register(%sv%d, %s, %u, %u)%s\n", progp->pg_name, + i, proto == IPPROTO_UDP? "udp" : "tcp", port, - i, + serv->sv_family, progp->pg_vers[i]->vs_hidden? " (but not telling portmap)" : ""); if (progp->pg_vers[i]->vs_hidden) continue; - error = rpcb_register(progp->pg_prog, i, proto, port, &dummy); + error = __svc_register(progp->pg_prog, i, + serv->sv_family, proto, port); if (error < 0) break; - if (port && !dummy) { - error = -EACCES; - break; - } } } - if (!port) { - spin_lock_irqsave(¤t->sighand->siglock, flags); - recalc_sigpending(); - spin_unlock_irqrestore(¤t->sighand->siglock, flags); + return error; +} + +#ifdef CONFIG_SUNRPC_REGISTER_V4 + +static void __svc_unregister(const u32 program, const u32 version, + const char *progname) +{ + struct sockaddr_in6 sin6 = { + .sin6_family = AF_INET6, + .sin6_addr = IN6ADDR_ANY_INIT, + .sin6_port = 0, + }; + int error; + + error = rpcb_v4_register(program, version, + (struct sockaddr *)&sin6, ""); + dprintk("svc: %s(%sv%u), error %d\n", + __func__, progname, version, error); +} + +#else /* CONFIG_SUNRPC_REGISTER_V4 */ + +static void __svc_unregister(const u32 program, const u32 version, + const char *progname) +{ + int error; + + error = rpcb_register(program, version, 0, 0); + dprintk("svc: %s(%sv%u), error %d\n", + __func__, progname, version, error); +} + +#endif /* CONFIG_SUNRPC_REGISTER_V4 */ + +/* + * All netids, bind addresses and ports registered for [program, version] + * are removed from the local rpcbind database (if the service is not + * hidden) to make way for a new instance of the service. + * + * The result of unregistration is reported via dprintk for those who want + * verification of the result, but is otherwise not important. + */ +static void svc_unregister(const struct svc_serv *serv) +{ + struct svc_program *progp; + unsigned long flags; + unsigned int i; + + clear_thread_flag(TIF_SIGPENDING); + + for (progp = serv->sv_program; progp; progp = progp->pg_next) { + for (i = 0; i < progp->pg_nvers; i++) { + if (progp->pg_vers[i] == NULL) + continue; + if (progp->pg_vers[i]->vs_hidden) + continue; + + __svc_unregister(progp->pg_prog, i, progp->pg_name); + } } - return error; + spin_lock_irqsave(¤t->sighand->siglock, flags); + recalc_sigpending(); + spin_unlock_irqrestore(¤t->sighand->siglock, flags); } /* diff --git a/net/sunrpc/svc_xprt.c b/net/sunrpc/svc_xprt.c index e46c825f4954..bf5b5cdafebf 100644 --- a/net/sunrpc/svc_xprt.c +++ b/net/sunrpc/svc_xprt.c @@ -159,15 +159,44 @@ void svc_xprt_init(struct svc_xprt_class *xcl, struct svc_xprt *xprt, } EXPORT_SYMBOL_GPL(svc_xprt_init); -int svc_create_xprt(struct svc_serv *serv, char *xprt_name, unsigned short port, - int flags) +static struct svc_xprt *__svc_xpo_create(struct svc_xprt_class *xcl, + struct svc_serv *serv, + unsigned short port, int flags) { - struct svc_xprt_class *xcl; struct sockaddr_in sin = { .sin_family = AF_INET, .sin_addr.s_addr = htonl(INADDR_ANY), .sin_port = htons(port), }; + struct sockaddr_in6 sin6 = { + .sin6_family = AF_INET6, + .sin6_addr = IN6ADDR_ANY_INIT, + .sin6_port = htons(port), + }; + struct sockaddr *sap; + size_t len; + + switch (serv->sv_family) { + case AF_INET: + sap = (struct sockaddr *)&sin; + len = sizeof(sin); + break; + case AF_INET6: + sap = (struct sockaddr *)&sin6; + len = sizeof(sin6); + break; + default: + return ERR_PTR(-EAFNOSUPPORT); + } + + return xcl->xcl_ops->xpo_create(serv, sap, len, flags); +} + +int svc_create_xprt(struct svc_serv *serv, char *xprt_name, unsigned short port, + int flags) +{ + struct svc_xprt_class *xcl; + dprintk("svc: creating transport %s[%d]\n", xprt_name, port); spin_lock(&svc_xprt_class_lock); list_for_each_entry(xcl, &svc_xprt_class_list, xcl_list) { @@ -180,9 +209,7 @@ int svc_create_xprt(struct svc_serv *serv, char *xprt_name, unsigned short port, goto err; spin_unlock(&svc_xprt_class_lock); - newxprt = xcl->xcl_ops-> - xpo_create(serv, (struct sockaddr *)&sin, sizeof(sin), - flags); + newxprt = __svc_xpo_create(xcl, serv, port, flags); if (IS_ERR(newxprt)) { module_put(xcl->xcl_owner); return PTR_ERR(newxprt); diff --git a/net/sunrpc/svcsock.c b/net/sunrpc/svcsock.c index 3e65719f1ef6..95293f549e9c 100644 --- a/net/sunrpc/svcsock.c +++ b/net/sunrpc/svcsock.c @@ -1114,6 +1114,7 @@ static struct svc_sock *svc_setup_socket(struct svc_serv *serv, struct svc_sock *svsk; struct sock *inet; int pmap_register = !(flags & SVC_SOCK_ANONYMOUS); + int val; dprintk("svc: svc_setup_socket %p\n", sock); if (!(svsk = kzalloc(sizeof(*svsk), GFP_KERNEL))) { @@ -1146,6 +1147,18 @@ static struct svc_sock *svc_setup_socket(struct svc_serv *serv, else svc_tcp_init(svsk, serv); + /* + * We start one listener per sv_serv. We want AF_INET + * requests to be automatically shunted to our AF_INET6 + * listener using a mapped IPv4 address. Make sure + * no-one starts an equivalent IPv4 listener, which + * would steal our incoming connections. + */ + val = 0; + if (serv->sv_family == AF_INET6) + kernel_setsockopt(sock, SOL_IPV6, IPV6_V6ONLY, + (char *)&val, sizeof(val)); + dprintk("svc: svc_setup_socket created %p (inet %p)\n", svsk, svsk->sk_sk); @@ -1154,8 +1167,7 @@ static struct svc_sock *svc_setup_socket(struct svc_serv *serv, int svc_addsock(struct svc_serv *serv, int fd, - char *name_return, - int *proto) + char *name_return) { int err = 0; struct socket *so = sockfd_lookup(fd, &err); @@ -1190,7 +1202,6 @@ int svc_addsock(struct svc_serv *serv, sockfd_put(so); return err; } - if (proto) *proto = so->sk->sk_protocol; return one_sock_name(name_return, svsk); } EXPORT_SYMBOL_GPL(svc_addsock); diff --git a/net/sunrpc/sysctl.c b/net/sunrpc/sysctl.c index 0f8c439b848a..5231f7aaac0e 100644 --- a/net/sunrpc/sysctl.c +++ b/net/sunrpc/sysctl.c @@ -60,24 +60,14 @@ static int proc_do_xprt(ctl_table *table, int write, struct file *file, void __user *buffer, size_t *lenp, loff_t *ppos) { char tmpbuf[256]; - int len; + size_t len; + if ((*ppos && !write) || !*lenp) { *lenp = 0; return 0; } - if (write) - return -EINVAL; - else { - len = svc_print_xprts(tmpbuf, sizeof(tmpbuf)); - if (!access_ok(VERIFY_WRITE, buffer, len)) - return -EFAULT; - - if (__copy_to_user(buffer, tmpbuf, len)) - return -EFAULT; - } - *lenp -= len; - *ppos += len; - return 0; + len = svc_print_xprts(tmpbuf, sizeof(tmpbuf)); + return simple_read_from_buffer(buffer, *lenp, ppos, tmpbuf, len); } static int diff --git a/net/sunrpc/xprt.c b/net/sunrpc/xprt.c index e1770f7ba0b3..99a52aabe332 100644 --- a/net/sunrpc/xprt.c +++ b/net/sunrpc/xprt.c @@ -690,7 +690,7 @@ static void xprt_connect_status(struct rpc_task *task) { struct rpc_xprt *xprt = task->tk_xprt; - if (task->tk_status >= 0) { + if (task->tk_status == 0) { xprt->stat.connect_count++; xprt->stat.connect_time += (long)jiffies - xprt->stat.connect_start; dprintk("RPC: %5u xprt_connect_status: connection established\n", @@ -699,12 +699,6 @@ static void xprt_connect_status(struct rpc_task *task) } switch (task->tk_status) { - case -ECONNREFUSED: - case -ECONNRESET: - dprintk("RPC: %5u xprt_connect_status: server %s refused " - "connection\n", task->tk_pid, - task->tk_client->cl_server); - break; case -ENOTCONN: dprintk("RPC: %5u xprt_connect_status: connection broken\n", task->tk_pid); @@ -878,6 +872,7 @@ void xprt_transmit(struct rpc_task *task) return; req->rq_connect_cookie = xprt->connect_cookie; + req->rq_xtime = jiffies; status = xprt->ops->send_request(task); if (status == 0) { dprintk("RPC: %5u xmit complete\n", task->tk_pid); diff --git a/net/sunrpc/xprtrdma/rpc_rdma.c b/net/sunrpc/xprtrdma/rpc_rdma.c index e55427f73dfe..5c1954d28d09 100644 --- a/net/sunrpc/xprtrdma/rpc_rdma.c +++ b/net/sunrpc/xprtrdma/rpc_rdma.c @@ -769,7 +769,7 @@ repost: /* check for expected message types */ /* The order of some of these tests is important. */ switch (headerp->rm_type) { - case __constant_htonl(RDMA_MSG): + case htonl(RDMA_MSG): /* never expect read chunks */ /* never expect reply chunks (two ways to check) */ /* never expect write chunks without having offered RDMA */ @@ -802,7 +802,7 @@ repost: rpcrdma_inline_fixup(rqst, (char *)iptr, rep->rr_len); break; - case __constant_htonl(RDMA_NOMSG): + case htonl(RDMA_NOMSG): /* never expect read or write chunks, always reply chunks */ if (headerp->rm_body.rm_chunks[0] != xdr_zero || headerp->rm_body.rm_chunks[1] != xdr_zero || diff --git a/net/sunrpc/xprtrdma/svc_rdma.c b/net/sunrpc/xprtrdma/svc_rdma.c index 88c0ca20bb1e..87101177825b 100644 --- a/net/sunrpc/xprtrdma/svc_rdma.c +++ b/net/sunrpc/xprtrdma/svc_rdma.c @@ -69,6 +69,10 @@ atomic_t rdma_stat_rq_prod; atomic_t rdma_stat_sq_poll; atomic_t rdma_stat_sq_prod; +/* Temporary NFS request map and context caches */ +struct kmem_cache *svc_rdma_map_cachep; +struct kmem_cache *svc_rdma_ctxt_cachep; + /* * This function implements reading and resetting an atomic_t stat * variable through read/write to a proc file. Any write to the file @@ -236,11 +240,14 @@ static ctl_table svcrdma_root_table[] = { void svc_rdma_cleanup(void) { dprintk("SVCRDMA Module Removed, deregister RPC RDMA transport\n"); + flush_scheduled_work(); if (svcrdma_table_header) { unregister_sysctl_table(svcrdma_table_header); svcrdma_table_header = NULL; } svc_unreg_xprt_class(&svc_rdma_class); + kmem_cache_destroy(svc_rdma_map_cachep); + kmem_cache_destroy(svc_rdma_ctxt_cachep); } int svc_rdma_init(void) @@ -255,9 +262,37 @@ int svc_rdma_init(void) svcrdma_table_header = register_sysctl_table(svcrdma_root_table); + /* Create the temporary map cache */ + svc_rdma_map_cachep = kmem_cache_create("svc_rdma_map_cache", + sizeof(struct svc_rdma_req_map), + 0, + SLAB_HWCACHE_ALIGN, + NULL); + if (!svc_rdma_map_cachep) { + printk(KERN_INFO "Could not allocate map cache.\n"); + goto err0; + } + + /* Create the temporary context cache */ + svc_rdma_ctxt_cachep = + kmem_cache_create("svc_rdma_ctxt_cache", + sizeof(struct svc_rdma_op_ctxt), + 0, + SLAB_HWCACHE_ALIGN, + NULL); + if (!svc_rdma_ctxt_cachep) { + printk(KERN_INFO "Could not allocate WR ctxt cache.\n"); + goto err1; + } + /* Register RDMA with the SVC transport switch */ svc_reg_xprt_class(&svc_rdma_class); return 0; + err1: + kmem_cache_destroy(svc_rdma_map_cachep); + err0: + unregister_sysctl_table(svcrdma_table_header); + return -ENOMEM; } MODULE_AUTHOR("Tom Tucker <tom@opengridcomputing.com>"); MODULE_DESCRIPTION("SVC RDMA Transport"); diff --git a/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c b/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c index 06ab4841537b..a4756576d687 100644 --- a/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c +++ b/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c @@ -112,16 +112,11 @@ static void rdma_build_arg_xdr(struct svc_rqst *rqstp, rqstp->rq_arg.tail[0].iov_len = 0; } -struct chunk_sge { - int start; /* sge no for this chunk */ - int count; /* sge count for this chunk */ -}; - /* Encode a read-chunk-list as an array of IB SGE * * Assumptions: * - chunk[0]->position points to pages[0] at an offset of 0 - * - pages[] is not physically or virtually contigous and consists of + * - pages[] is not physically or virtually contiguous and consists of * PAGE_SIZE elements. * * Output: @@ -130,12 +125,12 @@ struct chunk_sge { * chunk in the read list * */ -static int rdma_rcl_to_sge(struct svcxprt_rdma *xprt, +static int map_read_chunks(struct svcxprt_rdma *xprt, struct svc_rqst *rqstp, struct svc_rdma_op_ctxt *head, struct rpcrdma_msg *rmsgp, - struct ib_sge *sge, - struct chunk_sge *ch_sge_ary, + struct svc_rdma_req_map *rpl_map, + struct svc_rdma_req_map *chl_map, int ch_count, int byte_count) { @@ -156,22 +151,18 @@ static int rdma_rcl_to_sge(struct svcxprt_rdma *xprt, head->arg.head[0] = rqstp->rq_arg.head[0]; head->arg.tail[0] = rqstp->rq_arg.tail[0]; head->arg.pages = &head->pages[head->count]; - head->sge[0].length = head->count; /* save count of hdr pages */ + head->hdr_count = head->count; /* save count of hdr pages */ head->arg.page_base = 0; head->arg.page_len = ch_bytes; head->arg.len = rqstp->rq_arg.len + ch_bytes; head->arg.buflen = rqstp->rq_arg.buflen + ch_bytes; head->count++; - ch_sge_ary[0].start = 0; + chl_map->ch[0].start = 0; while (byte_count) { + rpl_map->sge[sge_no].iov_base = + page_address(rqstp->rq_arg.pages[page_no]) + page_off; sge_bytes = min_t(int, PAGE_SIZE-page_off, ch_bytes); - sge[sge_no].addr = - ib_dma_map_page(xprt->sc_cm_id->device, - rqstp->rq_arg.pages[page_no], - page_off, sge_bytes, - DMA_FROM_DEVICE); - sge[sge_no].length = sge_bytes; - sge[sge_no].lkey = xprt->sc_phys_mr->lkey; + rpl_map->sge[sge_no].iov_len = sge_bytes; /* * Don't bump head->count here because the same page * may be used by multiple SGE. @@ -187,11 +178,11 @@ static int rdma_rcl_to_sge(struct svcxprt_rdma *xprt, * SGE, move to the next SGE */ if (ch_bytes == 0) { - ch_sge_ary[ch_no].count = - sge_no - ch_sge_ary[ch_no].start; + chl_map->ch[ch_no].count = + sge_no - chl_map->ch[ch_no].start; ch_no++; ch++; - ch_sge_ary[ch_no].start = sge_no; + chl_map->ch[ch_no].start = sge_no; ch_bytes = ch->rc_target.rs_length; /* If bytes remaining account for next chunk */ if (byte_count) { @@ -220,19 +211,128 @@ static int rdma_rcl_to_sge(struct svcxprt_rdma *xprt, return sge_no; } -static void rdma_set_ctxt_sge(struct svc_rdma_op_ctxt *ctxt, - struct ib_sge *sge, - u64 *sgl_offset, - int count) +/* Map a read-chunk-list to an XDR and fast register the page-list. + * + * Assumptions: + * - chunk[0] position points to pages[0] at an offset of 0 + * - pages[] will be made physically contiguous by creating a one-off memory + * region using the fastreg verb. + * - byte_count is # of bytes in read-chunk-list + * - ch_count is # of chunks in read-chunk-list + * + * Output: + * - sge array pointing into pages[] array. + * - chunk_sge array specifying sge index and count for each + * chunk in the read list + */ +static int fast_reg_read_chunks(struct svcxprt_rdma *xprt, + struct svc_rqst *rqstp, + struct svc_rdma_op_ctxt *head, + struct rpcrdma_msg *rmsgp, + struct svc_rdma_req_map *rpl_map, + struct svc_rdma_req_map *chl_map, + int ch_count, + int byte_count) +{ + int page_no; + int ch_no; + u32 offset; + struct rpcrdma_read_chunk *ch; + struct svc_rdma_fastreg_mr *frmr; + int ret = 0; + + frmr = svc_rdma_get_frmr(xprt); + if (IS_ERR(frmr)) + return -ENOMEM; + + head->frmr = frmr; + head->arg.head[0] = rqstp->rq_arg.head[0]; + head->arg.tail[0] = rqstp->rq_arg.tail[0]; + head->arg.pages = &head->pages[head->count]; + head->hdr_count = head->count; /* save count of hdr pages */ + head->arg.page_base = 0; + head->arg.page_len = byte_count; + head->arg.len = rqstp->rq_arg.len + byte_count; + head->arg.buflen = rqstp->rq_arg.buflen + byte_count; + + /* Fast register the page list */ + frmr->kva = page_address(rqstp->rq_arg.pages[0]); + frmr->direction = DMA_FROM_DEVICE; + frmr->access_flags = (IB_ACCESS_LOCAL_WRITE|IB_ACCESS_REMOTE_WRITE); + frmr->map_len = byte_count; + frmr->page_list_len = PAGE_ALIGN(byte_count) >> PAGE_SHIFT; + for (page_no = 0; page_no < frmr->page_list_len; page_no++) { + frmr->page_list->page_list[page_no] = + ib_dma_map_single(xprt->sc_cm_id->device, + page_address(rqstp->rq_arg.pages[page_no]), + PAGE_SIZE, DMA_TO_DEVICE); + if (ib_dma_mapping_error(xprt->sc_cm_id->device, + frmr->page_list->page_list[page_no])) + goto fatal_err; + atomic_inc(&xprt->sc_dma_used); + head->arg.pages[page_no] = rqstp->rq_arg.pages[page_no]; + } + head->count += page_no; + + /* rq_respages points one past arg pages */ + rqstp->rq_respages = &rqstp->rq_arg.pages[page_no]; + + /* Create the reply and chunk maps */ + offset = 0; + ch = (struct rpcrdma_read_chunk *)&rmsgp->rm_body.rm_chunks[0]; + for (ch_no = 0; ch_no < ch_count; ch_no++) { + rpl_map->sge[ch_no].iov_base = frmr->kva + offset; + rpl_map->sge[ch_no].iov_len = ch->rc_target.rs_length; + chl_map->ch[ch_no].count = 1; + chl_map->ch[ch_no].start = ch_no; + offset += ch->rc_target.rs_length; + ch++; + } + + ret = svc_rdma_fastreg(xprt, frmr); + if (ret) + goto fatal_err; + + return ch_no; + + fatal_err: + printk("svcrdma: error fast registering xdr for xprt %p", xprt); + svc_rdma_put_frmr(xprt, frmr); + return -EIO; +} + +static int rdma_set_ctxt_sge(struct svcxprt_rdma *xprt, + struct svc_rdma_op_ctxt *ctxt, + struct svc_rdma_fastreg_mr *frmr, + struct kvec *vec, + u64 *sgl_offset, + int count) { int i; ctxt->count = count; + ctxt->direction = DMA_FROM_DEVICE; for (i = 0; i < count; i++) { - ctxt->sge[i].addr = sge[i].addr; - ctxt->sge[i].length = sge[i].length; - *sgl_offset = *sgl_offset + sge[i].length; + ctxt->sge[i].length = 0; /* in case map fails */ + if (!frmr) { + ctxt->sge[i].addr = + ib_dma_map_single(xprt->sc_cm_id->device, + vec[i].iov_base, + vec[i].iov_len, + DMA_FROM_DEVICE); + if (ib_dma_mapping_error(xprt->sc_cm_id->device, + ctxt->sge[i].addr)) + return -EINVAL; + ctxt->sge[i].lkey = xprt->sc_dma_lkey; + atomic_inc(&xprt->sc_dma_used); + } else { + ctxt->sge[i].addr = (unsigned long)vec[i].iov_base; + ctxt->sge[i].lkey = frmr->mr->lkey; + } + ctxt->sge[i].length = vec[i].iov_len; + *sgl_offset = *sgl_offset + vec[i].iov_len; } + return 0; } static int rdma_read_max_sge(struct svcxprt_rdma *xprt, int sge_count) @@ -280,37 +380,44 @@ static int rdma_read_xdr(struct svcxprt_rdma *xprt, struct svc_rdma_op_ctxt *hdr_ctxt) { struct ib_send_wr read_wr; + struct ib_send_wr inv_wr; int err = 0; int ch_no; - struct ib_sge *sge; int ch_count; int byte_count; int sge_count; u64 sgl_offset; struct rpcrdma_read_chunk *ch; struct svc_rdma_op_ctxt *ctxt = NULL; - struct svc_rdma_op_ctxt *tmp_sge_ctxt; - struct svc_rdma_op_ctxt *tmp_ch_ctxt; - struct chunk_sge *ch_sge_ary; + struct svc_rdma_req_map *rpl_map; + struct svc_rdma_req_map *chl_map; /* If no read list is present, return 0 */ ch = svc_rdma_get_read_chunk(rmsgp); if (!ch) return 0; - /* Allocate temporary contexts to keep SGE */ - BUG_ON(sizeof(struct ib_sge) < sizeof(struct chunk_sge)); - tmp_sge_ctxt = svc_rdma_get_context(xprt); - sge = tmp_sge_ctxt->sge; - tmp_ch_ctxt = svc_rdma_get_context(xprt); - ch_sge_ary = (struct chunk_sge *)tmp_ch_ctxt->sge; + /* Allocate temporary reply and chunk maps */ + rpl_map = svc_rdma_get_req_map(); + chl_map = svc_rdma_get_req_map(); svc_rdma_rcl_chunk_counts(ch, &ch_count, &byte_count); if (ch_count > RPCSVC_MAXPAGES) return -EINVAL; - sge_count = rdma_rcl_to_sge(xprt, rqstp, hdr_ctxt, rmsgp, - sge, ch_sge_ary, - ch_count, byte_count); + + if (!xprt->sc_frmr_pg_list_len) + sge_count = map_read_chunks(xprt, rqstp, hdr_ctxt, rmsgp, + rpl_map, chl_map, ch_count, + byte_count); + else + sge_count = fast_reg_read_chunks(xprt, rqstp, hdr_ctxt, rmsgp, + rpl_map, chl_map, ch_count, + byte_count); + if (sge_count < 0) { + err = -EIO; + goto out; + } + sgl_offset = 0; ch_no = 0; @@ -319,32 +426,64 @@ static int rdma_read_xdr(struct svcxprt_rdma *xprt, next_sge: ctxt = svc_rdma_get_context(xprt); ctxt->direction = DMA_FROM_DEVICE; + ctxt->frmr = hdr_ctxt->frmr; + ctxt->read_hdr = NULL; clear_bit(RDMACTXT_F_LAST_CTXT, &ctxt->flags); + clear_bit(RDMACTXT_F_FAST_UNREG, &ctxt->flags); /* Prepare READ WR */ memset(&read_wr, 0, sizeof read_wr); - ctxt->wr_op = IB_WR_RDMA_READ; read_wr.wr_id = (unsigned long)ctxt; read_wr.opcode = IB_WR_RDMA_READ; + ctxt->wr_op = read_wr.opcode; read_wr.send_flags = IB_SEND_SIGNALED; read_wr.wr.rdma.rkey = ch->rc_target.rs_handle; read_wr.wr.rdma.remote_addr = get_unaligned(&(ch->rc_target.rs_offset)) + sgl_offset; - read_wr.sg_list = &sge[ch_sge_ary[ch_no].start]; + read_wr.sg_list = ctxt->sge; read_wr.num_sge = - rdma_read_max_sge(xprt, ch_sge_ary[ch_no].count); - rdma_set_ctxt_sge(ctxt, &sge[ch_sge_ary[ch_no].start], - &sgl_offset, - read_wr.num_sge); + rdma_read_max_sge(xprt, chl_map->ch[ch_no].count); + err = rdma_set_ctxt_sge(xprt, ctxt, hdr_ctxt->frmr, + &rpl_map->sge[chl_map->ch[ch_no].start], + &sgl_offset, + read_wr.num_sge); + if (err) { + svc_rdma_unmap_dma(ctxt); + svc_rdma_put_context(ctxt, 0); + goto out; + } if (((ch+1)->rc_discrim == 0) && - (read_wr.num_sge == ch_sge_ary[ch_no].count)) { + (read_wr.num_sge == chl_map->ch[ch_no].count)) { /* * Mark the last RDMA_READ with a bit to * indicate all RPC data has been fetched from * the client and the RPC needs to be enqueued. */ set_bit(RDMACTXT_F_LAST_CTXT, &ctxt->flags); + if (hdr_ctxt->frmr) { + set_bit(RDMACTXT_F_FAST_UNREG, &ctxt->flags); + /* + * Invalidate the local MR used to map the data + * sink. + */ + if (xprt->sc_dev_caps & + SVCRDMA_DEVCAP_READ_W_INV) { + read_wr.opcode = + IB_WR_RDMA_READ_WITH_INV; + ctxt->wr_op = read_wr.opcode; + read_wr.ex.invalidate_rkey = + ctxt->frmr->mr->lkey; + } else { + /* Prepare INVALIDATE WR */ + memset(&inv_wr, 0, sizeof inv_wr); + inv_wr.opcode = IB_WR_LOCAL_INV; + inv_wr.send_flags = IB_SEND_SIGNALED; + inv_wr.ex.invalidate_rkey = + hdr_ctxt->frmr->mr->lkey; + read_wr.next = &inv_wr; + } + } ctxt->read_hdr = hdr_ctxt; } /* Post the read */ @@ -358,9 +497,9 @@ next_sge: } atomic_inc(&rdma_stat_read); - if (read_wr.num_sge < ch_sge_ary[ch_no].count) { - ch_sge_ary[ch_no].count -= read_wr.num_sge; - ch_sge_ary[ch_no].start += read_wr.num_sge; + if (read_wr.num_sge < chl_map->ch[ch_no].count) { + chl_map->ch[ch_no].count -= read_wr.num_sge; + chl_map->ch[ch_no].start += read_wr.num_sge; goto next_sge; } sgl_offset = 0; @@ -368,8 +507,8 @@ next_sge: } out: - svc_rdma_put_context(tmp_sge_ctxt, 0); - svc_rdma_put_context(tmp_ch_ctxt, 0); + svc_rdma_put_req_map(rpl_map); + svc_rdma_put_req_map(chl_map); /* Detach arg pages. svc_recv will replenish them */ for (ch_no = 0; &rqstp->rq_pages[ch_no] < rqstp->rq_respages; ch_no++) @@ -399,7 +538,7 @@ static int rdma_read_complete(struct svc_rqst *rqstp, rqstp->rq_pages[page_no] = head->pages[page_no]; } /* Point rq_arg.pages past header */ - rqstp->rq_arg.pages = &rqstp->rq_pages[head->sge[0].length]; + rqstp->rq_arg.pages = &rqstp->rq_pages[head->hdr_count]; rqstp->rq_arg.page_len = head->arg.page_len; rqstp->rq_arg.page_base = head->arg.page_base; @@ -449,18 +588,18 @@ int svc_rdma_recvfrom(struct svc_rqst *rqstp) dprintk("svcrdma: rqstp=%p\n", rqstp); - spin_lock_bh(&rdma_xprt->sc_read_complete_lock); + spin_lock_bh(&rdma_xprt->sc_rq_dto_lock); if (!list_empty(&rdma_xprt->sc_read_complete_q)) { ctxt = list_entry(rdma_xprt->sc_read_complete_q.next, struct svc_rdma_op_ctxt, dto_q); list_del_init(&ctxt->dto_q); } - spin_unlock_bh(&rdma_xprt->sc_read_complete_lock); - if (ctxt) + if (ctxt) { + spin_unlock_bh(&rdma_xprt->sc_rq_dto_lock); return rdma_read_complete(rqstp, ctxt); + } - spin_lock_bh(&rdma_xprt->sc_rq_dto_lock); if (!list_empty(&rdma_xprt->sc_rq_dto_q)) { ctxt = list_entry(rdma_xprt->sc_rq_dto_q.next, struct svc_rdma_op_ctxt, diff --git a/net/sunrpc/xprtrdma/svc_rdma_sendto.c b/net/sunrpc/xprtrdma/svc_rdma_sendto.c index fb82b1b683f8..9a7a8e7ae038 100644 --- a/net/sunrpc/xprtrdma/svc_rdma_sendto.c +++ b/net/sunrpc/xprtrdma/svc_rdma_sendto.c @@ -63,52 +63,165 @@ * SGE[2..sge_count-2] data from xdr->pages[] * SGE[sge_count-1] data from xdr->tail. * + * The max SGE we need is the length of the XDR / pagesize + one for + * head + one for tail + one for RPCRDMA header. Since RPCSVC_MAXPAGES + * reserves a page for both the request and the reply header, and this + * array is only concerned with the reply we are assured that we have + * on extra page for the RPCRMDA header. */ -static struct ib_sge *xdr_to_sge(struct svcxprt_rdma *xprt, - struct xdr_buf *xdr, - struct ib_sge *sge, - int *sge_count) +int fast_reg_xdr(struct svcxprt_rdma *xprt, + struct xdr_buf *xdr, + struct svc_rdma_req_map *vec) +{ + int sge_no; + u32 sge_bytes; + u32 page_bytes; + u32 page_off; + int page_no = 0; + u8 *frva; + struct svc_rdma_fastreg_mr *frmr; + + frmr = svc_rdma_get_frmr(xprt); + if (IS_ERR(frmr)) + return -ENOMEM; + vec->frmr = frmr; + + /* Skip the RPCRDMA header */ + sge_no = 1; + + /* Map the head. */ + frva = (void *)((unsigned long)(xdr->head[0].iov_base) & PAGE_MASK); + vec->sge[sge_no].iov_base = xdr->head[0].iov_base; + vec->sge[sge_no].iov_len = xdr->head[0].iov_len; + vec->count = 2; + sge_no++; + + /* Build the FRMR */ + frmr->kva = frva; + frmr->direction = DMA_TO_DEVICE; + frmr->access_flags = 0; + frmr->map_len = PAGE_SIZE; + frmr->page_list_len = 1; + frmr->page_list->page_list[page_no] = + ib_dma_map_single(xprt->sc_cm_id->device, + (void *)xdr->head[0].iov_base, + PAGE_SIZE, DMA_TO_DEVICE); + if (ib_dma_mapping_error(xprt->sc_cm_id->device, + frmr->page_list->page_list[page_no])) + goto fatal_err; + atomic_inc(&xprt->sc_dma_used); + + page_off = xdr->page_base; + page_bytes = xdr->page_len + page_off; + if (!page_bytes) + goto encode_tail; + + /* Map the pages */ + vec->sge[sge_no].iov_base = frva + frmr->map_len + page_off; + vec->sge[sge_no].iov_len = page_bytes; + sge_no++; + while (page_bytes) { + struct page *page; + + page = xdr->pages[page_no++]; + sge_bytes = min_t(u32, page_bytes, (PAGE_SIZE - page_off)); + page_bytes -= sge_bytes; + + frmr->page_list->page_list[page_no] = + ib_dma_map_page(xprt->sc_cm_id->device, page, 0, + PAGE_SIZE, DMA_TO_DEVICE); + if (ib_dma_mapping_error(xprt->sc_cm_id->device, + frmr->page_list->page_list[page_no])) + goto fatal_err; + + atomic_inc(&xprt->sc_dma_used); + page_off = 0; /* reset for next time through loop */ + frmr->map_len += PAGE_SIZE; + frmr->page_list_len++; + } + vec->count++; + + encode_tail: + /* Map tail */ + if (0 == xdr->tail[0].iov_len) + goto done; + + vec->count++; + vec->sge[sge_no].iov_len = xdr->tail[0].iov_len; + + if (((unsigned long)xdr->tail[0].iov_base & PAGE_MASK) == + ((unsigned long)xdr->head[0].iov_base & PAGE_MASK)) { + /* + * If head and tail use the same page, we don't need + * to map it again. + */ + vec->sge[sge_no].iov_base = xdr->tail[0].iov_base; + } else { + void *va; + + /* Map another page for the tail */ + page_off = (unsigned long)xdr->tail[0].iov_base & ~PAGE_MASK; + va = (void *)((unsigned long)xdr->tail[0].iov_base & PAGE_MASK); + vec->sge[sge_no].iov_base = frva + frmr->map_len + page_off; + + frmr->page_list->page_list[page_no] = + ib_dma_map_single(xprt->sc_cm_id->device, va, PAGE_SIZE, + DMA_TO_DEVICE); + if (ib_dma_mapping_error(xprt->sc_cm_id->device, + frmr->page_list->page_list[page_no])) + goto fatal_err; + atomic_inc(&xprt->sc_dma_used); + frmr->map_len += PAGE_SIZE; + frmr->page_list_len++; + } + + done: + if (svc_rdma_fastreg(xprt, frmr)) + goto fatal_err; + + return 0; + + fatal_err: + printk("svcrdma: Error fast registering memory for xprt %p\n", xprt); + svc_rdma_put_frmr(xprt, frmr); + return -EIO; +} + +static int map_xdr(struct svcxprt_rdma *xprt, + struct xdr_buf *xdr, + struct svc_rdma_req_map *vec) { - /* Max we need is the length of the XDR / pagesize + one for - * head + one for tail + one for RPCRDMA header - */ int sge_max = (xdr->len+PAGE_SIZE-1) / PAGE_SIZE + 3; int sge_no; - u32 byte_count = xdr->len; u32 sge_bytes; u32 page_bytes; - int page_off; + u32 page_off; int page_no; + BUG_ON(xdr->len != + (xdr->head[0].iov_len + xdr->page_len + xdr->tail[0].iov_len)); + + if (xprt->sc_frmr_pg_list_len) + return fast_reg_xdr(xprt, xdr, vec); + /* Skip the first sge, this is for the RPCRDMA header */ sge_no = 1; /* Head SGE */ - sge[sge_no].addr = ib_dma_map_single(xprt->sc_cm_id->device, - xdr->head[0].iov_base, - xdr->head[0].iov_len, - DMA_TO_DEVICE); - sge_bytes = min_t(u32, byte_count, xdr->head[0].iov_len); - byte_count -= sge_bytes; - sge[sge_no].length = sge_bytes; - sge[sge_no].lkey = xprt->sc_phys_mr->lkey; + vec->sge[sge_no].iov_base = xdr->head[0].iov_base; + vec->sge[sge_no].iov_len = xdr->head[0].iov_len; sge_no++; /* pages SGE */ page_no = 0; page_bytes = xdr->page_len; page_off = xdr->page_base; - while (byte_count && page_bytes) { - sge_bytes = min_t(u32, byte_count, (PAGE_SIZE-page_off)); - sge[sge_no].addr = - ib_dma_map_page(xprt->sc_cm_id->device, - xdr->pages[page_no], page_off, - sge_bytes, DMA_TO_DEVICE); - sge_bytes = min(sge_bytes, page_bytes); - byte_count -= sge_bytes; + while (page_bytes) { + vec->sge[sge_no].iov_base = + page_address(xdr->pages[page_no]) + page_off; + sge_bytes = min_t(u32, page_bytes, (PAGE_SIZE - page_off)); page_bytes -= sge_bytes; - sge[sge_no].length = sge_bytes; - sge[sge_no].lkey = xprt->sc_phys_mr->lkey; + vec->sge[sge_no].iov_len = sge_bytes; sge_no++; page_no++; @@ -116,36 +229,27 @@ static struct ib_sge *xdr_to_sge(struct svcxprt_rdma *xprt, } /* Tail SGE */ - if (byte_count && xdr->tail[0].iov_len) { - sge[sge_no].addr = - ib_dma_map_single(xprt->sc_cm_id->device, - xdr->tail[0].iov_base, - xdr->tail[0].iov_len, - DMA_TO_DEVICE); - sge_bytes = min_t(u32, byte_count, xdr->tail[0].iov_len); - byte_count -= sge_bytes; - sge[sge_no].length = sge_bytes; - sge[sge_no].lkey = xprt->sc_phys_mr->lkey; + if (xdr->tail[0].iov_len) { + vec->sge[sge_no].iov_base = xdr->tail[0].iov_base; + vec->sge[sge_no].iov_len = xdr->tail[0].iov_len; sge_no++; } BUG_ON(sge_no > sge_max); - BUG_ON(byte_count != 0); - - *sge_count = sge_no; - return sge; + vec->count = sge_no; + return 0; } - /* Assumptions: + * - We are using FRMR + * - or - * - The specified write_len can be represented in sc_max_sge * PAGE_SIZE */ static int send_write(struct svcxprt_rdma *xprt, struct svc_rqst *rqstp, u32 rmr, u64 to, u32 xdr_off, int write_len, - struct ib_sge *xdr_sge, int sge_count) + struct svc_rdma_req_map *vec) { - struct svc_rdma_op_ctxt *tmp_sge_ctxt; struct ib_send_wr write_wr; struct ib_sge *sge; int xdr_sge_no; @@ -154,25 +258,23 @@ static int send_write(struct svcxprt_rdma *xprt, struct svc_rqst *rqstp, int sge_off; int bc; struct svc_rdma_op_ctxt *ctxt; - int ret = 0; - BUG_ON(sge_count > RPCSVC_MAXPAGES); + BUG_ON(vec->count > RPCSVC_MAXPAGES); dprintk("svcrdma: RDMA_WRITE rmr=%x, to=%llx, xdr_off=%d, " - "write_len=%d, xdr_sge=%p, sge_count=%d\n", + "write_len=%d, vec->sge=%p, vec->count=%lu\n", rmr, (unsigned long long)to, xdr_off, - write_len, xdr_sge, sge_count); + write_len, vec->sge, vec->count); ctxt = svc_rdma_get_context(xprt); - ctxt->count = 0; - tmp_sge_ctxt = svc_rdma_get_context(xprt); - sge = tmp_sge_ctxt->sge; + ctxt->direction = DMA_TO_DEVICE; + sge = ctxt->sge; /* Find the SGE associated with xdr_off */ - for (bc = xdr_off, xdr_sge_no = 1; bc && xdr_sge_no < sge_count; + for (bc = xdr_off, xdr_sge_no = 1; bc && xdr_sge_no < vec->count; xdr_sge_no++) { - if (xdr_sge[xdr_sge_no].length > bc) + if (vec->sge[xdr_sge_no].iov_len > bc) break; - bc -= xdr_sge[xdr_sge_no].length; + bc -= vec->sge[xdr_sge_no].iov_len; } sge_off = bc; @@ -180,22 +282,35 @@ static int send_write(struct svcxprt_rdma *xprt, struct svc_rqst *rqstp, sge_no = 0; /* Copy the remaining SGE */ - while (bc != 0 && xdr_sge_no < sge_count) { - sge[sge_no].addr = xdr_sge[xdr_sge_no].addr + sge_off; - sge[sge_no].lkey = xdr_sge[xdr_sge_no].lkey; - sge_bytes = min((size_t)bc, - (size_t)(xdr_sge[xdr_sge_no].length-sge_off)); + while (bc != 0) { + sge_bytes = min_t(size_t, + bc, vec->sge[xdr_sge_no].iov_len-sge_off); sge[sge_no].length = sge_bytes; - + if (!vec->frmr) { + sge[sge_no].addr = + ib_dma_map_single(xprt->sc_cm_id->device, + (void *) + vec->sge[xdr_sge_no].iov_base + sge_off, + sge_bytes, DMA_TO_DEVICE); + if (ib_dma_mapping_error(xprt->sc_cm_id->device, + sge[sge_no].addr)) + goto err; + atomic_inc(&xprt->sc_dma_used); + sge[sge_no].lkey = xprt->sc_dma_lkey; + } else { + sge[sge_no].addr = (unsigned long) + vec->sge[xdr_sge_no].iov_base + sge_off; + sge[sge_no].lkey = vec->frmr->mr->lkey; + } + ctxt->count++; + ctxt->frmr = vec->frmr; sge_off = 0; sge_no++; xdr_sge_no++; + BUG_ON(xdr_sge_no > vec->count); bc -= sge_bytes; } - BUG_ON(bc != 0); - BUG_ON(xdr_sge_no > sge_count); - /* Prepare WRITE WR */ memset(&write_wr, 0, sizeof write_wr); ctxt->wr_op = IB_WR_RDMA_WRITE; @@ -209,21 +324,20 @@ static int send_write(struct svcxprt_rdma *xprt, struct svc_rqst *rqstp, /* Post It */ atomic_inc(&rdma_stat_write); - if (svc_rdma_send(xprt, &write_wr)) { - svc_rdma_put_context(ctxt, 1); - /* Fatal error, close transport */ - ret = -EIO; - } - svc_rdma_put_context(tmp_sge_ctxt, 0); - return ret; + if (svc_rdma_send(xprt, &write_wr)) + goto err; + return 0; + err: + svc_rdma_put_context(ctxt, 0); + /* Fatal error, close transport */ + return -EIO; } static int send_write_chunks(struct svcxprt_rdma *xprt, struct rpcrdma_msg *rdma_argp, struct rpcrdma_msg *rdma_resp, struct svc_rqst *rqstp, - struct ib_sge *sge, - int sge_count) + struct svc_rdma_req_map *vec) { u32 xfer_len = rqstp->rq_res.page_len + rqstp->rq_res.tail[0].iov_len; int write_len; @@ -241,7 +355,10 @@ static int send_write_chunks(struct svcxprt_rdma *xprt, res_ary = (struct rpcrdma_write_array *) &rdma_resp->rm_body.rm_chunks[1]; - max_write = xprt->sc_max_sge * PAGE_SIZE; + if (vec->frmr) + max_write = vec->frmr->map_len; + else + max_write = xprt->sc_max_sge * PAGE_SIZE; /* Write chunks start at the pagelist */ for (xdr_off = rqstp->rq_res.head[0].iov_len, chunk_no = 0; @@ -269,8 +386,7 @@ static int send_write_chunks(struct svcxprt_rdma *xprt, rs_offset + chunk_off, xdr_off, this_write, - sge, - sge_count); + vec); if (ret) { dprintk("svcrdma: RDMA_WRITE failed, ret=%d\n", ret); @@ -292,8 +408,7 @@ static int send_reply_chunks(struct svcxprt_rdma *xprt, struct rpcrdma_msg *rdma_argp, struct rpcrdma_msg *rdma_resp, struct svc_rqst *rqstp, - struct ib_sge *sge, - int sge_count) + struct svc_rdma_req_map *vec) { u32 xfer_len = rqstp->rq_res.len; int write_len; @@ -314,7 +429,10 @@ static int send_reply_chunks(struct svcxprt_rdma *xprt, res_ary = (struct rpcrdma_write_array *) &rdma_resp->rm_body.rm_chunks[2]; - max_write = xprt->sc_max_sge * PAGE_SIZE; + if (vec->frmr) + max_write = vec->frmr->map_len; + else + max_write = xprt->sc_max_sge * PAGE_SIZE; /* xdr offset starts at RPC message */ for (xdr_off = 0, chunk_no = 0; @@ -324,7 +442,6 @@ static int send_reply_chunks(struct svcxprt_rdma *xprt, ch = &arg_ary->wc_array[chunk_no].wc_target; write_len = min(xfer_len, ch->rs_length); - /* Prepare the reply chunk given the length actually * written */ rs_offset = get_unaligned(&(ch->rs_offset)); @@ -341,8 +458,7 @@ static int send_reply_chunks(struct svcxprt_rdma *xprt, rs_offset + chunk_off, xdr_off, this_write, - sge, - sge_count); + vec); if (ret) { dprintk("svcrdma: RDMA_WRITE failed, ret=%d\n", ret); @@ -380,10 +496,11 @@ static int send_reply(struct svcxprt_rdma *rdma, struct page *page, struct rpcrdma_msg *rdma_resp, struct svc_rdma_op_ctxt *ctxt, - int sge_count, + struct svc_rdma_req_map *vec, int byte_count) { struct ib_send_wr send_wr; + struct ib_send_wr inv_wr; int sge_no; int sge_bytes; int page_no; @@ -403,20 +520,45 @@ static int send_reply(struct svcxprt_rdma *rdma, /* Prepare the context */ ctxt->pages[0] = page; ctxt->count = 1; + ctxt->frmr = vec->frmr; + if (vec->frmr) + set_bit(RDMACTXT_F_FAST_UNREG, &ctxt->flags); + else + clear_bit(RDMACTXT_F_FAST_UNREG, &ctxt->flags); /* Prepare the SGE for the RPCRDMA Header */ ctxt->sge[0].addr = ib_dma_map_page(rdma->sc_cm_id->device, page, 0, PAGE_SIZE, DMA_TO_DEVICE); + if (ib_dma_mapping_error(rdma->sc_cm_id->device, ctxt->sge[0].addr)) + goto err; + atomic_inc(&rdma->sc_dma_used); + ctxt->direction = DMA_TO_DEVICE; + ctxt->sge[0].length = svc_rdma_xdr_get_reply_hdr_len(rdma_resp); - ctxt->sge[0].lkey = rdma->sc_phys_mr->lkey; + ctxt->sge[0].lkey = rdma->sc_dma_lkey; /* Determine how many of our SGE are to be transmitted */ - for (sge_no = 1; byte_count && sge_no < sge_count; sge_no++) { - sge_bytes = min((size_t)ctxt->sge[sge_no].length, - (size_t)byte_count); + for (sge_no = 1; byte_count && sge_no < vec->count; sge_no++) { + sge_bytes = min_t(size_t, vec->sge[sge_no].iov_len, byte_count); byte_count -= sge_bytes; + if (!vec->frmr) { + ctxt->sge[sge_no].addr = + ib_dma_map_single(rdma->sc_cm_id->device, + vec->sge[sge_no].iov_base, + sge_bytes, DMA_TO_DEVICE); + if (ib_dma_mapping_error(rdma->sc_cm_id->device, + ctxt->sge[sge_no].addr)) + goto err; + atomic_inc(&rdma->sc_dma_used); + ctxt->sge[sge_no].lkey = rdma->sc_dma_lkey; + } else { + ctxt->sge[sge_no].addr = (unsigned long) + vec->sge[sge_no].iov_base; + ctxt->sge[sge_no].lkey = vec->frmr->mr->lkey; + } + ctxt->sge[sge_no].length = sge_bytes; } BUG_ON(byte_count != 0); @@ -428,9 +570,16 @@ static int send_reply(struct svcxprt_rdma *rdma, ctxt->pages[page_no+1] = rqstp->rq_respages[page_no]; ctxt->count++; rqstp->rq_respages[page_no] = NULL; + /* + * If there are more pages than SGE, terminate SGE + * list so that svc_rdma_unmap_dma doesn't attempt to + * unmap garbage. + */ + if (page_no+1 >= sge_no) + ctxt->sge[page_no+1].length = 0; } - BUG_ON(sge_no > rdma->sc_max_sge); + BUG_ON(sge_no > ctxt->count); memset(&send_wr, 0, sizeof send_wr); ctxt->wr_op = IB_WR_SEND; send_wr.wr_id = (unsigned long)ctxt; @@ -438,12 +587,26 @@ static int send_reply(struct svcxprt_rdma *rdma, send_wr.num_sge = sge_no; send_wr.opcode = IB_WR_SEND; send_wr.send_flags = IB_SEND_SIGNALED; + if (vec->frmr) { + /* Prepare INVALIDATE WR */ + memset(&inv_wr, 0, sizeof inv_wr); + inv_wr.opcode = IB_WR_LOCAL_INV; + inv_wr.send_flags = IB_SEND_SIGNALED; + inv_wr.ex.invalidate_rkey = + vec->frmr->mr->lkey; + send_wr.next = &inv_wr; + } ret = svc_rdma_send(rdma, &send_wr); if (ret) - svc_rdma_put_context(ctxt, 1); + goto err; - return ret; + return 0; + + err: + svc_rdma_put_frmr(rdma, vec->frmr); + svc_rdma_put_context(ctxt, 1); + return -EIO; } void svc_rdma_prep_reply_hdr(struct svc_rqst *rqstp) @@ -473,21 +636,22 @@ int svc_rdma_sendto(struct svc_rqst *rqstp) enum rpcrdma_proc reply_type; int ret; int inline_bytes; - struct ib_sge *sge; - int sge_count = 0; struct page *res_page; struct svc_rdma_op_ctxt *ctxt; + struct svc_rdma_req_map *vec; dprintk("svcrdma: sending response for rqstp=%p\n", rqstp); /* Get the RDMA request header. */ rdma_argp = xdr_start(&rqstp->rq_arg); - /* Build an SGE for the XDR */ + /* Build an req vec for the XDR */ ctxt = svc_rdma_get_context(rdma); ctxt->direction = DMA_TO_DEVICE; - sge = xdr_to_sge(rdma, &rqstp->rq_res, ctxt->sge, &sge_count); - + vec = svc_rdma_get_req_map(); + ret = map_xdr(rdma, &rqstp->rq_res, vec); + if (ret) + goto err0; inline_bytes = rqstp->rq_res.len; /* Create the RDMA response header */ @@ -503,30 +667,34 @@ int svc_rdma_sendto(struct svc_rqst *rqstp) /* Send any write-chunk data and build resp write-list */ ret = send_write_chunks(rdma, rdma_argp, rdma_resp, - rqstp, sge, sge_count); + rqstp, vec); if (ret < 0) { printk(KERN_ERR "svcrdma: failed to send write chunks, rc=%d\n", ret); - goto error; + goto err1; } inline_bytes -= ret; /* Send any reply-list data and update resp reply-list */ ret = send_reply_chunks(rdma, rdma_argp, rdma_resp, - rqstp, sge, sge_count); + rqstp, vec); if (ret < 0) { printk(KERN_ERR "svcrdma: failed to send reply chunks, rc=%d\n", ret); - goto error; + goto err1; } inline_bytes -= ret; - ret = send_reply(rdma, rqstp, res_page, rdma_resp, ctxt, sge_count, + ret = send_reply(rdma, rqstp, res_page, rdma_resp, ctxt, vec, inline_bytes); + svc_rdma_put_req_map(vec); dprintk("svcrdma: send_reply returns %d\n", ret); return ret; - error: - svc_rdma_put_context(ctxt, 0); + + err1: put_page(res_page); + err0: + svc_rdma_put_req_map(vec); + svc_rdma_put_context(ctxt, 0); return ret; } diff --git a/net/sunrpc/xprtrdma/svc_rdma_transport.c b/net/sunrpc/xprtrdma/svc_rdma_transport.c index e132509d1db0..6fb493cbd29f 100644 --- a/net/sunrpc/xprtrdma/svc_rdma_transport.c +++ b/net/sunrpc/xprtrdma/svc_rdma_transport.c @@ -84,70 +84,46 @@ struct svc_xprt_class svc_rdma_class = { .xcl_max_payload = RPCSVC_MAXPAYLOAD_TCP, }; -static int rdma_bump_context_cache(struct svcxprt_rdma *xprt) +/* WR context cache. Created in svc_rdma.c */ +extern struct kmem_cache *svc_rdma_ctxt_cachep; + +struct svc_rdma_op_ctxt *svc_rdma_get_context(struct svcxprt_rdma *xprt) { - int target; - int at_least_one = 0; struct svc_rdma_op_ctxt *ctxt; - target = min(xprt->sc_ctxt_cnt + xprt->sc_ctxt_bump, - xprt->sc_ctxt_max); - - spin_lock_bh(&xprt->sc_ctxt_lock); - while (xprt->sc_ctxt_cnt < target) { - xprt->sc_ctxt_cnt++; - spin_unlock_bh(&xprt->sc_ctxt_lock); - - ctxt = kmalloc(sizeof(*ctxt), GFP_KERNEL); - - spin_lock_bh(&xprt->sc_ctxt_lock); - if (ctxt) { - at_least_one = 1; - INIT_LIST_HEAD(&ctxt->free_list); - list_add(&ctxt->free_list, &xprt->sc_ctxt_free); - } else { - /* kmalloc failed...give up for now */ - xprt->sc_ctxt_cnt--; + while (1) { + ctxt = kmem_cache_alloc(svc_rdma_ctxt_cachep, GFP_KERNEL); + if (ctxt) break; - } + schedule_timeout_uninterruptible(msecs_to_jiffies(500)); } - spin_unlock_bh(&xprt->sc_ctxt_lock); - dprintk("svcrdma: sc_ctxt_max=%d, sc_ctxt_cnt=%d\n", - xprt->sc_ctxt_max, xprt->sc_ctxt_cnt); - return at_least_one; + ctxt->xprt = xprt; + INIT_LIST_HEAD(&ctxt->dto_q); + ctxt->count = 0; + ctxt->frmr = NULL; + atomic_inc(&xprt->sc_ctxt_used); + return ctxt; } -struct svc_rdma_op_ctxt *svc_rdma_get_context(struct svcxprt_rdma *xprt) +void svc_rdma_unmap_dma(struct svc_rdma_op_ctxt *ctxt) { - struct svc_rdma_op_ctxt *ctxt; - - while (1) { - spin_lock_bh(&xprt->sc_ctxt_lock); - if (unlikely(list_empty(&xprt->sc_ctxt_free))) { - /* Try to bump my cache. */ - spin_unlock_bh(&xprt->sc_ctxt_lock); - - if (rdma_bump_context_cache(xprt)) - continue; - - printk(KERN_INFO "svcrdma: sleeping waiting for " - "context memory on xprt=%p\n", - xprt); - schedule_timeout_uninterruptible(msecs_to_jiffies(500)); - continue; + struct svcxprt_rdma *xprt = ctxt->xprt; + int i; + for (i = 0; i < ctxt->count && ctxt->sge[i].length; i++) { + /* + * Unmap the DMA addr in the SGE if the lkey matches + * the sc_dma_lkey, otherwise, ignore it since it is + * an FRMR lkey and will be unmapped later when the + * last WR that uses it completes. + */ + if (ctxt->sge[i].lkey == xprt->sc_dma_lkey) { + atomic_dec(&xprt->sc_dma_used); + ib_dma_unmap_single(xprt->sc_cm_id->device, + ctxt->sge[i].addr, + ctxt->sge[i].length, + ctxt->direction); } - ctxt = list_entry(xprt->sc_ctxt_free.next, - struct svc_rdma_op_ctxt, - free_list); - list_del_init(&ctxt->free_list); - spin_unlock_bh(&xprt->sc_ctxt_lock); - ctxt->xprt = xprt; - INIT_LIST_HEAD(&ctxt->dto_q); - ctxt->count = 0; - atomic_inc(&xprt->sc_ctxt_used); - break; } - return ctxt; } void svc_rdma_put_context(struct svc_rdma_op_ctxt *ctxt, int free_pages) @@ -161,18 +137,37 @@ void svc_rdma_put_context(struct svc_rdma_op_ctxt *ctxt, int free_pages) for (i = 0; i < ctxt->count; i++) put_page(ctxt->pages[i]); - for (i = 0; i < ctxt->count; i++) - ib_dma_unmap_single(xprt->sc_cm_id->device, - ctxt->sge[i].addr, - ctxt->sge[i].length, - ctxt->direction); - - spin_lock_bh(&xprt->sc_ctxt_lock); - list_add(&ctxt->free_list, &xprt->sc_ctxt_free); - spin_unlock_bh(&xprt->sc_ctxt_lock); + kmem_cache_free(svc_rdma_ctxt_cachep, ctxt); atomic_dec(&xprt->sc_ctxt_used); } +/* Temporary NFS request map cache. Created in svc_rdma.c */ +extern struct kmem_cache *svc_rdma_map_cachep; + +/* + * Temporary NFS req mappings are shared across all transport + * instances. These are short lived and should be bounded by the number + * of concurrent server threads * depth of the SQ. + */ +struct svc_rdma_req_map *svc_rdma_get_req_map(void) +{ + struct svc_rdma_req_map *map; + while (1) { + map = kmem_cache_alloc(svc_rdma_map_cachep, GFP_KERNEL); + if (map) + break; + schedule_timeout_uninterruptible(msecs_to_jiffies(500)); + } + map->count = 0; + map->frmr = NULL; + return map; +} + +void svc_rdma_put_req_map(struct svc_rdma_req_map *map) +{ + kmem_cache_free(svc_rdma_map_cachep, map); +} + /* ib_cq event handler */ static void cq_event_handler(struct ib_event *event, void *context) { @@ -302,6 +297,7 @@ static void rq_cq_reap(struct svcxprt_rdma *xprt) ctxt = (struct svc_rdma_op_ctxt *)(unsigned long)wc.wr_id; ctxt->wc_status = wc.status; ctxt->byte_len = wc.byte_len; + svc_rdma_unmap_dma(ctxt); if (wc.status != IB_WC_SUCCESS) { /* Close the transport */ dprintk("svcrdma: transport closing putting ctxt %p\n", ctxt); @@ -330,6 +326,50 @@ static void rq_cq_reap(struct svcxprt_rdma *xprt) } /* + * Processs a completion context + */ +static void process_context(struct svcxprt_rdma *xprt, + struct svc_rdma_op_ctxt *ctxt) +{ + svc_rdma_unmap_dma(ctxt); + + switch (ctxt->wr_op) { + case IB_WR_SEND: + if (test_bit(RDMACTXT_F_FAST_UNREG, &ctxt->flags)) + svc_rdma_put_frmr(xprt, ctxt->frmr); + svc_rdma_put_context(ctxt, 1); + break; + + case IB_WR_RDMA_WRITE: + svc_rdma_put_context(ctxt, 0); + break; + + case IB_WR_RDMA_READ: + case IB_WR_RDMA_READ_WITH_INV: + if (test_bit(RDMACTXT_F_LAST_CTXT, &ctxt->flags)) { + struct svc_rdma_op_ctxt *read_hdr = ctxt->read_hdr; + BUG_ON(!read_hdr); + if (test_bit(RDMACTXT_F_FAST_UNREG, &ctxt->flags)) + svc_rdma_put_frmr(xprt, ctxt->frmr); + spin_lock_bh(&xprt->sc_rq_dto_lock); + set_bit(XPT_DATA, &xprt->sc_xprt.xpt_flags); + list_add_tail(&read_hdr->dto_q, + &xprt->sc_read_complete_q); + spin_unlock_bh(&xprt->sc_rq_dto_lock); + svc_xprt_enqueue(&xprt->sc_xprt); + } + svc_rdma_put_context(ctxt, 0); + break; + + default: + printk(KERN_ERR "svcrdma: unexpected completion type, " + "opcode=%d\n", + ctxt->wr_op); + break; + } +} + +/* * Send Queue Completion Handler - potentially called on interrupt context. * * Note that caller must hold a transport reference. @@ -341,16 +381,12 @@ static void sq_cq_reap(struct svcxprt_rdma *xprt) struct ib_cq *cq = xprt->sc_sq_cq; int ret; - if (!test_and_clear_bit(RDMAXPRT_SQ_PENDING, &xprt->sc_flags)) return; ib_req_notify_cq(xprt->sc_sq_cq, IB_CQ_NEXT_COMP); atomic_inc(&rdma_stat_sq_poll); while ((ret = ib_poll_cq(cq, 1, &wc)) > 0) { - ctxt = (struct svc_rdma_op_ctxt *)(unsigned long)wc.wr_id; - xprt = ctxt->xprt; - if (wc.status != IB_WC_SUCCESS) /* Close the transport */ set_bit(XPT_CLOSE, &xprt->sc_xprt.xpt_flags); @@ -359,32 +395,10 @@ static void sq_cq_reap(struct svcxprt_rdma *xprt) atomic_dec(&xprt->sc_sq_count); wake_up(&xprt->sc_send_wait); - switch (ctxt->wr_op) { - case IB_WR_SEND: - case IB_WR_RDMA_WRITE: - svc_rdma_put_context(ctxt, 1); - break; - - case IB_WR_RDMA_READ: - if (test_bit(RDMACTXT_F_LAST_CTXT, &ctxt->flags)) { - struct svc_rdma_op_ctxt *read_hdr = ctxt->read_hdr; - BUG_ON(!read_hdr); - set_bit(XPT_DATA, &xprt->sc_xprt.xpt_flags); - spin_lock_bh(&xprt->sc_read_complete_lock); - list_add_tail(&read_hdr->dto_q, - &xprt->sc_read_complete_q); - spin_unlock_bh(&xprt->sc_read_complete_lock); - svc_xprt_enqueue(&xprt->sc_xprt); - } - svc_rdma_put_context(ctxt, 0); - break; + ctxt = (struct svc_rdma_op_ctxt *)(unsigned long)wc.wr_id; + if (ctxt) + process_context(xprt, ctxt); - default: - printk(KERN_ERR "svcrdma: unexpected completion type, " - "opcode=%d, status=%d\n", - wc.opcode, wc.status); - break; - } svc_xprt_put(&xprt->sc_xprt); } @@ -423,40 +437,6 @@ static void sq_comp_handler(struct ib_cq *cq, void *cq_context) tasklet_schedule(&dto_tasklet); } -static void create_context_cache(struct svcxprt_rdma *xprt, - int ctxt_count, int ctxt_bump, int ctxt_max) -{ - struct svc_rdma_op_ctxt *ctxt; - int i; - - xprt->sc_ctxt_max = ctxt_max; - xprt->sc_ctxt_bump = ctxt_bump; - xprt->sc_ctxt_cnt = 0; - atomic_set(&xprt->sc_ctxt_used, 0); - - INIT_LIST_HEAD(&xprt->sc_ctxt_free); - for (i = 0; i < ctxt_count; i++) { - ctxt = kmalloc(sizeof(*ctxt), GFP_KERNEL); - if (ctxt) { - INIT_LIST_HEAD(&ctxt->free_list); - list_add(&ctxt->free_list, &xprt->sc_ctxt_free); - xprt->sc_ctxt_cnt++; - } - } -} - -static void destroy_context_cache(struct svcxprt_rdma *xprt) -{ - while (!list_empty(&xprt->sc_ctxt_free)) { - struct svc_rdma_op_ctxt *ctxt; - ctxt = list_entry(xprt->sc_ctxt_free.next, - struct svc_rdma_op_ctxt, - free_list); - list_del_init(&ctxt->free_list); - kfree(ctxt); - } -} - static struct svcxprt_rdma *rdma_create_xprt(struct svc_serv *serv, int listener) { @@ -469,12 +449,12 @@ static struct svcxprt_rdma *rdma_create_xprt(struct svc_serv *serv, INIT_LIST_HEAD(&cma_xprt->sc_dto_q); INIT_LIST_HEAD(&cma_xprt->sc_rq_dto_q); INIT_LIST_HEAD(&cma_xprt->sc_read_complete_q); + INIT_LIST_HEAD(&cma_xprt->sc_frmr_q); init_waitqueue_head(&cma_xprt->sc_send_wait); spin_lock_init(&cma_xprt->sc_lock); - spin_lock_init(&cma_xprt->sc_read_complete_lock); - spin_lock_init(&cma_xprt->sc_ctxt_lock); spin_lock_init(&cma_xprt->sc_rq_dto_lock); + spin_lock_init(&cma_xprt->sc_frmr_q_lock); cma_xprt->sc_ord = svcrdma_ord; @@ -482,21 +462,9 @@ static struct svcxprt_rdma *rdma_create_xprt(struct svc_serv *serv, cma_xprt->sc_max_requests = svcrdma_max_requests; cma_xprt->sc_sq_depth = svcrdma_max_requests * RPCRDMA_SQ_DEPTH_MULT; atomic_set(&cma_xprt->sc_sq_count, 0); + atomic_set(&cma_xprt->sc_ctxt_used, 0); - if (!listener) { - int reqs = cma_xprt->sc_max_requests; - create_context_cache(cma_xprt, - reqs << 1, /* starting size */ - reqs, /* bump amount */ - reqs + - cma_xprt->sc_sq_depth + - RPCRDMA_MAX_THREADS + 1); /* max */ - if (list_empty(&cma_xprt->sc_ctxt_free)) { - kfree(cma_xprt); - return NULL; - } - clear_bit(XPT_LISTENER, &cma_xprt->sc_xprt.xpt_flags); - } else + if (listener) set_bit(XPT_LISTENER, &cma_xprt->sc_xprt.xpt_flags); return cma_xprt; @@ -520,7 +488,7 @@ int svc_rdma_post_recv(struct svcxprt_rdma *xprt) struct ib_recv_wr recv_wr, *bad_recv_wr; struct svc_rdma_op_ctxt *ctxt; struct page *page; - unsigned long pa; + dma_addr_t pa; int sge_no; int buflen; int ret; @@ -535,9 +503,12 @@ int svc_rdma_post_recv(struct svcxprt_rdma *xprt) pa = ib_dma_map_page(xprt->sc_cm_id->device, page, 0, PAGE_SIZE, DMA_FROM_DEVICE); + if (ib_dma_mapping_error(xprt->sc_cm_id->device, pa)) + goto err_put_ctxt; + atomic_inc(&xprt->sc_dma_used); ctxt->sge[sge_no].addr = pa; ctxt->sge[sge_no].length = PAGE_SIZE; - ctxt->sge[sge_no].lkey = xprt->sc_phys_mr->lkey; + ctxt->sge[sge_no].lkey = xprt->sc_dma_lkey; buflen += PAGE_SIZE; } ctxt->count = sge_no; @@ -553,6 +524,10 @@ int svc_rdma_post_recv(struct svcxprt_rdma *xprt) svc_rdma_put_context(ctxt, 1); } return ret; + + err_put_ctxt: + svc_rdma_put_context(ctxt, 1); + return -ENOMEM; } /* @@ -566,7 +541,7 @@ int svc_rdma_post_recv(struct svcxprt_rdma *xprt) * will call the recvfrom method on the listen xprt which will accept the new * connection. */ -static void handle_connect_req(struct rdma_cm_id *new_cma_id) +static void handle_connect_req(struct rdma_cm_id *new_cma_id, size_t client_ird) { struct svcxprt_rdma *listen_xprt = new_cma_id->context; struct svcxprt_rdma *newxprt; @@ -583,6 +558,9 @@ static void handle_connect_req(struct rdma_cm_id *new_cma_id) dprintk("svcrdma: Creating newxprt=%p, cm_id=%p, listenxprt=%p\n", newxprt, newxprt->sc_cm_id, listen_xprt); + /* Save client advertised inbound read limit for use later in accept. */ + newxprt->sc_ord = client_ird; + /* Set the local and remote addresses in the transport */ sa = (struct sockaddr *)&newxprt->sc_cm_id->route.addr.dst_addr; svc_xprt_set_remote(&newxprt->sc_xprt, sa, svc_addr_len(sa)); @@ -619,7 +597,8 @@ static int rdma_listen_handler(struct rdma_cm_id *cma_id, case RDMA_CM_EVENT_CONNECT_REQUEST: dprintk("svcrdma: Connect request on cma_id=%p, xprt = %p, " "event=%d\n", cma_id, cma_id->context, event->event); - handle_connect_req(cma_id); + handle_connect_req(cma_id, + event->param.conn.initiator_depth); break; case RDMA_CM_EVENT_ESTABLISHED: @@ -739,6 +718,97 @@ static struct svc_xprt *svc_rdma_create(struct svc_serv *serv, return ERR_PTR(ret); } +static struct svc_rdma_fastreg_mr *rdma_alloc_frmr(struct svcxprt_rdma *xprt) +{ + struct ib_mr *mr; + struct ib_fast_reg_page_list *pl; + struct svc_rdma_fastreg_mr *frmr; + + frmr = kmalloc(sizeof(*frmr), GFP_KERNEL); + if (!frmr) + goto err; + + mr = ib_alloc_fast_reg_mr(xprt->sc_pd, RPCSVC_MAXPAGES); + if (!mr) + goto err_free_frmr; + + pl = ib_alloc_fast_reg_page_list(xprt->sc_cm_id->device, + RPCSVC_MAXPAGES); + if (!pl) + goto err_free_mr; + + frmr->mr = mr; + frmr->page_list = pl; + INIT_LIST_HEAD(&frmr->frmr_list); + return frmr; + + err_free_mr: + ib_dereg_mr(mr); + err_free_frmr: + kfree(frmr); + err: + return ERR_PTR(-ENOMEM); +} + +static void rdma_dealloc_frmr_q(struct svcxprt_rdma *xprt) +{ + struct svc_rdma_fastreg_mr *frmr; + + while (!list_empty(&xprt->sc_frmr_q)) { + frmr = list_entry(xprt->sc_frmr_q.next, + struct svc_rdma_fastreg_mr, frmr_list); + list_del_init(&frmr->frmr_list); + ib_dereg_mr(frmr->mr); + ib_free_fast_reg_page_list(frmr->page_list); + kfree(frmr); + } +} + +struct svc_rdma_fastreg_mr *svc_rdma_get_frmr(struct svcxprt_rdma *rdma) +{ + struct svc_rdma_fastreg_mr *frmr = NULL; + + spin_lock_bh(&rdma->sc_frmr_q_lock); + if (!list_empty(&rdma->sc_frmr_q)) { + frmr = list_entry(rdma->sc_frmr_q.next, + struct svc_rdma_fastreg_mr, frmr_list); + list_del_init(&frmr->frmr_list); + frmr->map_len = 0; + frmr->page_list_len = 0; + } + spin_unlock_bh(&rdma->sc_frmr_q_lock); + if (frmr) + return frmr; + + return rdma_alloc_frmr(rdma); +} + +static void frmr_unmap_dma(struct svcxprt_rdma *xprt, + struct svc_rdma_fastreg_mr *frmr) +{ + int page_no; + for (page_no = 0; page_no < frmr->page_list_len; page_no++) { + dma_addr_t addr = frmr->page_list->page_list[page_no]; + if (ib_dma_mapping_error(frmr->mr->device, addr)) + continue; + atomic_dec(&xprt->sc_dma_used); + ib_dma_unmap_single(frmr->mr->device, addr, PAGE_SIZE, + frmr->direction); + } +} + +void svc_rdma_put_frmr(struct svcxprt_rdma *rdma, + struct svc_rdma_fastreg_mr *frmr) +{ + if (frmr) { + frmr_unmap_dma(rdma, frmr); + spin_lock_bh(&rdma->sc_frmr_q_lock); + BUG_ON(!list_empty(&frmr->frmr_list)); + list_add(&frmr->frmr_list, &rdma->sc_frmr_q); + spin_unlock_bh(&rdma->sc_frmr_q_lock); + } +} + /* * This is the xpo_recvfrom function for listening endpoints. Its * purpose is to accept incoming connections. The CMA callback handler @@ -757,6 +827,8 @@ static struct svc_xprt *svc_rdma_accept(struct svc_xprt *xprt) struct rdma_conn_param conn_param; struct ib_qp_init_attr qp_attr; struct ib_device_attr devattr; + int dma_mr_acc; + int need_dma_mr; int ret; int i; @@ -793,8 +865,12 @@ static struct svc_xprt *svc_rdma_accept(struct svc_xprt *xprt) (size_t)svcrdma_max_requests); newxprt->sc_sq_depth = RPCRDMA_SQ_DEPTH_MULT * newxprt->sc_max_requests; - newxprt->sc_ord = min((size_t)devattr.max_qp_rd_atom, - (size_t)svcrdma_ord); + /* + * Limit ORD based on client limit, local device limit, and + * configured svcrdma limit. + */ + newxprt->sc_ord = min_t(size_t, devattr.max_qp_rd_atom, newxprt->sc_ord); + newxprt->sc_ord = min_t(size_t, svcrdma_ord, newxprt->sc_ord); newxprt->sc_pd = ib_alloc_pd(newxprt->sc_cm_id->device); if (IS_ERR(newxprt->sc_pd)) { @@ -868,15 +944,77 @@ static struct svc_xprt *svc_rdma_accept(struct svc_xprt *xprt) } newxprt->sc_qp = newxprt->sc_cm_id->qp; - /* Register all of physical memory */ - newxprt->sc_phys_mr = ib_get_dma_mr(newxprt->sc_pd, - IB_ACCESS_LOCAL_WRITE | - IB_ACCESS_REMOTE_WRITE); - if (IS_ERR(newxprt->sc_phys_mr)) { - dprintk("svcrdma: Failed to create DMA MR ret=%d\n", ret); + /* + * Use the most secure set of MR resources based on the + * transport type and available memory management features in + * the device. Here's the table implemented below: + * + * Fast Global DMA Remote WR + * Reg LKEY MR Access + * Sup'd Sup'd Needed Needed + * + * IWARP N N Y Y + * N Y Y Y + * Y N Y N + * Y Y N - + * + * IB N N Y N + * N Y N - + * Y N Y N + * Y Y N - + * + * NB: iWARP requires remote write access for the data sink + * of an RDMA_READ. IB does not. + */ + if (devattr.device_cap_flags & IB_DEVICE_MEM_MGT_EXTENSIONS) { + newxprt->sc_frmr_pg_list_len = + devattr.max_fast_reg_page_list_len; + newxprt->sc_dev_caps |= SVCRDMA_DEVCAP_FAST_REG; + } + + /* + * Determine if a DMA MR is required and if so, what privs are required + */ + switch (rdma_node_get_transport(newxprt->sc_cm_id->device->node_type)) { + case RDMA_TRANSPORT_IWARP: + newxprt->sc_dev_caps |= SVCRDMA_DEVCAP_READ_W_INV; + if (!(newxprt->sc_dev_caps & SVCRDMA_DEVCAP_FAST_REG)) { + need_dma_mr = 1; + dma_mr_acc = + (IB_ACCESS_LOCAL_WRITE | + IB_ACCESS_REMOTE_WRITE); + } else if (!(devattr.device_cap_flags & IB_DEVICE_LOCAL_DMA_LKEY)) { + need_dma_mr = 1; + dma_mr_acc = IB_ACCESS_LOCAL_WRITE; + } else + need_dma_mr = 0; + break; + case RDMA_TRANSPORT_IB: + if (!(devattr.device_cap_flags & IB_DEVICE_LOCAL_DMA_LKEY)) { + need_dma_mr = 1; + dma_mr_acc = IB_ACCESS_LOCAL_WRITE; + } else + need_dma_mr = 0; + break; + default: goto errout; } + /* Create the DMA MR if needed, otherwise, use the DMA LKEY */ + if (need_dma_mr) { + /* Register all of physical memory */ + newxprt->sc_phys_mr = + ib_get_dma_mr(newxprt->sc_pd, dma_mr_acc); + if (IS_ERR(newxprt->sc_phys_mr)) { + dprintk("svcrdma: Failed to create DMA MR ret=%d\n", + ret); + goto errout; + } + newxprt->sc_dma_lkey = newxprt->sc_phys_mr->lkey; + } else + newxprt->sc_dma_lkey = + newxprt->sc_cm_id->device->local_dma_lkey; + /* Post receive buffers */ for (i = 0; i < newxprt->sc_max_requests; i++) { ret = svc_rdma_post_recv(newxprt); @@ -987,7 +1125,6 @@ static void __svc_rdma_free(struct work_struct *work) * cm_id because the device ptr is needed to unmap the dma in * svc_rdma_put_context. */ - spin_lock_bh(&rdma->sc_read_complete_lock); while (!list_empty(&rdma->sc_read_complete_q)) { struct svc_rdma_op_ctxt *ctxt; ctxt = list_entry(rdma->sc_read_complete_q.next, @@ -996,10 +1133,8 @@ static void __svc_rdma_free(struct work_struct *work) list_del_init(&ctxt->dto_q); svc_rdma_put_context(ctxt, 1); } - spin_unlock_bh(&rdma->sc_read_complete_lock); /* Destroy queued, but not processed recv completions */ - spin_lock_bh(&rdma->sc_rq_dto_lock); while (!list_empty(&rdma->sc_rq_dto_q)) { struct svc_rdma_op_ctxt *ctxt; ctxt = list_entry(rdma->sc_rq_dto_q.next, @@ -1008,10 +1143,13 @@ static void __svc_rdma_free(struct work_struct *work) list_del_init(&ctxt->dto_q); svc_rdma_put_context(ctxt, 1); } - spin_unlock_bh(&rdma->sc_rq_dto_lock); /* Warn if we leaked a resource or under-referenced */ WARN_ON(atomic_read(&rdma->sc_ctxt_used) != 0); + WARN_ON(atomic_read(&rdma->sc_dma_used) != 0); + + /* De-allocate fastreg mr */ + rdma_dealloc_frmr_q(rdma); /* Destroy the QP if present (not a listener) */ if (rdma->sc_qp && !IS_ERR(rdma->sc_qp)) @@ -1032,7 +1170,6 @@ static void __svc_rdma_free(struct work_struct *work) /* Destroy the CM ID */ rdma_destroy_id(rdma->sc_cm_id); - destroy_context_cache(rdma); kfree(rdma); } @@ -1067,21 +1204,59 @@ static int svc_rdma_has_wspace(struct svc_xprt *xprt) return 1; } +/* + * Attempt to register the kvec representing the RPC memory with the + * device. + * + * Returns: + * NULL : The device does not support fastreg or there were no more + * fastreg mr. + * frmr : The kvec register request was successfully posted. + * <0 : An error was encountered attempting to register the kvec. + */ +int svc_rdma_fastreg(struct svcxprt_rdma *xprt, + struct svc_rdma_fastreg_mr *frmr) +{ + struct ib_send_wr fastreg_wr; + u8 key; + + /* Bump the key */ + key = (u8)(frmr->mr->lkey & 0x000000FF); + ib_update_fast_reg_key(frmr->mr, ++key); + + /* Prepare FASTREG WR */ + memset(&fastreg_wr, 0, sizeof fastreg_wr); + fastreg_wr.opcode = IB_WR_FAST_REG_MR; + fastreg_wr.send_flags = IB_SEND_SIGNALED; + fastreg_wr.wr.fast_reg.iova_start = (unsigned long)frmr->kva; + fastreg_wr.wr.fast_reg.page_list = frmr->page_list; + fastreg_wr.wr.fast_reg.page_list_len = frmr->page_list_len; + fastreg_wr.wr.fast_reg.page_shift = PAGE_SHIFT; + fastreg_wr.wr.fast_reg.length = frmr->map_len; + fastreg_wr.wr.fast_reg.access_flags = frmr->access_flags; + fastreg_wr.wr.fast_reg.rkey = frmr->mr->lkey; + return svc_rdma_send(xprt, &fastreg_wr); +} + int svc_rdma_send(struct svcxprt_rdma *xprt, struct ib_send_wr *wr) { - struct ib_send_wr *bad_wr; + struct ib_send_wr *bad_wr, *n_wr; + int wr_count; + int i; int ret; if (test_bit(XPT_CLOSE, &xprt->sc_xprt.xpt_flags)) return -ENOTCONN; BUG_ON(wr->send_flags != IB_SEND_SIGNALED); - BUG_ON(((struct svc_rdma_op_ctxt *)(unsigned long)wr->wr_id)->wr_op != - wr->opcode); + wr_count = 1; + for (n_wr = wr->next; n_wr; n_wr = n_wr->next) + wr_count++; + /* If the SQ is full, wait until an SQ entry is available */ while (1) { spin_lock_bh(&xprt->sc_lock); - if (xprt->sc_sq_depth == atomic_read(&xprt->sc_sq_count)) { + if (xprt->sc_sq_depth < atomic_read(&xprt->sc_sq_count) + wr_count) { spin_unlock_bh(&xprt->sc_lock); atomic_inc(&rdma_stat_sq_starve); @@ -1096,19 +1271,26 @@ int svc_rdma_send(struct svcxprt_rdma *xprt, struct ib_send_wr *wr) return 0; continue; } - /* Bumped used SQ WR count and post */ - svc_xprt_get(&xprt->sc_xprt); + /* Take a transport ref for each WR posted */ + for (i = 0; i < wr_count; i++) + svc_xprt_get(&xprt->sc_xprt); + + /* Bump used SQ WR count and post */ + atomic_add(wr_count, &xprt->sc_sq_count); ret = ib_post_send(xprt->sc_qp, wr, &bad_wr); - if (!ret) - atomic_inc(&xprt->sc_sq_count); - else { - svc_xprt_put(&xprt->sc_xprt); + if (ret) { + set_bit(XPT_CLOSE, &xprt->sc_xprt.xpt_flags); + atomic_sub(wr_count, &xprt->sc_sq_count); + for (i = 0; i < wr_count; i ++) + svc_xprt_put(&xprt->sc_xprt); dprintk("svcrdma: failed to post SQ WR rc=%d, " "sc_sq_count=%d, sc_sq_depth=%d\n", ret, atomic_read(&xprt->sc_sq_count), xprt->sc_sq_depth); } spin_unlock_bh(&xprt->sc_lock); + if (ret) + wake_up(&xprt->sc_send_wait); break; } return ret; @@ -1134,7 +1316,12 @@ void svc_rdma_send_error(struct svcxprt_rdma *xprt, struct rpcrdma_msg *rmsgp, /* Prepare SGE for local address */ sge.addr = ib_dma_map_page(xprt->sc_cm_id->device, p, 0, PAGE_SIZE, DMA_FROM_DEVICE); - sge.lkey = xprt->sc_phys_mr->lkey; + if (ib_dma_mapping_error(xprt->sc_cm_id->device, sge.addr)) { + put_page(p); + return; + } + atomic_inc(&xprt->sc_dma_used); + sge.lkey = xprt->sc_dma_lkey; sge.length = length; ctxt = svc_rdma_get_context(xprt); @@ -1155,6 +1342,9 @@ void svc_rdma_send_error(struct svcxprt_rdma *xprt, struct rpcrdma_msg *rmsgp, if (ret) { dprintk("svcrdma: Error %d posting send for protocol error\n", ret); + ib_dma_unmap_page(xprt->sc_cm_id->device, + sge.addr, PAGE_SIZE, + DMA_FROM_DEVICE); svc_rdma_put_context(ctxt, 1); } } diff --git a/net/sunrpc/xprtsock.c b/net/sunrpc/xprtsock.c index ddbe981ab516..9a288d5eea64 100644 --- a/net/sunrpc/xprtsock.c +++ b/net/sunrpc/xprtsock.c @@ -3,8 +3,8 @@ * * Client-side transport implementation for sockets. * - * TCP callback races fixes (C) 1998 Red Hat Software <alan@redhat.com> - * TCP send fixes (C) 1998 Red Hat Software <alan@redhat.com> + * TCP callback races fixes (C) 1998 Red Hat + * TCP send fixes (C) 1998 Red Hat * TCP NFS related read + write fixes * (C) 1999 Dave Airlie, University of Limerick, Ireland <airlied@linux.ie> * @@ -579,7 +579,6 @@ static int xs_udp_send_request(struct rpc_task *task) req->rq_svec->iov_base, req->rq_svec->iov_len); - req->rq_xtime = jiffies; status = xs_sendpages(transport->sock, xs_addr(xprt), xprt->addrlen, xdr, @@ -671,7 +670,6 @@ static int xs_tcp_send_request(struct rpc_task *task) * to cope with writespace callbacks arriving _after_ we have * called sendmsg(). */ while (1) { - req->rq_xtime = jiffies; status = xs_sendpages(transport->sock, NULL, 0, xdr, req->rq_bytes_sent); diff --git a/net/sysctl_net.c b/net/sysctl_net.c index b4f0525f91af..972201cd5fa7 100644 --- a/net/sysctl_net.c +++ b/net/sysctl_net.c @@ -4,7 +4,6 @@ * Begun April 1, 1996, Mike Shaver. * Added /proc/sys/net directories for each protocol family. [MS] * - * $Log: sysctl_net.c,v $ * Revision 1.2 1996/05/08 20:24:40 shaver * Added bits for NET_BRIDGE and the NET_IPV4_ARP stuff and * NET_IPV4_IP_FORWARD. @@ -30,25 +29,59 @@ #include <linux/if_tr.h> #endif -static struct list_head * +static struct ctl_table_set * net_ctl_header_lookup(struct ctl_table_root *root, struct nsproxy *namespaces) { - return &namespaces->net_ns->sysctl_table_headers; + return &namespaces->net_ns->sysctls; +} + +static int is_seen(struct ctl_table_set *set) +{ + return ¤t->nsproxy->net_ns->sysctls == set; +} + +/* Return standard mode bits for table entry. */ +static int net_ctl_permissions(struct ctl_table_root *root, + struct nsproxy *nsproxy, + struct ctl_table *table) +{ + /* Allow network administrator to have same access as root. */ + if (capable(CAP_NET_ADMIN)) { + int mode = (table->mode >> 6) & 7; + return (mode << 6) | (mode << 3) | mode; + } + return table->mode; } static struct ctl_table_root net_sysctl_root = { .lookup = net_ctl_header_lookup, + .permissions = net_ctl_permissions, +}; + +static int net_ctl_ro_header_perms(struct ctl_table_root *root, + struct nsproxy *namespaces, struct ctl_table *table) +{ + if (namespaces->net_ns == &init_net) + return table->mode; + else + return table->mode & ~0222; +} + +static struct ctl_table_root net_sysctl_ro_root = { + .permissions = net_ctl_ro_header_perms, }; static int sysctl_net_init(struct net *net) { - INIT_LIST_HEAD(&net->sysctl_table_headers); + setup_sysctl_set(&net->sysctls, + &net_sysctl_ro_root.default_set, + is_seen); return 0; } static void sysctl_net_exit(struct net *net) { - WARN_ON(!list_empty(&net->sysctl_table_headers)); + WARN_ON(!list_empty(&net->sysctls.list)); return; } @@ -64,6 +97,8 @@ static __init int sysctl_init(void) if (ret) goto out; register_sysctl_root(&net_sysctl_root); + setup_sysctl_set(&net_sysctl_ro_root.default_set, NULL, NULL); + register_sysctl_root(&net_sysctl_ro_root); out: return ret; } @@ -80,6 +115,14 @@ struct ctl_table_header *register_net_sysctl_table(struct net *net, } EXPORT_SYMBOL_GPL(register_net_sysctl_table); +struct ctl_table_header *register_net_sysctl_rotable(const + struct ctl_path *path, struct ctl_table *table) +{ + return __register_sysctl_paths(&net_sysctl_ro_root, + &init_nsproxy, path, table); +} +EXPORT_SYMBOL_GPL(register_net_sysctl_rotable); + void unregister_net_sysctl_table(struct ctl_table_header *header) { unregister_sysctl_table(header); diff --git a/net/tipc/bcast.c b/net/tipc/bcast.c index e7880172ef19..3ddaff42d1bb 100644 --- a/net/tipc/bcast.c +++ b/net/tipc/bcast.c @@ -96,8 +96,8 @@ struct bcbearer { struct media media; struct bcbearer_pair bpairs[MAX_BEARERS]; struct bcbearer_pair bpairs_temp[TIPC_MAX_LINK_PRI + 1]; - struct node_map remains; - struct node_map remains_new; + struct tipc_node_map remains; + struct tipc_node_map remains_new; }; /** @@ -110,7 +110,7 @@ struct bcbearer { struct bclink { struct link link; - struct node node; + struct tipc_node node; }; @@ -149,7 +149,7 @@ static void bcbuf_decr_acks(struct sk_buff *buf) * Called with 'node' locked, bc_lock unlocked */ -static void bclink_set_gap(struct node *n_ptr) +static void bclink_set_gap(struct tipc_node *n_ptr) { struct sk_buff *buf = n_ptr->bclink.deferred_head; @@ -202,7 +202,7 @@ static void bclink_retransmit_pkt(u32 after, u32 to) * Node is locked, bc_lock unlocked. */ -void tipc_bclink_acknowledge(struct node *n_ptr, u32 acked) +void tipc_bclink_acknowledge(struct tipc_node *n_ptr, u32 acked) { struct sk_buff *crs; struct sk_buff *next; @@ -250,7 +250,7 @@ void tipc_bclink_acknowledge(struct node *n_ptr, u32 acked) * tipc_net_lock and node lock set */ -static void bclink_send_ack(struct node *n_ptr) +static void bclink_send_ack(struct tipc_node *n_ptr) { struct link *l_ptr = n_ptr->active_links[n_ptr->addr & 1]; @@ -264,7 +264,7 @@ static void bclink_send_ack(struct node *n_ptr) * tipc_net_lock and node lock set */ -static void bclink_send_nack(struct node *n_ptr) +static void bclink_send_nack(struct tipc_node *n_ptr) { struct sk_buff *buf; struct tipc_msg *msg; @@ -276,7 +276,7 @@ static void bclink_send_nack(struct node *n_ptr) if (buf) { msg = buf_msg(buf); msg_init(msg, BCAST_PROTOCOL, STATE_MSG, - TIPC_OK, INT_H_SIZE, n_ptr->addr); + INT_H_SIZE, n_ptr->addr); msg_set_mc_netid(msg, tipc_net_id); msg_set_bcast_ack(msg, mod(n_ptr->bclink.last_in)); msg_set_bcgap_after(msg, n_ptr->bclink.gap_after); @@ -308,7 +308,7 @@ static void bclink_send_nack(struct node *n_ptr) * tipc_net_lock and node lock set */ -void tipc_bclink_check_gap(struct node *n_ptr, u32 last_sent) +void tipc_bclink_check_gap(struct tipc_node *n_ptr, u32 last_sent) { if (!n_ptr->bclink.supported || less_eq(last_sent, mod(n_ptr->bclink.last_in))) @@ -328,7 +328,7 @@ void tipc_bclink_check_gap(struct node *n_ptr, u32 last_sent) static void tipc_bclink_peek_nack(u32 dest, u32 sender_tag, u32 gap_after, u32 gap_to) { - struct node *n_ptr = tipc_node_find(dest); + struct tipc_node *n_ptr = tipc_node_find(dest); u32 my_after, my_to; if (unlikely(!n_ptr || !tipc_node_is_up(n_ptr))) @@ -418,7 +418,7 @@ void tipc_bclink_recv_pkt(struct sk_buff *buf) static int rx_count = 0; #endif struct tipc_msg *msg = buf_msg(buf); - struct node* node = tipc_node_find(msg_prevnode(msg)); + struct tipc_node* node = tipc_node_find(msg_prevnode(msg)); u32 next_in; u32 seqno; struct sk_buff *deferred; @@ -538,7 +538,7 @@ u32 tipc_bclink_get_last_sent(void) return last_sent; } -u32 tipc_bclink_acks_missing(struct node *n_ptr) +u32 tipc_bclink_acks_missing(struct tipc_node *n_ptr) { return (n_ptr->bclink.supported && (tipc_bclink_get_last_sent() != n_ptr->bclink.acked)); @@ -571,7 +571,7 @@ static int tipc_bcbearer_send(struct sk_buff *buf, assert(tipc_cltr_bcast_nodes.count != 0); bcbuf_set_acks(buf, tipc_cltr_bcast_nodes.count); msg = buf_msg(buf); - msg_set_non_seq(msg); + msg_set_non_seq(msg, 1); msg_set_mc_netid(msg, tipc_net_id); } @@ -611,7 +611,7 @@ swap: bcbearer->bpairs[bp_index].secondary = p; update: if (bcbearer->remains_new.count == 0) - return TIPC_OK; + return 0; bcbearer->remains = bcbearer->remains_new; } @@ -620,7 +620,7 @@ update: bcbearer->bearer.publ.blocked = 1; bcl->stats.bearer_congs++; - return ~TIPC_OK; + return 1; } /** @@ -756,7 +756,7 @@ int tipc_bclink_reset_stats(void) spin_lock_bh(&bc_lock); memset(&bcl->stats, 0, sizeof(bcl->stats)); spin_unlock_bh(&bc_lock); - return TIPC_OK; + return 0; } int tipc_bclink_set_queue_limits(u32 limit) @@ -769,7 +769,7 @@ int tipc_bclink_set_queue_limits(u32 limit) spin_lock_bh(&bc_lock); tipc_link_set_queue_limits(bcl, limit); spin_unlock_bh(&bc_lock); - return TIPC_OK; + return 0; } int tipc_bclink_init(void) @@ -810,7 +810,7 @@ int tipc_bclink_init(void) tipc_printbuf_init(&bcl->print_buf, pb, BCLINK_LOG_BUF_SIZE); } - return TIPC_OK; + return 0; } void tipc_bclink_stop(void) diff --git a/net/tipc/bcast.h b/net/tipc/bcast.h index a2416fa6b906..5aa024b99c55 100644 --- a/net/tipc/bcast.h +++ b/net/tipc/bcast.h @@ -41,12 +41,12 @@ #define WSIZE 32 /** - * struct node_map - set of node identifiers + * struct tipc_node_map - set of node identifiers * @count: # of nodes in set * @map: bitmap of node identifiers that are in the set */ -struct node_map { +struct tipc_node_map { u32 count; u32 map[MAX_NODES / WSIZE]; }; @@ -68,7 +68,7 @@ struct port_list { }; -struct node; +struct tipc_node; extern char tipc_bclink_name[]; @@ -77,7 +77,7 @@ extern char tipc_bclink_name[]; * nmap_add - add a node to a node map */ -static inline void tipc_nmap_add(struct node_map *nm_ptr, u32 node) +static inline void tipc_nmap_add(struct tipc_node_map *nm_ptr, u32 node) { int n = tipc_node(node); int w = n / WSIZE; @@ -93,7 +93,7 @@ static inline void tipc_nmap_add(struct node_map *nm_ptr, u32 node) * nmap_remove - remove a node from a node map */ -static inline void tipc_nmap_remove(struct node_map *nm_ptr, u32 node) +static inline void tipc_nmap_remove(struct tipc_node_map *nm_ptr, u32 node) { int n = tipc_node(node); int w = n / WSIZE; @@ -109,7 +109,7 @@ static inline void tipc_nmap_remove(struct node_map *nm_ptr, u32 node) * nmap_equal - test for equality of node maps */ -static inline int tipc_nmap_equal(struct node_map *nm_a, struct node_map *nm_b) +static inline int tipc_nmap_equal(struct tipc_node_map *nm_a, struct tipc_node_map *nm_b) { return !memcmp(nm_a, nm_b, sizeof(*nm_a)); } @@ -121,8 +121,8 @@ static inline int tipc_nmap_equal(struct node_map *nm_a, struct node_map *nm_b) * @nm_diff: output node map A-B (i.e. nodes of A that are not in B) */ -static inline void tipc_nmap_diff(struct node_map *nm_a, struct node_map *nm_b, - struct node_map *nm_diff) +static inline void tipc_nmap_diff(struct tipc_node_map *nm_a, struct tipc_node_map *nm_b, + struct tipc_node_map *nm_diff) { int stop = sizeof(nm_a->map) / sizeof(u32); int w; @@ -195,12 +195,12 @@ static inline void tipc_port_list_free(struct port_list *pl_ptr) int tipc_bclink_init(void); void tipc_bclink_stop(void); -void tipc_bclink_acknowledge(struct node *n_ptr, u32 acked); +void tipc_bclink_acknowledge(struct tipc_node *n_ptr, u32 acked); int tipc_bclink_send_msg(struct sk_buff *buf); void tipc_bclink_recv_pkt(struct sk_buff *buf); u32 tipc_bclink_get_last_sent(void); -u32 tipc_bclink_acks_missing(struct node *n_ptr); -void tipc_bclink_check_gap(struct node *n_ptr, u32 seqno); +u32 tipc_bclink_acks_missing(struct tipc_node *n_ptr); +void tipc_bclink_check_gap(struct tipc_node *n_ptr, u32 seqno); int tipc_bclink_stats(char *stats_buf, const u32 buf_size); int tipc_bclink_reset_stats(void); int tipc_bclink_set_queue_limits(u32 limit); diff --git a/net/tipc/bearer.c b/net/tipc/bearer.c index 271a375b49b7..a7a36779b9b3 100644 --- a/net/tipc/bearer.c +++ b/net/tipc/bearer.c @@ -370,7 +370,7 @@ void tipc_bearer_remove_dest(struct bearer *b_ptr, u32 dest) */ static int bearer_push(struct bearer *b_ptr) { - u32 res = TIPC_OK; + u32 res = 0; struct link *ln, *tln; if (b_ptr->publ.blocked) @@ -599,7 +599,7 @@ int tipc_block_bearer(const char *name) spin_lock_bh(&b_ptr->publ.lock); b_ptr->publ.blocked = 1; list_for_each_entry_safe(l_ptr, temp_l_ptr, &b_ptr->links, link_list) { - struct node *n_ptr = l_ptr->owner; + struct tipc_node *n_ptr = l_ptr->owner; spin_lock_bh(&n_ptr->lock); tipc_link_reset(l_ptr); @@ -607,7 +607,7 @@ int tipc_block_bearer(const char *name) } spin_unlock_bh(&b_ptr->publ.lock); read_unlock_bh(&tipc_net_lock); - return TIPC_OK; + return 0; } /** @@ -645,7 +645,7 @@ static int bearer_disable(const char *name) } spin_unlock_bh(&b_ptr->publ.lock); memset(b_ptr, 0, sizeof(struct bearer)); - return TIPC_OK; + return 0; } int tipc_disable_bearer(const char *name) @@ -668,7 +668,7 @@ int tipc_bearer_init(void) tipc_bearers = kcalloc(MAX_BEARERS, sizeof(struct bearer), GFP_ATOMIC); media_list = kcalloc(MAX_MEDIA, sizeof(struct media), GFP_ATOMIC); if (tipc_bearers && media_list) { - res = TIPC_OK; + res = 0; } else { kfree(tipc_bearers); kfree(media_list); diff --git a/net/tipc/bearer.h b/net/tipc/bearer.h index 6a36b6600e6c..ca5734892713 100644 --- a/net/tipc/bearer.h +++ b/net/tipc/bearer.h @@ -104,7 +104,7 @@ struct bearer { u32 continue_count; int active; char net_plane; - struct node_map nodes; + struct tipc_node_map nodes; }; struct bearer_name { diff --git a/net/tipc/cluster.c b/net/tipc/cluster.c index 4bb3404f610b..689fdefe9d04 100644 --- a/net/tipc/cluster.c +++ b/net/tipc/cluster.c @@ -48,8 +48,8 @@ static void tipc_cltr_multicast(struct cluster *c_ptr, struct sk_buff *buf, u32 lower, u32 upper); static struct sk_buff *tipc_cltr_prepare_routing_msg(u32 data_size, u32 dest); -struct node **tipc_local_nodes = NULL; -struct node_map tipc_cltr_bcast_nodes = {0,{0,}}; +struct tipc_node **tipc_local_nodes = NULL; +struct tipc_node_map tipc_cltr_bcast_nodes = {0,{0,}}; u32 tipc_highest_allowed_slave = 0; struct cluster *tipc_cltr_create(u32 addr) @@ -115,7 +115,7 @@ void tipc_cltr_delete(struct cluster *c_ptr) u32 tipc_cltr_next_node(struct cluster *c_ptr, u32 addr) { - struct node *n_ptr; + struct tipc_node *n_ptr; u32 n_num = tipc_node(addr) + 1; if (!c_ptr) @@ -133,7 +133,7 @@ u32 tipc_cltr_next_node(struct cluster *c_ptr, u32 addr) return 0; } -void tipc_cltr_attach_node(struct cluster *c_ptr, struct node *n_ptr) +void tipc_cltr_attach_node(struct cluster *c_ptr, struct tipc_node *n_ptr) { u32 n_num = tipc_node(n_ptr->addr); u32 max_n_num = tipc_max_nodes; @@ -196,7 +196,7 @@ u32 tipc_cltr_select_router(struct cluster *c_ptr, u32 ref) * Uses deterministic and fair algorithm. */ -struct node *tipc_cltr_select_node(struct cluster *c_ptr, u32 selector) +struct tipc_node *tipc_cltr_select_node(struct cluster *c_ptr, u32 selector) { u32 n_num; u32 mask = tipc_max_nodes; @@ -238,7 +238,7 @@ static struct sk_buff *tipc_cltr_prepare_routing_msg(u32 data_size, u32 dest) if (buf) { msg = buf_msg(buf); memset((char *)msg, 0, size); - msg_init(msg, ROUTE_DISTRIBUTOR, 0, TIPC_OK, INT_H_SIZE, dest); + msg_init(msg, ROUTE_DISTRIBUTOR, 0, INT_H_SIZE, dest); } return buf; } @@ -379,7 +379,7 @@ void tipc_cltr_recv_routing_table(struct sk_buff *buf) { struct tipc_msg *msg = buf_msg(buf); struct cluster *c_ptr; - struct node *n_ptr; + struct tipc_node *n_ptr; unchar *node_table; u32 table_size; u32 router; @@ -499,7 +499,7 @@ static void tipc_cltr_multicast(struct cluster *c_ptr, struct sk_buff *buf, u32 lower, u32 upper) { struct sk_buff *buf_copy; - struct node *n_ptr; + struct tipc_node *n_ptr; u32 n_num; u32 tstop; @@ -534,7 +534,7 @@ void tipc_cltr_broadcast(struct sk_buff *buf) { struct sk_buff *buf_copy; struct cluster *c_ptr; - struct node *n_ptr; + struct tipc_node *n_ptr; u32 n_num; u32 tstart; u32 tstop; @@ -571,6 +571,6 @@ exit: int tipc_cltr_init(void) { tipc_highest_allowed_slave = LOWEST_SLAVE + tipc_max_slaves; - return tipc_cltr_create(tipc_own_addr) ? TIPC_OK : -ENOMEM; + return tipc_cltr_create(tipc_own_addr) ? 0 : -ENOMEM; } diff --git a/net/tipc/cluster.h b/net/tipc/cluster.h index 62df074afaec..333efb0b9c44 100644 --- a/net/tipc/cluster.h +++ b/net/tipc/cluster.h @@ -54,24 +54,24 @@ struct cluster { u32 addr; struct _zone *owner; - struct node **nodes; + struct tipc_node **nodes; u32 highest_node; u32 highest_slave; }; -extern struct node **tipc_local_nodes; +extern struct tipc_node **tipc_local_nodes; extern u32 tipc_highest_allowed_slave; -extern struct node_map tipc_cltr_bcast_nodes; +extern struct tipc_node_map tipc_cltr_bcast_nodes; void tipc_cltr_remove_as_router(struct cluster *c_ptr, u32 router); void tipc_cltr_send_ext_routes(struct cluster *c_ptr, u32 dest); -struct node *tipc_cltr_select_node(struct cluster *c_ptr, u32 selector); +struct tipc_node *tipc_cltr_select_node(struct cluster *c_ptr, u32 selector); u32 tipc_cltr_select_router(struct cluster *c_ptr, u32 ref); void tipc_cltr_recv_routing_table(struct sk_buff *buf); struct cluster *tipc_cltr_create(u32 addr); void tipc_cltr_delete(struct cluster *c_ptr); -void tipc_cltr_attach_node(struct cluster *c_ptr, struct node *n_ptr); +void tipc_cltr_attach_node(struct cluster *c_ptr, struct tipc_node *n_ptr); void tipc_cltr_send_slave_routes(struct cluster *c_ptr, u32 dest); void tipc_cltr_broadcast(struct sk_buff *buf); int tipc_cltr_init(void); diff --git a/net/tipc/config.c b/net/tipc/config.c index c71337a22d33..ca3544d030c7 100644 --- a/net/tipc/config.c +++ b/net/tipc/config.c @@ -2,7 +2,7 @@ * net/tipc/config.c: TIPC configuration management code * * Copyright (c) 2002-2006, Ericsson AB - * Copyright (c) 2004-2006, Wind River Systems + * Copyright (c) 2004-2007, Wind River Systems * All rights reserved. * * Redistribution and use in source and binary forms, with or without @@ -293,7 +293,6 @@ static struct sk_buff *cfg_set_own_addr(void) if (tipc_mode == TIPC_NET_MODE) return tipc_cfg_reply_error_string(TIPC_CFG_NOT_SUPPORTED " (cannot change node address once assigned)"); - tipc_own_addr = addr; /* * Must release all spinlocks before calling start_net() because @@ -306,7 +305,7 @@ static struct sk_buff *cfg_set_own_addr(void) */ spin_unlock_bh(&config_lock); - tipc_core_start_net(); + tipc_core_start_net(addr); spin_lock_bh(&config_lock); return tipc_cfg_reply_none(); } @@ -529,7 +528,7 @@ struct sk_buff *tipc_cfg_do_cmd(u32 orig_node, u16 cmd, const void *request_area break; #endif case TIPC_CMD_SET_LOG_SIZE: - rep_tlv_buf = tipc_log_resize(req_tlv_area, req_tlv_space); + rep_tlv_buf = tipc_log_resize_cmd(req_tlv_area, req_tlv_space); break; case TIPC_CMD_DUMP_LOG: rep_tlv_buf = tipc_log_dump(); @@ -602,6 +601,10 @@ struct sk_buff *tipc_cfg_do_cmd(u32 orig_node, u16 cmd, const void *request_area case TIPC_CMD_GET_NETID: rep_tlv_buf = tipc_cfg_reply_unsigned(tipc_net_id); break; + case TIPC_CMD_NOT_NET_ADMIN: + rep_tlv_buf = + tipc_cfg_reply_error_string(TIPC_CFG_NOT_NET_ADMIN); + break; default: rep_tlv_buf = tipc_cfg_reply_error_string(TIPC_CFG_NOT_SUPPORTED " (unknown command)"); diff --git a/net/tipc/core.c b/net/tipc/core.c index 740aac5cdfb6..3256bd7d398f 100644 --- a/net/tipc/core.c +++ b/net/tipc/core.c @@ -49,7 +49,7 @@ #include "config.h" -#define TIPC_MOD_VER "1.6.3" +#define TIPC_MOD_VER "1.6.4" #ifndef CONFIG_TIPC_ZONES #define CONFIG_TIPC_ZONES 3 @@ -117,11 +117,11 @@ void tipc_core_stop_net(void) * start_net - start TIPC networking sub-systems */ -int tipc_core_start_net(void) +int tipc_core_start_net(unsigned long addr) { int res; - if ((res = tipc_net_start()) || + if ((res = tipc_net_start(addr)) || (res = tipc_eth_media_start())) { tipc_core_stop_net(); } @@ -164,8 +164,7 @@ int tipc_core_start(void) tipc_mode = TIPC_NODE_MODE; if ((res = tipc_handler_start()) || - (res = tipc_ref_table_init(tipc_max_ports + tipc_max_subscriptions, - tipc_random)) || + (res = tipc_ref_table_init(tipc_max_ports, tipc_random)) || (res = tipc_reg_start()) || (res = tipc_nametbl_init()) || (res = tipc_k_signal((Handler)tipc_subscr_start, 0)) || @@ -182,7 +181,7 @@ static int __init tipc_init(void) { int res; - tipc_log_reinit(CONFIG_TIPC_LOG); + tipc_log_resize(CONFIG_TIPC_LOG); info("Activated (version " TIPC_MOD_VER " compiled " __DATE__ " " __TIME__ ")\n"); @@ -209,7 +208,7 @@ static void __exit tipc_exit(void) tipc_core_stop_net(); tipc_core_stop(); info("Deactivated\n"); - tipc_log_stop(); + tipc_log_resize(0); } module_init(tipc_init); diff --git a/net/tipc/core.h b/net/tipc/core.h index 5a0e4878d3b7..a881f92a8537 100644 --- a/net/tipc/core.h +++ b/net/tipc/core.h @@ -2,7 +2,7 @@ * net/tipc/core.h: Include file for TIPC global declarations * * Copyright (c) 2005-2006, Ericsson AB - * Copyright (c) 2005-2006, Wind River Systems + * Copyright (c) 2005-2007, Wind River Systems * All rights reserved. * * Redistribution and use in source and binary forms, with or without @@ -59,84 +59,108 @@ #include <linux/vmalloc.h> /* - * TIPC debugging code + * TIPC sanity test macros */ #define assert(i) BUG_ON(!(i)) -struct tipc_msg; -extern struct print_buf *TIPC_NULL, *TIPC_CONS, *TIPC_LOG; -extern struct print_buf *TIPC_TEE(struct print_buf *, struct print_buf *); -void tipc_msg_print(struct print_buf*,struct tipc_msg *,const char*); -void tipc_printf(struct print_buf *, const char *fmt, ...); -void tipc_dump(struct print_buf*,const char *fmt, ...); - -#ifdef CONFIG_TIPC_DEBUG - /* - * TIPC debug support included: - * - system messages are printed to TIPC_OUTPUT print buffer - * - debug messages are printed to DBG_OUTPUT print buffer + * TIPC system monitoring code */ -#define err(fmt, arg...) tipc_printf(TIPC_OUTPUT, KERN_ERR "TIPC: " fmt, ## arg) -#define warn(fmt, arg...) tipc_printf(TIPC_OUTPUT, KERN_WARNING "TIPC: " fmt, ## arg) -#define info(fmt, arg...) tipc_printf(TIPC_OUTPUT, KERN_NOTICE "TIPC: " fmt, ## arg) +/* + * TIPC's print buffer subsystem supports the following print buffers: + * + * TIPC_NULL : null buffer (i.e. print nowhere) + * TIPC_CONS : system console + * TIPC_LOG : TIPC log buffer + * &buf : user-defined buffer (struct print_buf *) + * + * Note: TIPC_LOG is configured to echo its output to the system console; + * user-defined buffers can be configured to do the same thing. + */ -#define dbg(fmt, arg...) do {if (DBG_OUTPUT != TIPC_NULL) tipc_printf(DBG_OUTPUT, fmt, ## arg);} while(0) -#define msg_dbg(msg, txt) do {if (DBG_OUTPUT != TIPC_NULL) tipc_msg_print(DBG_OUTPUT, msg, txt);} while(0) -#define dump(fmt, arg...) do {if (DBG_OUTPUT != TIPC_NULL) tipc_dump(DBG_OUTPUT, fmt, ##arg);} while(0) +extern struct print_buf *const TIPC_NULL; +extern struct print_buf *const TIPC_CONS; +extern struct print_buf *const TIPC_LOG; +void tipc_printf(struct print_buf *, const char *fmt, ...); /* - * By default, TIPC_OUTPUT is defined to be system console and TIPC log buffer, - * while DBG_OUTPUT is the null print buffer. These defaults can be changed - * here, or on a per .c file basis, by redefining these symbols. The following - * print buffer options are available: - * - * TIPC_NULL : null buffer (i.e. print nowhere) - * TIPC_CONS : system console - * TIPC_LOG : TIPC log buffer - * &buf : user-defined buffer (struct print_buf *) - * TIPC_TEE(&buf_a,&buf_b) : list of buffers (eg. TIPC_TEE(TIPC_CONS,TIPC_LOG)) + * TIPC_OUTPUT is the destination print buffer for system messages. */ #ifndef TIPC_OUTPUT -#define TIPC_OUTPUT TIPC_TEE(TIPC_CONS,TIPC_LOG) -#endif - -#ifndef DBG_OUTPUT -#define DBG_OUTPUT TIPC_NULL +#define TIPC_OUTPUT TIPC_LOG #endif -#else - /* - * TIPC debug support not included: - * - system messages are printed to system console - * - debug messages are not printed + * TIPC can be configured to send system messages to TIPC_OUTPUT + * or to the system console only. */ +#ifdef CONFIG_TIPC_DEBUG + +#define err(fmt, arg...) tipc_printf(TIPC_OUTPUT, \ + KERN_ERR "TIPC: " fmt, ## arg) +#define warn(fmt, arg...) tipc_printf(TIPC_OUTPUT, \ + KERN_WARNING "TIPC: " fmt, ## arg) +#define info(fmt, arg...) tipc_printf(TIPC_OUTPUT, \ + KERN_NOTICE "TIPC: " fmt, ## arg) + +#else + #define err(fmt, arg...) printk(KERN_ERR "TIPC: " fmt , ## arg) #define info(fmt, arg...) printk(KERN_INFO "TIPC: " fmt , ## arg) #define warn(fmt, arg...) printk(KERN_WARNING "TIPC: " fmt , ## arg) -#define dbg(fmt, arg...) do {} while (0) -#define msg_dbg(msg,txt) do {} while (0) -#define dump(fmt,arg...) do {} while (0) +#endif +/* + * DBG_OUTPUT is the destination print buffer for debug messages. + * It defaults to the the null print buffer, but can be redefined + * (typically in the individual .c files being debugged) to allow + * selected debug messages to be generated where needed. + */ + +#ifndef DBG_OUTPUT +#define DBG_OUTPUT TIPC_NULL +#endif /* - * TIPC_OUTPUT is defined to be the system console, while DBG_OUTPUT is - * the null print buffer. Thes ensures that any system or debug messages - * that are generated without using the above macros are handled correctly. + * TIPC can be configured to send debug messages to the specified print buffer + * (typically DBG_OUTPUT) or to suppress them entirely. */ -#undef TIPC_OUTPUT -#define TIPC_OUTPUT TIPC_CONS +#ifdef CONFIG_TIPC_DEBUG -#undef DBG_OUTPUT -#define DBG_OUTPUT TIPC_NULL +#define dbg(fmt, arg...) \ + do { \ + if (DBG_OUTPUT != TIPC_NULL) \ + tipc_printf(DBG_OUTPUT, fmt, ## arg); \ + } while (0) +#define msg_dbg(msg, txt) \ + do { \ + if (DBG_OUTPUT != TIPC_NULL) \ + tipc_msg_dbg(DBG_OUTPUT, msg, txt); \ + } while (0) +#define dump(fmt, arg...) \ + do { \ + if (DBG_OUTPUT != TIPC_NULL) \ + tipc_dump_dbg(DBG_OUTPUT, fmt, ##arg); \ + } while (0) + +void tipc_msg_dbg(struct print_buf *, struct tipc_msg *, const char *); +void tipc_dump_dbg(struct print_buf *, const char *fmt, ...); + +#else + +#define dbg(fmt, arg...) do {} while (0) +#define msg_dbg(msg, txt) do {} while (0) +#define dump(fmt, arg...) do {} while (0) + +#define tipc_msg_dbg(...) do {} while (0) +#define tipc_dump_dbg(...) do {} while (0) #endif @@ -178,7 +202,7 @@ extern atomic_t tipc_user_count; extern int tipc_core_start(void); extern void tipc_core_stop(void); -extern int tipc_core_start_net(void); +extern int tipc_core_start_net(unsigned long addr); extern void tipc_core_stop_net(void); extern int tipc_handler_start(void); extern void tipc_handler_stop(void); diff --git a/net/tipc/dbg.c b/net/tipc/dbg.c index e809d2a2ce06..29ecae851668 100644 --- a/net/tipc/dbg.c +++ b/net/tipc/dbg.c @@ -2,7 +2,7 @@ * net/tipc/dbg.c: TIPC print buffer routines for debugging * * Copyright (c) 1996-2006, Ericsson AB - * Copyright (c) 2005-2006, Wind River Systems + * Copyright (c) 2005-2007, Wind River Systems * All rights reserved. * * Redistribution and use in source and binary forms, with or without @@ -38,17 +38,43 @@ #include "config.h" #include "dbg.h" -static char print_string[TIPC_PB_MAX_STR]; -static DEFINE_SPINLOCK(print_lock); +/* + * TIPC pre-defines the following print buffers: + * + * TIPC_NULL : null buffer (i.e. print nowhere) + * TIPC_CONS : system console + * TIPC_LOG : TIPC log buffer + * + * Additional user-defined print buffers are also permitted. + */ -static struct print_buf null_buf = { NULL, 0, NULL, NULL }; -struct print_buf *TIPC_NULL = &null_buf; +static struct print_buf null_buf = { NULL, 0, NULL, 0 }; +struct print_buf *const TIPC_NULL = &null_buf; -static struct print_buf cons_buf = { NULL, 0, NULL, NULL }; -struct print_buf *TIPC_CONS = &cons_buf; +static struct print_buf cons_buf = { NULL, 0, NULL, 1 }; +struct print_buf *const TIPC_CONS = &cons_buf; -static struct print_buf log_buf = { NULL, 0, NULL, NULL }; -struct print_buf *TIPC_LOG = &log_buf; +static struct print_buf log_buf = { NULL, 0, NULL, 1 }; +struct print_buf *const TIPC_LOG = &log_buf; + +/* + * Locking policy when using print buffers. + * + * 1) tipc_printf() uses 'print_lock' to protect against concurrent access to + * 'print_string' when writing to a print buffer. This also protects against + * concurrent writes to the print buffer being written to. + * + * 2) tipc_dump() and tipc_log_XXX() leverage the aforementioned + * use of 'print_lock' to protect against all types of concurrent operations + * on their associated print buffer (not just write operations). + * + * Note: All routines of the form tipc_printbuf_XXX() are lock-free, and rely + * on the caller to prevent simultaneous use of the print buffer(s) being + * manipulated. + */ + +static char print_string[TIPC_PB_MAX_STR]; +static DEFINE_SPINLOCK(print_lock); #define FORMAT(PTR,LEN,FMT) \ @@ -60,27 +86,14 @@ struct print_buf *TIPC_LOG = &log_buf; *(PTR + LEN) = '\0';\ } -/* - * Locking policy when using print buffers. - * - * The following routines use 'print_lock' for protection: - * 1) tipc_printf() - to protect its print buffer(s) and 'print_string' - * 2) TIPC_TEE() - to protect its print buffer(s) - * 3) tipc_dump() - to protect its print buffer(s) and 'print_string' - * 4) tipc_log_XXX() - to protect TIPC_LOG - * - * All routines of the form tipc_printbuf_XXX() rely on the caller to prevent - * simultaneous use of the print buffer(s) being manipulated. - */ - /** * tipc_printbuf_init - initialize print buffer to empty * @pb: pointer to print buffer structure * @raw: pointer to character array used by print buffer * @size: size of character array * - * Makes the print buffer a null device that discards anything written to it - * if the character array is too small (or absent). + * Note: If the character array is too small (or absent), the print buffer + * becomes a null device that discards anything written to it. */ void tipc_printbuf_init(struct print_buf *pb, char *raw, u32 size) @@ -88,13 +101,13 @@ void tipc_printbuf_init(struct print_buf *pb, char *raw, u32 size) pb->buf = raw; pb->crs = raw; pb->size = size; - pb->next = NULL; + pb->echo = 0; if (size < TIPC_PB_MIN_SIZE) { pb->buf = NULL; } else if (raw) { pb->buf[0] = 0; - pb->buf[size-1] = ~0; + pb->buf[size - 1] = ~0; } } @@ -105,7 +118,11 @@ void tipc_printbuf_init(struct print_buf *pb, char *raw, u32 size) void tipc_printbuf_reset(struct print_buf *pb) { - tipc_printbuf_init(pb, pb->buf, pb->size); + if (pb->buf) { + pb->crs = pb->buf; + pb->buf[0] = 0; + pb->buf[pb->size - 1] = ~0; + } } /** @@ -141,7 +158,7 @@ int tipc_printbuf_validate(struct print_buf *pb) if (pb->buf[pb->size - 1] == 0) { cp_buf = kmalloc(pb->size, GFP_ATOMIC); - if (cp_buf != NULL){ + if (cp_buf) { tipc_printbuf_init(&cb, cp_buf, pb->size); tipc_printbuf_move(&cb, pb); tipc_printbuf_move(pb, &cb); @@ -179,15 +196,16 @@ void tipc_printbuf_move(struct print_buf *pb_to, struct print_buf *pb_from) } if (pb_to->size < pb_from->size) { - tipc_printbuf_reset(pb_to); - tipc_printf(pb_to, "*** PRINT BUFFER MOVE ERROR ***"); + strcpy(pb_to->buf, "*** PRINT BUFFER MOVE ERROR ***"); + pb_to->buf[pb_to->size - 1] = ~0; + pb_to->crs = strchr(pb_to->buf, 0); return; } /* Copy data from char after cursor to end (if used) */ len = pb_from->buf + pb_from->size - pb_from->crs - 2; - if ((pb_from->buf[pb_from->size-1] == 0) && (len > 0)) { + if ((pb_from->buf[pb_from->size - 1] == 0) && (len > 0)) { strcpy(pb_to->buf, pb_from->crs + 1); pb_to->crs = pb_to->buf + len; } else @@ -203,8 +221,8 @@ void tipc_printbuf_move(struct print_buf *pb_to, struct print_buf *pb_from) } /** - * tipc_printf - append formatted output to print buffer chain - * @pb: pointer to chain of print buffers (may be NULL) + * tipc_printf - append formatted output to print buffer + * @pb: pointer to print buffer * @fmt: formatted info to be printed */ @@ -213,68 +231,40 @@ void tipc_printf(struct print_buf *pb, const char *fmt, ...) int chars_to_add; int chars_left; char save_char; - struct print_buf *pb_next; spin_lock_bh(&print_lock); + FORMAT(print_string, chars_to_add, fmt); if (chars_to_add >= TIPC_PB_MAX_STR) strcpy(print_string, "*** PRINT BUFFER STRING TOO LONG ***"); - while (pb) { - if (pb == TIPC_CONS) - printk(print_string); - else if (pb->buf) { - chars_left = pb->buf + pb->size - pb->crs - 1; - if (chars_to_add <= chars_left) { - strcpy(pb->crs, print_string); - pb->crs += chars_to_add; - } else if (chars_to_add >= (pb->size - 1)) { - strcpy(pb->buf, print_string + chars_to_add + 1 - - pb->size); - pb->crs = pb->buf + pb->size - 1; - } else { - strcpy(pb->buf, print_string + chars_left); - save_char = print_string[chars_left]; - print_string[chars_left] = 0; - strcpy(pb->crs, print_string); - print_string[chars_left] = save_char; - pb->crs = pb->buf + chars_to_add - chars_left; - } + if (pb->buf) { + chars_left = pb->buf + pb->size - pb->crs - 1; + if (chars_to_add <= chars_left) { + strcpy(pb->crs, print_string); + pb->crs += chars_to_add; + } else if (chars_to_add >= (pb->size - 1)) { + strcpy(pb->buf, print_string + chars_to_add + 1 + - pb->size); + pb->crs = pb->buf + pb->size - 1; + } else { + strcpy(pb->buf, print_string + chars_left); + save_char = print_string[chars_left]; + print_string[chars_left] = 0; + strcpy(pb->crs, print_string); + print_string[chars_left] = save_char; + pb->crs = pb->buf + chars_to_add - chars_left; } - pb_next = pb->next; - pb->next = NULL; - pb = pb_next; } - spin_unlock_bh(&print_lock); -} -/** - * TIPC_TEE - perform next output operation on both print buffers - * @b0: pointer to chain of print buffers (may be NULL) - * @b1: pointer to print buffer to add to chain - * - * Returns pointer to print buffer chain. - */ + if (pb->echo) + printk(print_string); -struct print_buf *TIPC_TEE(struct print_buf *b0, struct print_buf *b1) -{ - struct print_buf *pb = b0; - - if (!b0 || (b0 == b1)) - return b1; - - spin_lock_bh(&print_lock); - while (pb->next) { - if ((pb->next == b1) || (pb->next == b0)) - pb->next = pb->next->next; - else - pb = pb->next; - } - pb->next = b1; spin_unlock_bh(&print_lock); - return b0; } +#ifdef CONFIG_TIPC_DEBUG + /** * print_to_console - write string of bytes to console in multiple chunks */ @@ -321,72 +311,66 @@ static void printbuf_dump(struct print_buf *pb) } /** - * tipc_dump - dump non-console print buffer(s) to console - * @pb: pointer to chain of print buffers + * tipc_dump_dbg - dump (non-console) print buffer to console + * @pb: pointer to print buffer */ -void tipc_dump(struct print_buf *pb, const char *fmt, ...) +void tipc_dump_dbg(struct print_buf *pb, const char *fmt, ...) { - struct print_buf *pb_next; int len; + if (pb == TIPC_CONS) + return; + spin_lock_bh(&print_lock); + FORMAT(print_string, len, fmt); printk(print_string); - for (; pb; pb = pb->next) { - if (pb != TIPC_CONS) { - printk("\n---- Start of %s log dump ----\n\n", - (pb == TIPC_LOG) ? "global" : "local"); - printbuf_dump(pb); - tipc_printbuf_reset(pb); - printk("\n---- End of dump ----\n"); - } - pb_next = pb->next; - pb->next = NULL; - pb = pb_next; - } + printk("\n---- Start of %s log dump ----\n\n", + (pb == TIPC_LOG) ? "global" : "local"); + printbuf_dump(pb); + tipc_printbuf_reset(pb); + printk("\n---- End of dump ----\n"); + spin_unlock_bh(&print_lock); } +#endif + /** - * tipc_log_stop - free up TIPC log print buffer + * tipc_log_resize - change the size of the TIPC log buffer + * @log_size: print buffer size to use */ -void tipc_log_stop(void) +int tipc_log_resize(int log_size) { + int res = 0; + spin_lock_bh(&print_lock); if (TIPC_LOG->buf) { kfree(TIPC_LOG->buf); TIPC_LOG->buf = NULL; } - spin_unlock_bh(&print_lock); -} - -/** - * tipc_log_reinit - (re)initialize TIPC log print buffer - * @log_size: print buffer size to use - */ - -void tipc_log_reinit(int log_size) -{ - tipc_log_stop(); - if (log_size) { if (log_size < TIPC_PB_MIN_SIZE) log_size = TIPC_PB_MIN_SIZE; - spin_lock_bh(&print_lock); + res = TIPC_LOG->echo; tipc_printbuf_init(TIPC_LOG, kmalloc(log_size, GFP_ATOMIC), log_size); - spin_unlock_bh(&print_lock); + TIPC_LOG->echo = res; + res = !TIPC_LOG->buf; } + spin_unlock_bh(&print_lock); + + return res; } /** - * tipc_log_resize - reconfigure size of TIPC log buffer + * tipc_log_resize_cmd - reconfigure size of TIPC log buffer */ -struct sk_buff *tipc_log_resize(const void *req_tlv_area, int req_tlv_space) +struct sk_buff *tipc_log_resize_cmd(const void *req_tlv_area, int req_tlv_space) { u32 value; @@ -397,7 +381,9 @@ struct sk_buff *tipc_log_resize(const void *req_tlv_area, int req_tlv_space) if (value != delimit(value, 0, 32768)) return tipc_cfg_reply_error_string(TIPC_CFG_INVALID_VALUE " (log size must be 0-32768)"); - tipc_log_reinit(value); + if (tipc_log_resize(value)) + return tipc_cfg_reply_error_string( + "unable to create specified log (log size is now 0)"); return tipc_cfg_reply_none(); } @@ -410,27 +396,32 @@ struct sk_buff *tipc_log_dump(void) struct sk_buff *reply; spin_lock_bh(&print_lock); - if (!TIPC_LOG->buf) + if (!TIPC_LOG->buf) { + spin_unlock_bh(&print_lock); reply = tipc_cfg_reply_ultra_string("log not activated\n"); - else if (tipc_printbuf_empty(TIPC_LOG)) + } else if (tipc_printbuf_empty(TIPC_LOG)) { + spin_unlock_bh(&print_lock); reply = tipc_cfg_reply_ultra_string("log is empty\n"); + } else { struct tlv_desc *rep_tlv; struct print_buf pb; int str_len; str_len = min(TIPC_LOG->size, 32768u); + spin_unlock_bh(&print_lock); reply = tipc_cfg_reply_alloc(TLV_SPACE(str_len)); if (reply) { rep_tlv = (struct tlv_desc *)reply->data; tipc_printbuf_init(&pb, TLV_DATA(rep_tlv), str_len); + spin_lock_bh(&print_lock); tipc_printbuf_move(&pb, TIPC_LOG); + spin_unlock_bh(&print_lock); str_len = strlen(TLV_DATA(rep_tlv)) + 1; skb_put(reply, TLV_SPACE(str_len)); TLV_SET(rep_tlv, TIPC_TLV_ULTRA_STRING, NULL, str_len); } } - spin_unlock_bh(&print_lock); return reply; } diff --git a/net/tipc/dbg.h b/net/tipc/dbg.h index c01b085000e0..5ef1bc8f64ef 100644 --- a/net/tipc/dbg.h +++ b/net/tipc/dbg.h @@ -2,7 +2,7 @@ * net/tipc/dbg.h: Include file for TIPC print buffer routines * * Copyright (c) 1997-2006, Ericsson AB - * Copyright (c) 2005-2006, Wind River Systems + * Copyright (c) 2005-2007, Wind River Systems * All rights reserved. * * Redistribution and use in source and binary forms, with or without @@ -42,14 +42,14 @@ * @buf: pointer to character array containing print buffer contents * @size: size of character array * @crs: pointer to first unused space in character array (i.e. final NUL) - * @next: used to link print buffers when printing to more than one at a time + * @echo: echo output to system console if non-zero */ struct print_buf { char *buf; u32 size; char *crs; - struct print_buf *next; + int echo; }; #define TIPC_PB_MIN_SIZE 64 /* minimum size for a print buffer's array */ @@ -61,10 +61,10 @@ int tipc_printbuf_empty(struct print_buf *pb); int tipc_printbuf_validate(struct print_buf *pb); void tipc_printbuf_move(struct print_buf *pb_to, struct print_buf *pb_from); -void tipc_log_reinit(int log_size); -void tipc_log_stop(void); +int tipc_log_resize(int log_size); -struct sk_buff *tipc_log_resize(const void *req_tlv_area, int req_tlv_space); +struct sk_buff *tipc_log_resize_cmd(const void *req_tlv_area, + int req_tlv_space); struct sk_buff *tipc_log_dump(void); #endif diff --git a/net/tipc/discover.c b/net/tipc/discover.c index 5d643e5721eb..74b7d1e28aec 100644 --- a/net/tipc/discover.c +++ b/net/tipc/discover.c @@ -120,9 +120,8 @@ static struct sk_buff *tipc_disc_init_msg(u32 type, if (buf) { msg = buf_msg(buf); - msg_init(msg, LINK_CONFIG, type, TIPC_OK, DSC_H_SIZE, - dest_domain); - msg_set_non_seq(msg); + msg_init(msg, LINK_CONFIG, type, DSC_H_SIZE, dest_domain); + msg_set_non_seq(msg, 1); msg_set_req_links(msg, req_links); msg_set_dest_domain(msg, dest_domain); msg_set_bc_netid(msg, tipc_net_id); @@ -156,11 +155,11 @@ static void disc_dupl_alert(struct bearer *b_ptr, u32 node_addr, /** * tipc_disc_recv_msg - handle incoming link setup message (request or response) * @buf: buffer containing message + * @b_ptr: bearer that message arrived on */ -void tipc_disc_recv_msg(struct sk_buff *buf) +void tipc_disc_recv_msg(struct sk_buff *buf, struct bearer *b_ptr) { - struct bearer *b_ptr = (struct bearer *)TIPC_SKB_CB(buf)->handle; struct link *link; struct tipc_media_addr media_addr; struct tipc_msg *msg = buf_msg(buf); @@ -194,15 +193,14 @@ void tipc_disc_recv_msg(struct sk_buff *buf) /* Always accept link here */ struct sk_buff *rbuf; struct tipc_media_addr *addr; - struct node *n_ptr = tipc_node_find(orig); + struct tipc_node *n_ptr = tipc_node_find(orig); int link_fully_up; dbg(" in own cluster\n"); if (n_ptr == NULL) { n_ptr = tipc_node_create(orig); - } - if (n_ptr == NULL) { - return; + if (!n_ptr) + return; } spin_lock_bh(&n_ptr->lock); link = n_ptr->links[b_ptr->identity]; diff --git a/net/tipc/discover.h b/net/tipc/discover.h index 9fd7587b143a..c36eaeb7d5d0 100644 --- a/net/tipc/discover.h +++ b/net/tipc/discover.h @@ -48,7 +48,7 @@ struct link_req *tipc_disc_init_link_req(struct bearer *b_ptr, void tipc_disc_update_link_req(struct link_req *req); void tipc_disc_stop_link_req(struct link_req *req); -void tipc_disc_recv_msg(struct sk_buff *buf); +void tipc_disc_recv_msg(struct sk_buff *buf, struct bearer *b_ptr); void tipc_disc_link_event(u32 addr, char *name, int up); #if 0 diff --git a/net/tipc/eth_media.c b/net/tipc/eth_media.c index 9cd35eec3e7f..fe43ef7dd7e3 100644 --- a/net/tipc/eth_media.c +++ b/net/tipc/eth_media.c @@ -82,7 +82,7 @@ static int send_msg(struct sk_buff *buf, struct tipc_bearer *tb_ptr, dev->dev_addr, clone->len); dev_queue_xmit(clone); } - return TIPC_OK; + return 0; } /** @@ -101,7 +101,7 @@ static int recv_msg(struct sk_buff *buf, struct net_device *dev, struct eth_bearer *eb_ptr = (struct eth_bearer *)pt->af_packet_priv; u32 size; - if (dev_net(dev) != &init_net) { + if (!net_eq(dev_net(dev), &init_net)) { kfree_skb(buf); return 0; } @@ -113,12 +113,12 @@ static int recv_msg(struct sk_buff *buf, struct net_device *dev, if (likely(buf->len == size)) { buf->next = NULL; tipc_recv_msg(buf, eb_ptr->bearer); - return TIPC_OK; + return 0; } } } kfree_skb(buf); - return TIPC_OK; + return 0; } /** @@ -198,7 +198,7 @@ static int recv_notification(struct notifier_block *nb, unsigned long evt, struct eth_bearer *eb_ptr = ð_bearers[0]; struct eth_bearer *stop = ð_bearers[MAX_ETH_BEARERS]; - if (dev_net(dev) != &init_net) + if (!net_eq(dev_net(dev), &init_net)) return NOTIFY_DONE; while ((eb_ptr->dev != dev)) { diff --git a/net/tipc/link.c b/net/tipc/link.c index 2a26a16e269f..dd4c18b9a35b 100644 --- a/net/tipc/link.c +++ b/net/tipc/link.c @@ -51,6 +51,12 @@ /* + * Out-of-range value for link session numbers + */ + +#define INVALID_SESSION 0x10000 + +/* * Limit for deferred reception queue: */ @@ -147,9 +153,21 @@ static void link_print(struct link *l_ptr, struct print_buf *buf, #define LINK_LOG_BUF_SIZE 0 -#define dbg_link(fmt, arg...) do {if (LINK_LOG_BUF_SIZE) tipc_printf(&l_ptr->print_buf, fmt, ## arg); } while(0) -#define dbg_link_msg(msg, txt) do {if (LINK_LOG_BUF_SIZE) tipc_msg_print(&l_ptr->print_buf, msg, txt); } while(0) -#define dbg_link_state(txt) do {if (LINK_LOG_BUF_SIZE) link_print(l_ptr, &l_ptr->print_buf, txt); } while(0) +#define dbg_link(fmt, arg...) \ + do { \ + if (LINK_LOG_BUF_SIZE) \ + tipc_printf(&l_ptr->print_buf, fmt, ## arg); \ + } while (0) +#define dbg_link_msg(msg, txt) \ + do { \ + if (LINK_LOG_BUF_SIZE) \ + tipc_msg_dbg(&l_ptr->print_buf, msg, txt); \ + } while (0) +#define dbg_link_state(txt) \ + do { \ + if (LINK_LOG_BUF_SIZE) \ + link_print(l_ptr, &l_ptr->print_buf, txt); \ + } while (0) #define dbg_link_dump() do { \ if (LINK_LOG_BUF_SIZE) { \ tipc_printf(LOG, "\n\nDumping link <%s>:\n", l_ptr->name); \ @@ -450,9 +468,9 @@ struct link *tipc_link_create(struct bearer *b_ptr, const u32 peer, l_ptr->pmsg = (struct tipc_msg *)&l_ptr->proto_msg; msg = l_ptr->pmsg; - msg_init(msg, LINK_PROTOCOL, RESET_MSG, TIPC_OK, INT_H_SIZE, l_ptr->addr); + msg_init(msg, LINK_PROTOCOL, RESET_MSG, INT_H_SIZE, l_ptr->addr); msg_set_size(msg, sizeof(l_ptr->proto_msg)); - msg_set_session(msg, tipc_random); + msg_set_session(msg, (tipc_random & 0xffff)); msg_set_bearer_id(msg, b_ptr->identity); strcpy((char *)msg_data(msg), if_name); @@ -693,10 +711,10 @@ void tipc_link_reset(struct link *l_ptr) u32 checkpoint = l_ptr->next_in_no; int was_active_link = tipc_link_is_active(l_ptr); - msg_set_session(l_ptr->pmsg, msg_session(l_ptr->pmsg) + 1); + msg_set_session(l_ptr->pmsg, ((msg_session(l_ptr->pmsg) + 1) & 0xffff)); - /* Link is down, accept any session: */ - l_ptr->peer_session = 0; + /* Link is down, accept any session */ + l_ptr->peer_session = INVALID_SESSION; /* Prepare for max packet size negotiation */ link_init_max_pkt(l_ptr); @@ -1110,7 +1128,7 @@ int tipc_link_send_buf(struct link *l_ptr, struct sk_buff *buf) if (bundler) { msg_init(&bundler_hdr, MSG_BUNDLER, OPEN_MSG, - TIPC_OK, INT_H_SIZE, l_ptr->addr); + INT_H_SIZE, l_ptr->addr); skb_copy_to_linear_data(bundler, &bundler_hdr, INT_H_SIZE); skb_trim(bundler, INT_H_SIZE); @@ -1137,7 +1155,7 @@ int tipc_link_send_buf(struct link *l_ptr, struct sk_buff *buf) int tipc_link_send(struct sk_buff *buf, u32 dest, u32 selector) { struct link *l_ptr; - struct node *n_ptr; + struct tipc_node *n_ptr; int res = -ELINKCONG; read_lock_bh(&tipc_net_lock); @@ -1208,7 +1226,7 @@ static int link_send_buf_fast(struct link *l_ptr, struct sk_buff *buf, int tipc_send_buf_fast(struct sk_buff *buf, u32 destnode) { struct link *l_ptr; - struct node *n_ptr; + struct tipc_node *n_ptr; int res; u32 selector = msg_origport(buf_msg(buf)) & 1; u32 dummy; @@ -1252,7 +1270,7 @@ int tipc_link_send_sections_fast(struct port *sender, struct tipc_msg *hdr = &sender->publ.phdr; struct link *l_ptr; struct sk_buff *buf; - struct node *node; + struct tipc_node *node; int res; u32 selector = msg_origport(hdr) & 1; @@ -1346,7 +1364,7 @@ static int link_send_sections_long(struct port *sender, u32 destaddr) { struct link *l_ptr; - struct node *node; + struct tipc_node *node; struct tipc_msg *hdr = &sender->publ.phdr; u32 dsz = msg_data_sz(hdr); u32 max_pkt,fragm_sz,rest; @@ -1374,7 +1392,7 @@ again: msg_dbg(hdr, ">FRAGMENTING>"); msg_init(&fragm_hdr, MSG_FRAGMENTER, FIRST_FRAGMENT, - TIPC_OK, INT_H_SIZE, msg_destnode(hdr)); + INT_H_SIZE, msg_destnode(hdr)); msg_set_link_selector(&fragm_hdr, sender->publ.ref); msg_set_size(&fragm_hdr, max_pkt); msg_set_fragm_no(&fragm_hdr, 1); @@ -1543,7 +1561,7 @@ u32 tipc_link_push_packet(struct link *l_ptr) l_ptr->retransm_queue_head = mod(++r_q_head); l_ptr->retransm_queue_size = --r_q_size; l_ptr->stats.retransmitted++; - return TIPC_OK; + return 0; } else { l_ptr->stats.bearer_congs++; msg_dbg(buf_msg(buf), "|>DEF-RETR>"); @@ -1562,7 +1580,7 @@ u32 tipc_link_push_packet(struct link *l_ptr) l_ptr->unacked_window = 0; buf_discard(buf); l_ptr->proto_msg_queue = NULL; - return TIPC_OK; + return 0; } else { msg_dbg(buf_msg(buf), "|>DEF-PROT>"); l_ptr->stats.bearer_congs++; @@ -1586,7 +1604,7 @@ u32 tipc_link_push_packet(struct link *l_ptr) msg_set_type(msg, CLOSED_MSG); msg_dbg(msg, ">PUSH-DATA>"); l_ptr->next_out = buf->next; - return TIPC_OK; + return 0; } else { msg_dbg(msg, "|PUSH-DATA|"); l_ptr->stats.bearer_congs++; @@ -1610,15 +1628,15 @@ void tipc_link_push_queue(struct link *l_ptr) do { res = tipc_link_push_packet(l_ptr); - } - while (res == TIPC_OK); + } while (!res); + if (res == PUSH_FAILED) tipc_bearer_schedule(l_ptr->b_ptr, l_ptr); } static void link_reset_all(unsigned long addr) { - struct node *n_ptr; + struct tipc_node *n_ptr; char addr_string[16]; u32 i; @@ -1651,7 +1669,7 @@ static void link_retransmit_failure(struct link *l_ptr, struct sk_buff *buf) struct tipc_msg *msg = buf_msg(buf); warn("Retransmission failure on link <%s>\n", l_ptr->name); - tipc_msg_print(TIPC_OUTPUT, msg, ">RETR-FAIL>"); + tipc_msg_dbg(TIPC_OUTPUT, msg, ">RETR-FAIL>"); if (l_ptr->addr) { @@ -1664,7 +1682,7 @@ static void link_retransmit_failure(struct link *l_ptr, struct sk_buff *buf) /* Handle failure on broadcast link */ - struct node *n_ptr; + struct tipc_node *n_ptr; char addr_string[16]; tipc_printf(TIPC_OUTPUT, "Msg seq number: %u, ", msg_seqno(msg)); @@ -1748,21 +1766,6 @@ void tipc_link_retransmit(struct link *l_ptr, struct sk_buff *buf, l_ptr->retransm_queue_head = l_ptr->retransm_queue_size = 0; } -/* - * link_recv_non_seq: Receive packets which are outside - * the link sequence flow - */ - -static void link_recv_non_seq(struct sk_buff *buf) -{ - struct tipc_msg *msg = buf_msg(buf); - - if (msg_user(msg) == LINK_CONFIG) - tipc_disc_recv_msg(buf); - else - tipc_bclink_recv_pkt(buf); -} - /** * link_insert_deferred_queue - insert deferred messages back into receive chain */ @@ -1839,8 +1842,8 @@ void tipc_recv_msg(struct sk_buff *head, struct tipc_bearer *tb_ptr) { read_lock_bh(&tipc_net_lock); while (head) { - struct bearer *b_ptr; - struct node *n_ptr; + struct bearer *b_ptr = (struct bearer *)tb_ptr; + struct tipc_node *n_ptr; struct link *l_ptr; struct sk_buff *crs; struct sk_buff *buf = head; @@ -1850,9 +1853,6 @@ void tipc_recv_msg(struct sk_buff *head, struct tipc_bearer *tb_ptr) u32 released = 0; int type; - b_ptr = (struct bearer *)tb_ptr; - TIPC_SKB_CB(buf)->handle = b_ptr; - head = head->next; /* Ensure message is well-formed */ @@ -1871,7 +1871,10 @@ void tipc_recv_msg(struct sk_buff *head, struct tipc_bearer *tb_ptr) msg = buf_msg(buf); if (unlikely(msg_non_seq(msg))) { - link_recv_non_seq(buf); + if (msg_user(msg) == LINK_CONFIG) + tipc_disc_recv_msg(buf, b_ptr); + else + tipc_bclink_recv_pkt(buf); continue; } @@ -1978,8 +1981,6 @@ deliver: if (link_recv_changeover_msg(&l_ptr, &buf)) { msg = buf_msg(buf); seq_no = msg_seqno(msg); - TIPC_SKB_CB(buf)->handle - = b_ptr; if (type == ORIGINAL_MSG) goto deliver; goto protocol_check; @@ -2263,7 +2264,8 @@ static void link_recv_proto_msg(struct link *l_ptr, struct sk_buff *buf) switch (msg_type(msg)) { case RESET_MSG: - if (!link_working_unknown(l_ptr) && l_ptr->peer_session) { + if (!link_working_unknown(l_ptr) && + (l_ptr->peer_session != INVALID_SESSION)) { if (msg_session(msg) == l_ptr->peer_session) { dbg("Duplicate RESET: %u<->%u\n", msg_session(msg), l_ptr->peer_session); @@ -2424,7 +2426,7 @@ void tipc_link_changeover(struct link *l_ptr) } msg_init(&tunnel_hdr, CHANGEOVER_PROTOCOL, - ORIGINAL_MSG, TIPC_OK, INT_H_SIZE, l_ptr->addr); + ORIGINAL_MSG, INT_H_SIZE, l_ptr->addr); msg_set_bearer_id(&tunnel_hdr, l_ptr->peer_bearer_id); msg_set_msgcnt(&tunnel_hdr, msgcount); dbg("Link changeover requires %u tunnel messages\n", msgcount); @@ -2479,7 +2481,7 @@ void tipc_link_send_duplicate(struct link *l_ptr, struct link *tunnel) struct tipc_msg tunnel_hdr; msg_init(&tunnel_hdr, CHANGEOVER_PROTOCOL, - DUPLICATE_MSG, TIPC_OK, INT_H_SIZE, l_ptr->addr); + DUPLICATE_MSG, INT_H_SIZE, l_ptr->addr); msg_set_msgcnt(&tunnel_hdr, l_ptr->out_queue_size); msg_set_bearer_id(&tunnel_hdr, l_ptr->peer_bearer_id); iter = l_ptr->first_out; @@ -2672,10 +2674,12 @@ int tipc_link_send_long_buf(struct link *l_ptr, struct sk_buff *buf) u32 pack_sz = link_max_pkt(l_ptr); u32 fragm_sz = pack_sz - INT_H_SIZE; u32 fragm_no = 1; - u32 destaddr = msg_destnode(inmsg); + u32 destaddr; if (msg_short(inmsg)) destaddr = l_ptr->addr; + else + destaddr = msg_destnode(inmsg); if (msg_routed(inmsg)) msg_set_prevnode(inmsg, tipc_own_addr); @@ -2683,7 +2687,7 @@ int tipc_link_send_long_buf(struct link *l_ptr, struct sk_buff *buf) /* Prepare reusable fragment header: */ msg_init(&fragm_hdr, MSG_FRAGMENTER, FIRST_FRAGMENT, - TIPC_OK, INT_H_SIZE, destaddr); + INT_H_SIZE, destaddr); msg_set_link_selector(&fragm_hdr, msg_link_selector(inmsg)); msg_set_long_msgno(&fragm_hdr, mod(l_ptr->long_msg_seq_no++)); msg_set_fragm_no(&fragm_hdr, fragm_no); @@ -2931,7 +2935,7 @@ void tipc_link_set_queue_limits(struct link *l_ptr, u32 window) * Returns pointer to link (or 0 if invalid link name). */ -static struct link *link_find_link(const char *name, struct node **node) +static struct link *link_find_link(const char *name, struct tipc_node **node) { struct link_name link_name_parts; struct bearer *b_ptr; @@ -2961,7 +2965,7 @@ struct sk_buff *tipc_link_cmd_config(const void *req_tlv_area, int req_tlv_space struct tipc_link_config *args; u32 new_value; struct link *l_ptr; - struct node *node; + struct tipc_node *node; int res; if (!TLV_CHECK(req_tlv_area, req_tlv_space, TIPC_TLV_LINK_CONFIG)) @@ -2994,7 +2998,7 @@ struct sk_buff *tipc_link_cmd_config(const void *req_tlv_area, int req_tlv_space link_set_supervision_props(l_ptr, new_value); tipc_link_send_proto_msg(l_ptr, STATE_MSG, 0, 0, new_value, 0, 0); - res = TIPC_OK; + res = 0; } break; case TIPC_CMD_SET_LINK_PRI: @@ -3003,14 +3007,14 @@ struct sk_buff *tipc_link_cmd_config(const void *req_tlv_area, int req_tlv_space l_ptr->priority = new_value; tipc_link_send_proto_msg(l_ptr, STATE_MSG, 0, 0, 0, new_value, 0); - res = TIPC_OK; + res = 0; } break; case TIPC_CMD_SET_LINK_WINDOW: if ((new_value >= TIPC_MIN_LINK_WIN) && (new_value <= TIPC_MAX_LINK_WIN)) { tipc_link_set_queue_limits(l_ptr, new_value); - res = TIPC_OK; + res = 0; } break; } @@ -3039,7 +3043,7 @@ struct sk_buff *tipc_link_cmd_reset_stats(const void *req_tlv_area, int req_tlv_ { char *link_name; struct link *l_ptr; - struct node *node; + struct tipc_node *node; if (!TLV_CHECK(req_tlv_area, req_tlv_space, TIPC_TLV_LINK_NAME)) return tipc_cfg_reply_error_string(TIPC_CFG_TLV_ERROR); @@ -3087,7 +3091,7 @@ static int tipc_link_stats(const char *name, char *buf, const u32 buf_size) { struct print_buf pb; struct link *l_ptr; - struct node *node; + struct tipc_node *node; char *status; u32 profile_total = 0; @@ -3203,7 +3207,7 @@ int link_control(const char *name, u32 op, u32 val) int res = -EINVAL; struct link *l_ptr; u32 bearer_id; - struct node * node; + struct tipc_node * node; u32 a; a = link_name2addr(name, &bearer_id); @@ -3226,7 +3230,7 @@ int link_control(const char *name, u32 op, u32 val) if (op == TIPC_CMD_UNBLOCK_LINK) { l_ptr->blocked = 0; } - res = TIPC_OK; + res = 0; } tipc_node_unlock(node); } @@ -3245,7 +3249,7 @@ int link_control(const char *name, u32 op, u32 val) u32 tipc_link_get_max_pkt(u32 dest, u32 selector) { - struct node *n_ptr; + struct tipc_node *n_ptr; struct link *l_ptr; u32 res = MAX_PKT_DEFAULT; diff --git a/net/tipc/link.h b/net/tipc/link.h index 52f3e7c1871f..6a51e38ad25c 100644 --- a/net/tipc/link.h +++ b/net/tipc/link.h @@ -116,7 +116,7 @@ struct link { char name[TIPC_MAX_LINK_NAME]; struct tipc_media_addr media_addr; struct timer_list timer; - struct node *owner; + struct tipc_node *owner; struct list_head link_list; /* Management and link supervision data */ diff --git a/net/tipc/msg.c b/net/tipc/msg.c index 696a8633df75..73dcd00d674e 100644 --- a/net/tipc/msg.c +++ b/net/tipc/msg.c @@ -41,7 +41,9 @@ #include "bearer.h" -void tipc_msg_print(struct print_buf *buf, struct tipc_msg *msg, const char *str) +#ifdef CONFIG_TIPC_DEBUG + +void tipc_msg_dbg(struct print_buf *buf, struct tipc_msg *msg, const char *str) { u32 usr = msg_user(msg); tipc_printf(buf, str); @@ -228,13 +230,10 @@ void tipc_msg_print(struct print_buf *buf, struct tipc_msg *msg, const char *str switch (usr) { case CONN_MANAGER: - case NAME_DISTRIBUTOR: case TIPC_LOW_IMPORTANCE: case TIPC_MEDIUM_IMPORTANCE: case TIPC_HIGH_IMPORTANCE: case TIPC_CRITICAL_IMPORTANCE: - if (msg_short(msg)) - break; /* No error */ switch (msg_errcode(msg)) { case TIPC_OK: break; @@ -315,9 +314,11 @@ void tipc_msg_print(struct print_buf *buf, struct tipc_msg *msg, const char *str } tipc_printf(buf, "\n"); if ((usr == CHANGEOVER_PROTOCOL) && (msg_msgcnt(msg))) { - tipc_msg_print(buf,msg_get_wrapped(msg)," /"); + tipc_msg_dbg(buf, msg_get_wrapped(msg), " /"); } if ((usr == MSG_FRAGMENTER) && (msg_type(msg) == FIRST_FRAGMENT)) { - tipc_msg_print(buf,msg_get_wrapped(msg)," /"); + tipc_msg_dbg(buf, msg_get_wrapped(msg), " /"); } } + +#endif diff --git a/net/tipc/msg.h b/net/tipc/msg.h index ad487e8abcc2..7ee6ae238147 100644 --- a/net/tipc/msg.h +++ b/net/tipc/msg.h @@ -2,7 +2,7 @@ * net/tipc/msg.h: Include file for TIPC message header routines * * Copyright (c) 2000-2007, Ericsson AB - * Copyright (c) 2005-2007, Wind River Systems + * Copyright (c) 2005-2008, Wind River Systems * All rights reserved. * * Redistribution and use in source and binary forms, with or without @@ -75,6 +75,14 @@ static inline void msg_set_bits(struct tipc_msg *m, u32 w, m->hdr[w] |= htonl(val); } +static inline void msg_swap_words(struct tipc_msg *msg, u32 a, u32 b) +{ + u32 temp = msg->hdr[a]; + + msg->hdr[a] = msg->hdr[b]; + msg->hdr[b] = temp; +} + /* * Word 0 */ @@ -119,9 +127,9 @@ static inline int msg_non_seq(struct tipc_msg *m) return msg_bits(m, 0, 20, 1); } -static inline void msg_set_non_seq(struct tipc_msg *m) +static inline void msg_set_non_seq(struct tipc_msg *m, u32 n) { - msg_set_bits(m, 0, 20, 1, 1); + msg_set_bits(m, 0, 20, 1, n); } static inline int msg_dest_droppable(struct tipc_msg *m) @@ -224,6 +232,25 @@ static inline void msg_set_seqno(struct tipc_msg *m, u32 n) msg_set_bits(m, 2, 0, 0xffff, n); } +/* + * TIPC may utilize the "link ack #" and "link seq #" fields of a short + * message header to hold the destination node for the message, since the + * normal "dest node" field isn't present. This cache is only referenced + * when required, so populating the cache of a longer message header is + * harmless (as long as the header has the two link sequence fields present). + * + * Note: Host byte order is OK here, since the info never goes off-card. + */ + +static inline u32 msg_destnode_cache(struct tipc_msg *m) +{ + return m->hdr[2]; +} + +static inline void msg_set_destnode_cache(struct tipc_msg *m, u32 dnode) +{ + m->hdr[2] = dnode; +} /* * Words 3-10 @@ -325,7 +352,7 @@ static inline struct tipc_msg *msg_get_wrapped(struct tipc_msg *m) +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ w0:|vers |msg usr|hdr sz |n|resrv| packet size | +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ - w1:|m typ|rsv=0| sequence gap | broadcast ack no | + w1:|m typ| sequence gap | broadcast ack no | +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ w2:| link level ack no/bc_gap_from | seq no / bcast_gap_to | +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ @@ -388,12 +415,12 @@ static inline struct tipc_msg *msg_get_wrapped(struct tipc_msg *m) static inline u32 msg_seq_gap(struct tipc_msg *m) { - return msg_bits(m, 1, 16, 0xff); + return msg_bits(m, 1, 16, 0x1fff); } static inline void msg_set_seq_gap(struct tipc_msg *m, u32 n) { - msg_set_bits(m, 1, 16, 0xff, n); + msg_set_bits(m, 1, 16, 0x1fff, n); } static inline u32 msg_req_links(struct tipc_msg *m) @@ -696,7 +723,7 @@ static inline u32 msg_tot_importance(struct tipc_msg *m) static inline void msg_init(struct tipc_msg *m, u32 user, u32 type, - u32 err, u32 hsize, u32 destnode) + u32 hsize, u32 destnode) { memset(m, 0, hsize); msg_set_version(m); @@ -705,7 +732,6 @@ static inline void msg_init(struct tipc_msg *m, u32 user, u32 type, msg_set_size(m, hsize); msg_set_prevnode(m, tipc_own_addr); msg_set_type(m, type); - msg_set_errcode(m, err); if (!msg_short(m)) { msg_set_orignode(m, tipc_own_addr); msg_set_destnode(m, destnode); diff --git a/net/tipc/name_distr.c b/net/tipc/name_distr.c index 39fd1619febf..10a69894e2fd 100644 --- a/net/tipc/name_distr.c +++ b/net/tipc/name_distr.c @@ -41,9 +41,6 @@ #include "msg.h" #include "name_distr.h" -#undef DBG_OUTPUT -#define DBG_OUTPUT NULL - #define ITEM_SIZE sizeof(struct distr_item) /** @@ -106,8 +103,7 @@ static struct sk_buff *named_prepare_buf(u32 type, u32 size, u32 dest) if (buf != NULL) { msg = buf_msg(buf); - msg_init(msg, NAME_DISTRIBUTOR, type, TIPC_OK, - LONG_H_SIZE, dest); + msg_init(msg, NAME_DISTRIBUTOR, type, LONG_H_SIZE, dest); msg_set_size(msg, LONG_H_SIZE + size); } return buf; diff --git a/net/tipc/name_table.c b/net/tipc/name_table.c index ac7dfdda7973..cd72e22b132b 100644 --- a/net/tipc/name_table.c +++ b/net/tipc/name_table.c @@ -2,7 +2,7 @@ * net/tipc/name_table.c: TIPC name table code * * Copyright (c) 2000-2006, Ericsson AB - * Copyright (c) 2004-2005, Wind River Systems + * Copyright (c) 2004-2008, Wind River Systems * All rights reserved. * * Redistribution and use in source and binary forms, with or without @@ -52,9 +52,16 @@ static int tipc_nametbl_size = 1024; /* must be a power of 2 */ * struct sub_seq - container for all published instances of a name sequence * @lower: name sequence lower bound * @upper: name sequence upper bound - * @node_list: circular list of matching publications with >= node scope - * @cluster_list: circular list of matching publications with >= cluster scope - * @zone_list: circular list of matching publications with >= zone scope + * @node_list: circular list of publications made by own node + * @cluster_list: circular list of publications made by own cluster + * @zone_list: circular list of publications made by own zone + * @node_list_size: number of entries in "node_list" + * @cluster_list_size: number of entries in "cluster_list" + * @zone_list_size: number of entries in "zone_list" + * + * Note: The zone list always contains at least one entry, since all + * publications of the associated name sequence belong to it. + * (The cluster and node lists may be empty.) */ struct sub_seq { @@ -63,6 +70,9 @@ struct sub_seq { struct publication *node_list; struct publication *cluster_list; struct publication *zone_list; + u32 node_list_size; + u32 cluster_list_size; + u32 zone_list_size; }; /** @@ -74,7 +84,7 @@ struct sub_seq { * @first_free: array index of first unused sub-sequence entry * @ns_list: links to adjacent name sequences in hash chain * @subscriptions: list of subscriptions for this 'type' - * @lock: spinlock controlling access to name sequence structure + * @lock: spinlock controlling access to publication lists of all sub-sequences */ struct name_seq { @@ -317,6 +327,7 @@ static struct publication *tipc_nameseq_insert_publ(struct name_seq *nseq, dbg("inserting publ %p, node=0x%x publ->node=0x%x, subscr->node=%p\n", publ, node, publ->node, publ->subscr.node); + sseq->zone_list_size++; if (!sseq->zone_list) sseq->zone_list = publ->zone_list_next = publ; else { @@ -325,6 +336,7 @@ static struct publication *tipc_nameseq_insert_publ(struct name_seq *nseq, } if (in_own_cluster(node)) { + sseq->cluster_list_size++; if (!sseq->cluster_list) sseq->cluster_list = publ->cluster_list_next = publ; else { @@ -335,6 +347,7 @@ static struct publication *tipc_nameseq_insert_publ(struct name_seq *nseq, } if (node == tipc_own_addr) { + sseq->node_list_size++; if (!sseq->node_list) sseq->node_list = publ->node_list_next = publ; else { @@ -411,6 +424,7 @@ static struct publication *tipc_nameseq_remove_publ(struct name_seq *nseq, u32 i } else { sseq->zone_list = NULL; } + sseq->zone_list_size--; /* Remove publication from cluster scope list, if present */ @@ -439,6 +453,7 @@ static struct publication *tipc_nameseq_remove_publ(struct name_seq *nseq, u32 i } else { sseq->cluster_list = NULL; } + sseq->cluster_list_size--; } end_cluster: @@ -469,6 +484,7 @@ end_cluster: } else { sseq->node_list = NULL; } + sseq->node_list_size--; } end_node: @@ -709,15 +725,18 @@ int tipc_nametbl_mc_translate(u32 type, u32 lower, u32 upper, u32 limit, if (sseq->lower > upper) break; - publ = sseq->cluster_list; - if (publ && (publ->scope <= limit)) + + publ = sseq->node_list; + if (publ) { do { - if (publ->node == tipc_own_addr) + if (publ->scope <= limit) tipc_port_list_add(dports, publ->ref); - else - res = 1; - publ = publ->cluster_list_next; - } while (publ != sseq->cluster_list); + publ = publ->node_list_next; + } while (publ != sseq->node_list); + } + + if (sseq->cluster_list_size != sseq->node_list_size) + res = 1; } spin_unlock_bh(&seq->lock); @@ -905,6 +924,9 @@ static void nameseq_list(struct name_seq *seq, struct print_buf *buf, u32 depth, struct sub_seq *sseq; char typearea[11]; + if (seq->first_free == 0) + return; + sprintf(typearea, "%-10u", seq->type); if (depth == 1) { @@ -915,7 +937,9 @@ static void nameseq_list(struct name_seq *seq, struct print_buf *buf, u32 depth, for (sseq = seq->sseqs; sseq != &seq->sseqs[seq->first_free]; sseq++) { if ((lowbound <= sseq->upper) && (upbound >= sseq->lower)) { tipc_printf(buf, "%s ", typearea); + spin_lock_bh(&seq->lock); subseq_list(sseq, buf, depth, index); + spin_unlock_bh(&seq->lock); sprintf(typearea, "%10s", " "); } } @@ -1050,15 +1074,12 @@ void tipc_nametbl_dump(void) int tipc_nametbl_init(void) { - int array_size = sizeof(struct hlist_head) * tipc_nametbl_size; - - table.types = kzalloc(array_size, GFP_ATOMIC); + table.types = kcalloc(tipc_nametbl_size, sizeof(struct hlist_head), + GFP_ATOMIC); if (!table.types) return -ENOMEM; - write_lock_bh(&tipc_nametbl_lock); table.local_publ_count = 0; - write_unlock_bh(&tipc_nametbl_lock); return 0; } diff --git a/net/tipc/name_table.h b/net/tipc/name_table.h index b9e7cd336d76..139882d4ed00 100644 --- a/net/tipc/name_table.h +++ b/net/tipc/name_table.h @@ -76,7 +76,7 @@ struct publication { u32 node; u32 ref; u32 key; - struct node_subscr subscr; + struct tipc_node_subscr subscr; struct list_head local_list; struct list_head pport_list; struct publication *node_list_next; diff --git a/net/tipc/net.c b/net/tipc/net.c index c39c76201e8e..7906608bf510 100644 --- a/net/tipc/net.c +++ b/net/tipc/net.c @@ -118,7 +118,7 @@ DEFINE_RWLOCK(tipc_net_lock); struct network tipc_net = { NULL }; -struct node *tipc_net_select_remote_node(u32 addr, u32 ref) +struct tipc_node *tipc_net_select_remote_node(u32 addr, u32 ref) { return tipc_zone_select_remote_node(tipc_net.zones[tipc_zone(addr)], addr, ref); } @@ -165,7 +165,7 @@ static int net_init(void) if (!tipc_net.zones) { return -ENOMEM; } - return TIPC_OK; + return 0; } static void net_stop(void) @@ -266,7 +266,7 @@ void tipc_net_route_msg(struct sk_buff *buf) tipc_link_send(buf, dnode, msg_link_selector(msg)); } -int tipc_net_start(void) +int tipc_net_start(u32 addr) { char addr_string[16]; int res; @@ -274,6 +274,10 @@ int tipc_net_start(void) if (tipc_mode != TIPC_NODE_MODE) return -ENOPROTOOPT; + tipc_subscr_stop(); + tipc_cfg_stop(); + + tipc_own_addr = addr; tipc_mode = TIPC_NET_MODE; tipc_named_reinit(); tipc_port_reinit(); @@ -284,14 +288,14 @@ int tipc_net_start(void) (res = tipc_bclink_init())) { return res; } - tipc_subscr_stop(); - tipc_cfg_stop(); + tipc_k_signal((Handler)tipc_subscr_start, 0); tipc_k_signal((Handler)tipc_cfg_init, 0); + info("Started in network mode\n"); info("Own node address %s, network identity %u\n", addr_string_fill(addr_string, tipc_own_addr), tipc_net_id); - return TIPC_OK; + return 0; } void tipc_net_stop(void) diff --git a/net/tipc/net.h b/net/tipc/net.h index a6a0e9976ac9..de2b9ad8f646 100644 --- a/net/tipc/net.h +++ b/net/tipc/net.h @@ -55,10 +55,10 @@ extern rwlock_t tipc_net_lock; void tipc_net_remove_as_router(u32 router); void tipc_net_send_external_routes(u32 dest); void tipc_net_route_msg(struct sk_buff *buf); -struct node *tipc_net_select_remote_node(u32 addr, u32 ref); +struct tipc_node *tipc_net_select_remote_node(u32 addr, u32 ref); u32 tipc_net_select_router(u32 addr, u32 ref); -int tipc_net_start(void); +int tipc_net_start(u32 addr); void tipc_net_stop(void); #endif diff --git a/net/tipc/netlink.c b/net/tipc/netlink.c index 6a7f7b4c2595..c387217bb230 100644 --- a/net/tipc/netlink.c +++ b/net/tipc/netlink.c @@ -2,7 +2,7 @@ * net/tipc/netlink.c: TIPC configuration handling * * Copyright (c) 2005-2006, Ericsson AB - * Copyright (c) 2005, Wind River Systems + * Copyright (c) 2005-2007, Wind River Systems * All rights reserved. * * Redistribution and use in source and binary forms, with or without @@ -45,15 +45,17 @@ static int handle_cmd(struct sk_buff *skb, struct genl_info *info) struct nlmsghdr *req_nlh = info->nlhdr; struct tipc_genlmsghdr *req_userhdr = info->userhdr; int hdr_space = NLMSG_SPACE(GENL_HDRLEN + TIPC_GENL_HDRLEN); + u16 cmd; if ((req_userhdr->cmd & 0xC000) && (!capable(CAP_NET_ADMIN))) - rep_buf = tipc_cfg_reply_error_string(TIPC_CFG_NOT_NET_ADMIN); + cmd = TIPC_CMD_NOT_NET_ADMIN; else - rep_buf = tipc_cfg_do_cmd(req_userhdr->dest, - req_userhdr->cmd, - NLMSG_DATA(req_nlh) + GENL_HDRLEN + TIPC_GENL_HDRLEN, - NLMSG_PAYLOAD(req_nlh, GENL_HDRLEN + TIPC_GENL_HDRLEN), - hdr_space); + cmd = req_userhdr->cmd; + + rep_buf = tipc_cfg_do_cmd(req_userhdr->dest, cmd, + NLMSG_DATA(req_nlh) + GENL_HDRLEN + TIPC_GENL_HDRLEN, + NLMSG_PAYLOAD(req_nlh, GENL_HDRLEN + TIPC_GENL_HDRLEN), + hdr_space); if (rep_buf) { skb_push(rep_buf, hdr_space); diff --git a/net/tipc/node.c b/net/tipc/node.c index 598f4d3a0098..20d98c56e152 100644 --- a/net/tipc/node.c +++ b/net/tipc/node.c @@ -46,22 +46,46 @@ #include "bearer.h" #include "name_distr.h" -void node_print(struct print_buf *buf, struct node *n_ptr, char *str); -static void node_lost_contact(struct node *n_ptr); -static void node_established_contact(struct node *n_ptr); +void node_print(struct print_buf *buf, struct tipc_node *n_ptr, char *str); +static void node_lost_contact(struct tipc_node *n_ptr); +static void node_established_contact(struct tipc_node *n_ptr); -struct node *tipc_nodes = NULL; /* sorted list of nodes within cluster */ +struct tipc_node *tipc_nodes = NULL; /* sorted list of nodes within cluster */ + +static DEFINE_SPINLOCK(node_create_lock); u32 tipc_own_tag = 0; -struct node *tipc_node_create(u32 addr) +/** + * tipc_node_create - create neighboring node + * + * Currently, this routine is called by neighbor discovery code, which holds + * net_lock for reading only. We must take node_create_lock to ensure a node + * isn't created twice if two different bearers discover the node at the same + * time. (It would be preferable to switch to holding net_lock in write mode, + * but this is a non-trivial change.) + */ + +struct tipc_node *tipc_node_create(u32 addr) { struct cluster *c_ptr; - struct node *n_ptr; - struct node **curr_node; + struct tipc_node *n_ptr; + struct tipc_node **curr_node; + + spin_lock_bh(&node_create_lock); + + for (n_ptr = tipc_nodes; n_ptr; n_ptr = n_ptr->next) { + if (addr < n_ptr->addr) + break; + if (addr == n_ptr->addr) { + spin_unlock_bh(&node_create_lock); + return n_ptr; + } + } n_ptr = kzalloc(sizeof(*n_ptr),GFP_ATOMIC); if (!n_ptr) { + spin_unlock_bh(&node_create_lock); warn("Node creation failed, no memory\n"); return NULL; } @@ -71,6 +95,7 @@ struct node *tipc_node_create(u32 addr) c_ptr = tipc_cltr_create(addr); } if (!c_ptr) { + spin_unlock_bh(&node_create_lock); kfree(n_ptr); return NULL; } @@ -91,10 +116,11 @@ struct node *tipc_node_create(u32 addr) } } (*curr_node) = n_ptr; + spin_unlock_bh(&node_create_lock); return n_ptr; } -void tipc_node_delete(struct node *n_ptr) +void tipc_node_delete(struct tipc_node *n_ptr) { if (!n_ptr) return; @@ -120,7 +146,7 @@ void tipc_node_delete(struct node *n_ptr) * Link becomes active (alone or shared) or standby, depending on its priority. */ -void tipc_node_link_up(struct node *n_ptr, struct link *l_ptr) +void tipc_node_link_up(struct tipc_node *n_ptr, struct link *l_ptr) { struct link **active = &n_ptr->active_links[0]; @@ -154,7 +180,7 @@ void tipc_node_link_up(struct node *n_ptr, struct link *l_ptr) * node_select_active_links - select active link */ -static void node_select_active_links(struct node *n_ptr) +static void node_select_active_links(struct tipc_node *n_ptr) { struct link **active = &n_ptr->active_links[0]; u32 i; @@ -182,7 +208,7 @@ static void node_select_active_links(struct node *n_ptr) * tipc_node_link_down - handle loss of link */ -void tipc_node_link_down(struct node *n_ptr, struct link *l_ptr) +void tipc_node_link_down(struct tipc_node *n_ptr, struct link *l_ptr) { struct link **active; @@ -209,30 +235,30 @@ void tipc_node_link_down(struct node *n_ptr, struct link *l_ptr) node_lost_contact(n_ptr); } -int tipc_node_has_active_links(struct node *n_ptr) +int tipc_node_has_active_links(struct tipc_node *n_ptr) { return (n_ptr && ((n_ptr->active_links[0]) || (n_ptr->active_links[1]))); } -int tipc_node_has_redundant_links(struct node *n_ptr) +int tipc_node_has_redundant_links(struct tipc_node *n_ptr) { return (n_ptr->working_links > 1); } -static int tipc_node_has_active_routes(struct node *n_ptr) +static int tipc_node_has_active_routes(struct tipc_node *n_ptr) { return (n_ptr && (n_ptr->last_router >= 0)); } -int tipc_node_is_up(struct node *n_ptr) +int tipc_node_is_up(struct tipc_node *n_ptr) { return (tipc_node_has_active_links(n_ptr) || tipc_node_has_active_routes(n_ptr)); } -struct node *tipc_node_attach_link(struct link *l_ptr) +struct tipc_node *tipc_node_attach_link(struct link *l_ptr) { - struct node *n_ptr = tipc_node_find(l_ptr->addr); + struct tipc_node *n_ptr = tipc_node_find(l_ptr->addr); if (!n_ptr) n_ptr = tipc_node_create(l_ptr->addr); @@ -259,7 +285,7 @@ struct node *tipc_node_attach_link(struct link *l_ptr) return NULL; } -void tipc_node_detach_link(struct node *n_ptr, struct link *l_ptr) +void tipc_node_detach_link(struct tipc_node *n_ptr, struct link *l_ptr) { n_ptr->links[l_ptr->b_ptr->identity] = NULL; tipc_net.zones[tipc_zone(l_ptr->addr)]->links--; @@ -312,7 +338,7 @@ void tipc_node_detach_link(struct node *n_ptr, struct link *l_ptr) * */ -static void node_established_contact(struct node *n_ptr) +static void node_established_contact(struct tipc_node *n_ptr) { struct cluster *c_ptr; @@ -358,10 +384,10 @@ static void node_established_contact(struct node *n_ptr) tipc_highest_allowed_slave); } -static void node_lost_contact(struct node *n_ptr) +static void node_lost_contact(struct tipc_node *n_ptr) { struct cluster *c_ptr; - struct node_subscr *ns, *tns; + struct tipc_node_subscr *ns, *tns; char addr_string[16]; u32 i; @@ -440,9 +466,9 @@ static void node_lost_contact(struct node *n_ptr) * Called by when cluster local lookup has failed. */ -struct node *tipc_node_select_next_hop(u32 addr, u32 selector) +struct tipc_node *tipc_node_select_next_hop(u32 addr, u32 selector) { - struct node *n_ptr; + struct tipc_node *n_ptr; u32 router_addr; if (!tipc_addr_domain_valid(addr)) @@ -487,7 +513,7 @@ struct node *tipc_node_select_next_hop(u32 addr, u32 selector) * Uses a deterministic and fair algorithm for selecting router node. */ -u32 tipc_node_select_router(struct node *n_ptr, u32 ref) +u32 tipc_node_select_router(struct tipc_node *n_ptr, u32 ref) { u32 ulim; u32 mask; @@ -525,7 +551,7 @@ u32 tipc_node_select_router(struct node *n_ptr, u32 ref) return tipc_addr(own_zone(), own_cluster(), r); } -void tipc_node_add_router(struct node *n_ptr, u32 router) +void tipc_node_add_router(struct tipc_node *n_ptr, u32 router) { u32 r_num = tipc_node(router); @@ -536,7 +562,7 @@ void tipc_node_add_router(struct node *n_ptr, u32 router) !n_ptr->routers[n_ptr->last_router]); } -void tipc_node_remove_router(struct node *n_ptr, u32 router) +void tipc_node_remove_router(struct tipc_node *n_ptr, u32 router) { u32 r_num = tipc_node(router); @@ -554,7 +580,7 @@ void tipc_node_remove_router(struct node *n_ptr, u32 router) } #if 0 -void node_print(struct print_buf *buf, struct node *n_ptr, char *str) +void node_print(struct print_buf *buf, struct tipc_node *n_ptr, char *str) { u32 i; @@ -571,15 +597,17 @@ void node_print(struct print_buf *buf, struct node *n_ptr, char *str) u32 tipc_available_nodes(const u32 domain) { - struct node *n_ptr; + struct tipc_node *n_ptr; u32 cnt = 0; + read_lock_bh(&tipc_net_lock); for (n_ptr = tipc_nodes; n_ptr; n_ptr = n_ptr->next) { if (!in_scope(domain, n_ptr->addr)) continue; if (tipc_node_is_up(n_ptr)) cnt++; } + read_unlock_bh(&tipc_net_lock); return cnt; } @@ -587,7 +615,7 @@ struct sk_buff *tipc_node_get_nodes(const void *req_tlv_area, int req_tlv_space) { u32 domain; struct sk_buff *buf; - struct node *n_ptr; + struct tipc_node *n_ptr; struct tipc_node_info node_info; u32 payload_size; @@ -599,19 +627,26 @@ struct sk_buff *tipc_node_get_nodes(const void *req_tlv_area, int req_tlv_space) return tipc_cfg_reply_error_string(TIPC_CFG_INVALID_VALUE " (network address)"); - if (!tipc_nodes) + read_lock_bh(&tipc_net_lock); + if (!tipc_nodes) { + read_unlock_bh(&tipc_net_lock); return tipc_cfg_reply_none(); + } /* For now, get space for all other nodes (will need to modify this when slave nodes are supported */ payload_size = TLV_SPACE(sizeof(node_info)) * (tipc_max_nodes - 1); - if (payload_size > 32768u) + if (payload_size > 32768u) { + read_unlock_bh(&tipc_net_lock); return tipc_cfg_reply_error_string(TIPC_CFG_NOT_SUPPORTED " (too many nodes)"); + } buf = tipc_cfg_reply_alloc(payload_size); - if (!buf) + if (!buf) { + read_unlock_bh(&tipc_net_lock); return NULL; + } /* Add TLVs for all nodes in scope */ @@ -624,6 +659,7 @@ struct sk_buff *tipc_node_get_nodes(const void *req_tlv_area, int req_tlv_space) &node_info, sizeof(node_info)); } + read_unlock_bh(&tipc_net_lock); return buf; } @@ -631,7 +667,7 @@ struct sk_buff *tipc_node_get_links(const void *req_tlv_area, int req_tlv_space) { u32 domain; struct sk_buff *buf; - struct node *n_ptr; + struct tipc_node *n_ptr; struct tipc_link_info link_info; u32 payload_size; @@ -646,16 +682,22 @@ struct sk_buff *tipc_node_get_links(const void *req_tlv_area, int req_tlv_space) if (tipc_mode != TIPC_NET_MODE) return tipc_cfg_reply_none(); + read_lock_bh(&tipc_net_lock); + /* Get space for all unicast links + multicast link */ payload_size = TLV_SPACE(sizeof(link_info)) * (tipc_net.zones[tipc_zone(tipc_own_addr)]->links + 1); - if (payload_size > 32768u) + if (payload_size > 32768u) { + read_unlock_bh(&tipc_net_lock); return tipc_cfg_reply_error_string(TIPC_CFG_NOT_SUPPORTED " (too many links)"); + } buf = tipc_cfg_reply_alloc(payload_size); - if (!buf) + if (!buf) { + read_unlock_bh(&tipc_net_lock); return NULL; + } /* Add TLV for broadcast link */ @@ -671,6 +713,7 @@ struct sk_buff *tipc_node_get_links(const void *req_tlv_area, int req_tlv_space) if (!in_scope(domain, n_ptr->addr)) continue; + tipc_node_lock(n_ptr); for (i = 0; i < MAX_BEARERS; i++) { if (!n_ptr->links[i]) continue; @@ -680,7 +723,9 @@ struct sk_buff *tipc_node_get_links(const void *req_tlv_area, int req_tlv_space) tipc_cfg_append_tlv(buf, TIPC_TLV_LINK_INFO, &link_info, sizeof(link_info)); } + tipc_node_unlock(n_ptr); } + read_unlock_bh(&tipc_net_lock); return buf; } diff --git a/net/tipc/node.h b/net/tipc/node.h index cd1882654bbb..6f990da5d143 100644 --- a/net/tipc/node.h +++ b/net/tipc/node.h @@ -43,7 +43,7 @@ #include "bearer.h" /** - * struct node - TIPC node structure + * struct tipc_node - TIPC node structure * @addr: network address of node * @lock: spinlock governing access to structure * @owner: pointer to cluster that node belongs to @@ -68,11 +68,11 @@ * @defragm: list of partially reassembled b'cast message fragments from node */ -struct node { +struct tipc_node { u32 addr; spinlock_t lock; struct cluster *owner; - struct node *next; + struct tipc_node *next; struct list_head nsub; struct link *active_links[2]; struct link *links[MAX_BEARERS]; @@ -94,26 +94,26 @@ struct node { } bclink; }; -extern struct node *tipc_nodes; +extern struct tipc_node *tipc_nodes; extern u32 tipc_own_tag; -struct node *tipc_node_create(u32 addr); -void tipc_node_delete(struct node *n_ptr); -struct node *tipc_node_attach_link(struct link *l_ptr); -void tipc_node_detach_link(struct node *n_ptr, struct link *l_ptr); -void tipc_node_link_down(struct node *n_ptr, struct link *l_ptr); -void tipc_node_link_up(struct node *n_ptr, struct link *l_ptr); -int tipc_node_has_active_links(struct node *n_ptr); -int tipc_node_has_redundant_links(struct node *n_ptr); -u32 tipc_node_select_router(struct node *n_ptr, u32 ref); -struct node *tipc_node_select_next_hop(u32 addr, u32 selector); -int tipc_node_is_up(struct node *n_ptr); -void tipc_node_add_router(struct node *n_ptr, u32 router); -void tipc_node_remove_router(struct node *n_ptr, u32 router); +struct tipc_node *tipc_node_create(u32 addr); +void tipc_node_delete(struct tipc_node *n_ptr); +struct tipc_node *tipc_node_attach_link(struct link *l_ptr); +void tipc_node_detach_link(struct tipc_node *n_ptr, struct link *l_ptr); +void tipc_node_link_down(struct tipc_node *n_ptr, struct link *l_ptr); +void tipc_node_link_up(struct tipc_node *n_ptr, struct link *l_ptr); +int tipc_node_has_active_links(struct tipc_node *n_ptr); +int tipc_node_has_redundant_links(struct tipc_node *n_ptr); +u32 tipc_node_select_router(struct tipc_node *n_ptr, u32 ref); +struct tipc_node *tipc_node_select_next_hop(u32 addr, u32 selector); +int tipc_node_is_up(struct tipc_node *n_ptr); +void tipc_node_add_router(struct tipc_node *n_ptr, u32 router); +void tipc_node_remove_router(struct tipc_node *n_ptr, u32 router); struct sk_buff *tipc_node_get_links(const void *req_tlv_area, int req_tlv_space); struct sk_buff *tipc_node_get_nodes(const void *req_tlv_area, int req_tlv_space); -static inline struct node *tipc_node_find(u32 addr) +static inline struct tipc_node *tipc_node_find(u32 addr) { if (likely(in_own_cluster(addr))) return tipc_local_nodes[tipc_node(addr)]; @@ -126,19 +126,19 @@ static inline struct node *tipc_node_find(u32 addr) return NULL; } -static inline struct node *tipc_node_select(u32 addr, u32 selector) +static inline struct tipc_node *tipc_node_select(u32 addr, u32 selector) { if (likely(in_own_cluster(addr))) return tipc_local_nodes[tipc_node(addr)]; return tipc_node_select_next_hop(addr, selector); } -static inline void tipc_node_lock(struct node *n_ptr) +static inline void tipc_node_lock(struct tipc_node *n_ptr) { spin_lock_bh(&n_ptr->lock); } -static inline void tipc_node_unlock(struct node *n_ptr) +static inline void tipc_node_unlock(struct tipc_node *n_ptr) { spin_unlock_bh(&n_ptr->lock); } diff --git a/net/tipc/node_subscr.c b/net/tipc/node_subscr.c index 8ecbd0fb6103..19194d476a9e 100644 --- a/net/tipc/node_subscr.c +++ b/net/tipc/node_subscr.c @@ -44,7 +44,7 @@ * tipc_nodesub_subscribe - create "node down" subscription for specified node */ -void tipc_nodesub_subscribe(struct node_subscr *node_sub, u32 addr, +void tipc_nodesub_subscribe(struct tipc_node_subscr *node_sub, u32 addr, void *usr_handle, net_ev_handler handle_down) { if (addr == tipc_own_addr) { @@ -69,7 +69,7 @@ void tipc_nodesub_subscribe(struct node_subscr *node_sub, u32 addr, * tipc_nodesub_unsubscribe - cancel "node down" subscription (if any) */ -void tipc_nodesub_unsubscribe(struct node_subscr *node_sub) +void tipc_nodesub_unsubscribe(struct tipc_node_subscr *node_sub) { if (!node_sub->node) return; diff --git a/net/tipc/node_subscr.h b/net/tipc/node_subscr.h index 5f3f5859b84c..006ed739f515 100644 --- a/net/tipc/node_subscr.h +++ b/net/tipc/node_subscr.h @@ -42,22 +42,22 @@ typedef void (*net_ev_handler) (void *usr_handle); /** - * struct node_subscr - "node down" subscription entry + * struct tipc_node_subscr - "node down" subscription entry * @node: ptr to node structure of interest (or NULL, if none) * @handle_node_down: routine to invoke when node fails * @usr_handle: argument to pass to routine when node fails * @nodesub_list: adjacent entries in list of subscriptions for the node */ -struct node_subscr { - struct node *node; +struct tipc_node_subscr { + struct tipc_node *node; net_ev_handler handle_node_down; void *usr_handle; struct list_head nodesub_list; }; -void tipc_nodesub_subscribe(struct node_subscr *node_sub, u32 addr, +void tipc_nodesub_subscribe(struct tipc_node_subscr *node_sub, u32 addr, void *usr_handle, net_ev_handler handle_down); -void tipc_nodesub_unsubscribe(struct node_subscr *node_sub); +void tipc_nodesub_unsubscribe(struct tipc_node_subscr *node_sub); #endif diff --git a/net/tipc/port.c b/net/tipc/port.c index 2f5806410c64..e70d27ea6578 100644 --- a/net/tipc/port.c +++ b/net/tipc/port.c @@ -2,7 +2,7 @@ * net/tipc/port.c: TIPC port code * * Copyright (c) 1992-2007, Ericsson AB - * Copyright (c) 2004-2007, Wind River Systems + * Copyright (c) 2004-2008, Wind River Systems * All rights reserved. * * Redistribution and use in source and binary forms, with or without @@ -211,12 +211,12 @@ exit: } /** - * tipc_createport_raw - create a native TIPC port + * tipc_createport_raw - create a generic TIPC port * - * Returns local port reference + * Returns pointer to (locked) TIPC port, or NULL if unable to create it */ -u32 tipc_createport_raw(void *usr_handle, +struct tipc_port *tipc_createport_raw(void *usr_handle, u32 (*dispatcher)(struct tipc_port *, struct sk_buff *), void (*wakeup)(struct tipc_port *), const u32 importance) @@ -228,26 +228,21 @@ u32 tipc_createport_raw(void *usr_handle, p_ptr = kzalloc(sizeof(*p_ptr), GFP_ATOMIC); if (!p_ptr) { warn("Port creation failed, no memory\n"); - return 0; + return NULL; } ref = tipc_ref_acquire(p_ptr, &p_ptr->publ.lock); if (!ref) { warn("Port creation failed, reference table exhausted\n"); kfree(p_ptr); - return 0; + return NULL; } - tipc_port_lock(ref); p_ptr->publ.usr_handle = usr_handle; p_ptr->publ.max_pkt = MAX_PKT_DEFAULT; p_ptr->publ.ref = ref; msg = &p_ptr->publ.phdr; - msg_init(msg, TIPC_LOW_IMPORTANCE, TIPC_NAMED_MSG, TIPC_OK, LONG_H_SIZE, - 0); - msg_set_orignode(msg, tipc_own_addr); - msg_set_prevnode(msg, tipc_own_addr); + msg_init(msg, importance, TIPC_NAMED_MSG, LONG_H_SIZE, 0); msg_set_origport(msg, ref); - msg_set_importance(msg,importance); p_ptr->last_in_seqno = 41; p_ptr->sent = 1; INIT_LIST_HEAD(&p_ptr->wait_list); @@ -262,8 +257,7 @@ u32 tipc_createport_raw(void *usr_handle, INIT_LIST_HEAD(&p_ptr->port_list); list_add_tail(&p_ptr->port_list, &ports); spin_unlock_bh(&tipc_port_list_lock); - tipc_port_unlock(p_ptr); - return ref; + return &(p_ptr->publ); } int tipc_deleteport(u32 ref) @@ -297,7 +291,7 @@ int tipc_deleteport(u32 ref) kfree(p_ptr); dbg("Deleted port %u\n", ref); tipc_net_route_msg(buf); - return TIPC_OK; + return 0; } /** @@ -342,7 +336,7 @@ int tipc_portunreliable(u32 ref, unsigned int *isunreliable) return -EINVAL; *isunreliable = port_unreliable(p_ptr); tipc_port_unlock(p_ptr); - return TIPC_OK; + return 0; } int tipc_set_portunreliable(u32 ref, unsigned int isunreliable) @@ -354,7 +348,7 @@ int tipc_set_portunreliable(u32 ref, unsigned int isunreliable) return -EINVAL; msg_set_src_droppable(&p_ptr->publ.phdr, (isunreliable != 0)); tipc_port_unlock(p_ptr); - return TIPC_OK; + return 0; } static int port_unreturnable(struct port *p_ptr) @@ -371,7 +365,7 @@ int tipc_portunreturnable(u32 ref, unsigned int *isunrejectable) return -EINVAL; *isunrejectable = port_unreturnable(p_ptr); tipc_port_unlock(p_ptr); - return TIPC_OK; + return 0; } int tipc_set_portunreturnable(u32 ref, unsigned int isunrejectable) @@ -383,7 +377,7 @@ int tipc_set_portunreturnable(u32 ref, unsigned int isunrejectable) return -EINVAL; msg_set_dest_droppable(&p_ptr->publ.phdr, (isunrejectable != 0)); tipc_port_unlock(p_ptr); - return TIPC_OK; + return 0; } /* @@ -402,10 +396,10 @@ static struct sk_buff *port_build_proto_msg(u32 destport, u32 destnode, buf = buf_acquire(LONG_H_SIZE); if (buf) { msg = buf_msg(buf); - msg_init(msg, usr, type, err, LONG_H_SIZE, destnode); + msg_init(msg, usr, type, LONG_H_SIZE, destnode); + msg_set_errcode(msg, err); msg_set_destport(msg, destport); msg_set_origport(msg, origport); - msg_set_destnode(msg, destnode); msg_set_orignode(msg, orignode); msg_set_transp_seqno(msg, seqno); msg_set_msgcnt(msg, ack); @@ -446,17 +440,19 @@ int tipc_reject_msg(struct sk_buff *buf, u32 err) return data_sz; } rmsg = buf_msg(rbuf); - msg_init(rmsg, imp, msg_type(msg), err, hdr_sz, msg_orignode(msg)); + msg_init(rmsg, imp, msg_type(msg), hdr_sz, msg_orignode(msg)); + msg_set_errcode(rmsg, err); msg_set_destport(rmsg, msg_origport(msg)); - msg_set_prevnode(rmsg, tipc_own_addr); msg_set_origport(rmsg, msg_destport(msg)); - if (msg_short(msg)) + if (msg_short(msg)) { msg_set_orignode(rmsg, tipc_own_addr); - else + /* leave name type & instance as zeroes */ + } else { msg_set_orignode(rmsg, msg_destnode(msg)); + msg_set_nametype(rmsg, msg_nametype(msg)); + msg_set_nameinst(rmsg, msg_nameinst(msg)); + } msg_set_size(rmsg, data_sz + hdr_sz); - msg_set_nametype(rmsg, msg_nametype(msg)); - msg_set_nameinst(rmsg, msg_nameinst(msg)); skb_copy_to_linear_data_offset(rbuf, hdr_sz, msg_data(msg), data_sz); /* send self-abort message when rejecting on a connected port */ @@ -778,6 +774,7 @@ void tipc_port_reinit(void) msg = &p_ptr->publ.phdr; if (msg_orignode(msg) == tipc_own_addr) break; + msg_set_prevnode(msg, tipc_own_addr); msg_set_orignode(msg, tipc_own_addr); } spin_unlock_bh(&tipc_port_list_lock); @@ -838,16 +835,13 @@ static void port_dispatcher_sigh(void *dummy) u32 peer_node = port_peernode(p_ptr); tipc_port_unlock(p_ptr); + if (unlikely(!cb)) + goto reject; if (unlikely(!connected)) { - if (unlikely(published)) + if (tipc_connect2port(dref, &orig)) goto reject; - tipc_connect2port(dref,&orig); - } - if (unlikely(msg_origport(msg) != peer_port)) - goto reject; - if (unlikely(msg_orignode(msg) != peer_node)) - goto reject; - if (unlikely(!cb)) + } else if ((msg_origport(msg) != peer_port) || + (msg_orignode(msg) != peer_node)) goto reject; if (unlikely(++p_ptr->publ.conn_unacked >= TIPC_FLOW_CONTROL_WIN)) @@ -862,9 +856,7 @@ static void port_dispatcher_sigh(void *dummy) tipc_msg_event cb = up_ptr->msg_cb; tipc_port_unlock(p_ptr); - if (unlikely(connected)) - goto reject; - if (unlikely(!cb)) + if (unlikely(!cb || connected)) goto reject; skb_pull(buf, msg_hdr_sz(msg)); cb(usr_handle, dref, &buf, msg_data(msg), @@ -877,11 +869,7 @@ static void port_dispatcher_sigh(void *dummy) tipc_named_msg_event cb = up_ptr->named_msg_cb; tipc_port_unlock(p_ptr); - if (unlikely(connected)) - goto reject; - if (unlikely(!cb)) - goto reject; - if (unlikely(!published)) + if (unlikely(!cb || connected || !published)) goto reject; dseq.type = msg_nametype(msg); dseq.lower = msg_nameinst(msg); @@ -908,11 +896,10 @@ err: u32 peer_node = port_peernode(p_ptr); tipc_port_unlock(p_ptr); - if (!connected || !cb) - break; - if (msg_origport(msg) != peer_port) + if (!cb || !connected) break; - if (msg_orignode(msg) != peer_node) + if ((msg_origport(msg) != peer_port) || + (msg_orignode(msg) != peer_node)) break; tipc_disconnect(dref); skb_pull(buf, msg_hdr_sz(msg)); @@ -924,7 +911,7 @@ err: tipc_msg_err_event cb = up_ptr->err_cb; tipc_port_unlock(p_ptr); - if (connected || !cb) + if (!cb || connected) break; skb_pull(buf, msg_hdr_sz(msg)); cb(usr_handle, dref, &buf, msg_data(msg), @@ -937,7 +924,7 @@ err: up_ptr->named_err_cb; tipc_port_unlock(p_ptr); - if (connected || !cb) + if (!cb || connected) break; dseq.type = msg_nametype(msg); dseq.lower = msg_nameinst(msg); @@ -976,7 +963,7 @@ static u32 port_dispatcher(struct tipc_port *dummy, struct sk_buff *buf) tipc_k_signal((Handler)port_dispatcher_sigh, 0); } spin_unlock_bh(&queue_lock); - return TIPC_OK; + return 0; } /* @@ -1053,15 +1040,14 @@ int tipc_createport(u32 user_ref, { struct user_port *up_ptr; struct port *p_ptr; - u32 ref; up_ptr = kmalloc(sizeof(*up_ptr), GFP_ATOMIC); if (!up_ptr) { warn("Port creation failed, no memory\n"); return -ENOMEM; } - ref = tipc_createport_raw(NULL, port_dispatcher, port_wakeup, importance); - p_ptr = tipc_port_lock(ref); + p_ptr = (struct port *)tipc_createport_raw(NULL, port_dispatcher, + port_wakeup, importance); if (!p_ptr) { kfree(up_ptr); return -ENOMEM; @@ -1081,16 +1067,15 @@ int tipc_createport(u32 user_ref, INIT_LIST_HEAD(&up_ptr->uport_list); tipc_reg_add_port(up_ptr); *portref = p_ptr->publ.ref; - dbg(" tipc_createport: %x with ref %u\n", p_ptr, p_ptr->publ.ref); tipc_port_unlock(p_ptr); - return TIPC_OK; + return 0; } int tipc_ownidentity(u32 ref, struct tipc_portid *id) { id->ref = ref; id->node = tipc_own_addr; - return TIPC_OK; + return 0; } int tipc_portimportance(u32 ref, unsigned int *importance) @@ -1102,7 +1087,7 @@ int tipc_portimportance(u32 ref, unsigned int *importance) return -EINVAL; *importance = (unsigned int)msg_importance(&p_ptr->publ.phdr); tipc_port_unlock(p_ptr); - return TIPC_OK; + return 0; } int tipc_set_portimportance(u32 ref, unsigned int imp) @@ -1117,7 +1102,7 @@ int tipc_set_portimportance(u32 ref, unsigned int imp) return -EINVAL; msg_set_importance(&p_ptr->publ.phdr, (u32)imp); tipc_port_unlock(p_ptr); - return TIPC_OK; + return 0; } @@ -1152,7 +1137,7 @@ int tipc_publish(u32 ref, unsigned int scope, struct tipc_name_seq const *seq) list_add(&publ->pport_list, &p_ptr->publications); p_ptr->pub_count++; p_ptr->publ.published = 1; - res = TIPC_OK; + res = 0; } exit: tipc_port_unlock(p_ptr); @@ -1175,7 +1160,7 @@ int tipc_withdraw(u32 ref, unsigned int scope, struct tipc_name_seq const *seq) tipc_nametbl_withdraw(publ->type, publ->lower, publ->ref, publ->key); } - res = TIPC_OK; + res = 0; } else { list_for_each_entry_safe(publ, tpubl, &p_ptr->publications, pport_list) { @@ -1189,7 +1174,7 @@ int tipc_withdraw(u32 ref, unsigned int scope, struct tipc_name_seq const *seq) break; tipc_nametbl_withdraw(publ->type, publ->lower, publ->ref, publ->key); - res = TIPC_OK; + res = 0; break; } } @@ -1233,7 +1218,7 @@ int tipc_connect2port(u32 ref, struct tipc_portid const *peer) tipc_nodesub_subscribe(&p_ptr->subscription,peer->node, (void *)(unsigned long)ref, (net_ev_handler)port_handle_node_down); - res = TIPC_OK; + res = 0; exit: tipc_port_unlock(p_ptr); p_ptr->publ.max_pkt = tipc_link_get_max_pkt(peer->node, ref); @@ -1255,7 +1240,7 @@ int tipc_disconnect_port(struct tipc_port *tp_ptr) /* let timer expire on it's own to avoid deadlock! */ tipc_nodesub_unsubscribe( &((struct port *)tp_ptr)->subscription); - res = TIPC_OK; + res = 0; } else { res = -ENOTCONN; } @@ -1320,7 +1305,7 @@ int tipc_isconnected(u32 ref, int *isconnected) return -EINVAL; *isconnected = p_ptr->publ.connected; tipc_port_unlock(p_ptr); - return TIPC_OK; + return 0; } int tipc_peer(u32 ref, struct tipc_portid *peer) @@ -1334,7 +1319,7 @@ int tipc_peer(u32 ref, struct tipc_portid *peer) if (p_ptr->publ.connected) { peer->ref = port_peerport(p_ptr); peer->node = port_peernode(p_ptr); - res = TIPC_OK; + res = 0; } else res = -ENOTCONN; tipc_port_unlock(p_ptr); diff --git a/net/tipc/port.h b/net/tipc/port.h index e5f8c16429bd..ff31ee4a1dc3 100644 --- a/net/tipc/port.h +++ b/net/tipc/port.h @@ -105,7 +105,7 @@ struct port { u32 probing_interval; u32 last_in_seqno; struct timer_list timer; - struct node_subscr subscription; + struct tipc_node_subscr subscription; }; extern spinlock_t tipc_port_list_lock; diff --git a/net/tipc/ref.c b/net/tipc/ref.c index 89cbab24d08f..414fc34b8bea 100644 --- a/net/tipc/ref.c +++ b/net/tipc/ref.c @@ -123,7 +123,7 @@ int tipc_ref_table_init(u32 requested_size, u32 start) tipc_ref_table.index_mask = actual_size - 1; tipc_ref_table.start_mask = start & ~tipc_ref_table.index_mask; - return TIPC_OK; + return 0; } /** @@ -142,9 +142,13 @@ void tipc_ref_table_stop(void) /** * tipc_ref_acquire - create reference to an object * - * Return a unique reference value which can be translated back to the pointer - * 'object' at a later time. Also, pass back a pointer to the lock protecting - * the object, but without locking it. + * Register an object pointer in reference table and lock the object. + * Returns a unique reference value that is used from then on to retrieve the + * object pointer, or to determine that the object has been deregistered. + * + * Note: The object is returned in the locked state so that the caller can + * register a partially initialized object, without running the risk that + * the object will be accessed before initialization is complete. */ u32 tipc_ref_acquire(void *object, spinlock_t **lock) @@ -178,13 +182,13 @@ u32 tipc_ref_acquire(void *object, spinlock_t **lock) ref = (next_plus_upper & ~index_mask) + index; entry->ref = ref; entry->object = object; - spin_unlock_bh(&entry->lock); *lock = &entry->lock; } else if (tipc_ref_table.init_point < tipc_ref_table.capacity) { index = tipc_ref_table.init_point++; entry = &(tipc_ref_table.entries[index]); spin_lock_init(&entry->lock); + spin_lock_bh(&entry->lock); ref = tipc_ref_table.start_mask + index; entry->ref = ref; entry->object = object; diff --git a/net/tipc/socket.c b/net/tipc/socket.c index 230f9ca2ad6b..1848693ebb82 100644 --- a/net/tipc/socket.c +++ b/net/tipc/socket.c @@ -2,7 +2,7 @@ * net/tipc/socket.c: TIPC socket API * * Copyright (c) 2001-2007, Ericsson AB - * Copyright (c) 2004-2007, Wind River Systems + * Copyright (c) 2004-2008, Wind River Systems * All rights reserved. * * Redistribution and use in source and binary forms, with or without @@ -63,6 +63,7 @@ struct tipc_sock { struct sock sk; struct tipc_port *p; + struct tipc_portid peer_name; }; #define tipc_sk(sk) ((struct tipc_sock *)(sk)) @@ -188,7 +189,7 @@ static int tipc_create(struct net *net, struct socket *sock, int protocol) const struct proto_ops *ops; socket_state state; struct sock *sk; - u32 portref; + struct tipc_port *tp_ptr; /* Validate arguments */ @@ -224,9 +225,9 @@ static int tipc_create(struct net *net, struct socket *sock, int protocol) /* Allocate TIPC port for socket to use */ - portref = tipc_createport_raw(sk, &dispatch, &wakeupdispatch, - TIPC_LOW_IMPORTANCE); - if (unlikely(portref == 0)) { + tp_ptr = tipc_createport_raw(sk, &dispatch, &wakeupdispatch, + TIPC_LOW_IMPORTANCE); + if (unlikely(!tp_ptr)) { sk_free(sk); return -ENOMEM; } @@ -239,12 +240,14 @@ static int tipc_create(struct net *net, struct socket *sock, int protocol) sock_init_data(sock, sk); sk->sk_rcvtimeo = msecs_to_jiffies(CONN_TIMEOUT_DEFAULT); sk->sk_backlog_rcv = backlog_rcv; - tipc_sk(sk)->p = tipc_get_port(portref); + tipc_sk(sk)->p = tp_ptr; + + spin_unlock_bh(tp_ptr->lock); if (sock->state == SS_READY) { - tipc_set_portunreturnable(portref, 1); + tipc_set_portunreturnable(tp_ptr->ref, 1); if (sock->type == SOCK_DGRAM) - tipc_set_portunreliable(portref, 1); + tipc_set_portunreliable(tp_ptr->ref, 1); } atomic_inc(&tipc_user_count); @@ -375,27 +378,29 @@ static int bind(struct socket *sock, struct sockaddr *uaddr, int uaddr_len) * @sock: socket structure * @uaddr: area for returned socket address * @uaddr_len: area for returned length of socket address - * @peer: 0 to obtain socket name, 1 to obtain peer socket name + * @peer: 0 = own ID, 1 = current peer ID, 2 = current/former peer ID * * Returns 0 on success, errno otherwise * - * NOTE: This routine doesn't need to take the socket lock since it doesn't - * access any non-constant socket information. + * NOTE: This routine doesn't need to take the socket lock since it only + * accesses socket information that is unchanging (or which changes in + * a completely predictable manner). */ static int get_name(struct socket *sock, struct sockaddr *uaddr, int *uaddr_len, int peer) { struct sockaddr_tipc *addr = (struct sockaddr_tipc *)uaddr; - u32 portref = tipc_sk_port(sock->sk)->ref; - u32 res; + struct tipc_sock *tsock = tipc_sk(sock->sk); if (peer) { - res = tipc_peer(portref, &addr->addr.id); - if (res) - return res; + if ((sock->state != SS_CONNECTED) && + ((peer != 2) || (sock->state != SS_DISCONNECTING))) + return -ENOTCONN; + addr->addr.id.ref = tsock->peer_name.ref; + addr->addr.id.node = tsock->peer_name.node; } else { - tipc_ownidentity(portref, &addr->addr.id); + tipc_ownidentity(tsock->p->ref, &addr->addr.id); } *uaddr_len = sizeof(*addr); @@ -764,18 +769,17 @@ exit: static int auto_connect(struct socket *sock, struct tipc_msg *msg) { - struct tipc_port *tport = tipc_sk_port(sock->sk); - struct tipc_portid peer; + struct tipc_sock *tsock = tipc_sk(sock->sk); if (msg_errcode(msg)) { sock->state = SS_DISCONNECTING; return -ECONNREFUSED; } - peer.ref = msg_origport(msg); - peer.node = msg_orignode(msg); - tipc_connect2port(tport->ref, &peer); - tipc_set_portimportance(tport->ref, msg_importance(msg)); + tsock->peer_name.ref = msg_origport(msg); + tsock->peer_name.node = msg_orignode(msg); + tipc_connect2port(tsock->p->ref, &tsock->peer_name); + tipc_set_portimportance(tsock->p->ref, msg_importance(msg)); sock->state = SS_CONNECTED; return 0; } @@ -1131,7 +1135,7 @@ restart: /* Loop around if more data is required */ if ((sz_copied < buf_len) /* didn't get all requested data */ - && (!skb_queue_empty(&sock->sk->sk_receive_queue) || + && (!skb_queue_empty(&sk->sk_receive_queue) || (flags & MSG_WAITALL)) /* ... and more is ready or required */ && (!(flags & MSG_PEEK)) /* ... and aren't just peeking at data */ @@ -1527,9 +1531,9 @@ static int accept(struct socket *sock, struct socket *new_sock, int flags) res = tipc_create(sock_net(sock->sk), new_sock, 0); if (!res) { struct sock *new_sk = new_sock->sk; - struct tipc_port *new_tport = tipc_sk_port(new_sk); + struct tipc_sock *new_tsock = tipc_sk(new_sk); + struct tipc_port *new_tport = new_tsock->p; u32 new_ref = new_tport->ref; - struct tipc_portid id; struct tipc_msg *msg = buf_msg(buf); lock_sock(new_sk); @@ -1543,9 +1547,9 @@ static int accept(struct socket *sock, struct socket *new_sock, int flags) /* Connect new socket to it's peer */ - id.ref = msg_origport(msg); - id.node = msg_orignode(msg); - tipc_connect2port(new_ref, &id); + new_tsock->peer_name.ref = msg_origport(msg); + new_tsock->peer_name.node = msg_orignode(msg); + tipc_connect2port(new_ref, &new_tsock->peer_name); new_sock->state = SS_CONNECTED; tipc_set_portimportance(new_ref, msg_importance(msg)); diff --git a/net/tipc/subscr.c b/net/tipc/subscr.c index 8c01ccd3626c..0747d8a9232f 100644 --- a/net/tipc/subscr.c +++ b/net/tipc/subscr.c @@ -1,8 +1,8 @@ /* - * net/tipc/subscr.c: TIPC subscription service + * net/tipc/subscr.c: TIPC network topology service * * Copyright (c) 2000-2006, Ericsson AB - * Copyright (c) 2005, Wind River Systems + * Copyright (c) 2005-2007, Wind River Systems * All rights reserved. * * Redistribution and use in source and binary forms, with or without @@ -36,27 +36,24 @@ #include "core.h" #include "dbg.h" -#include "subscr.h" #include "name_table.h" +#include "port.h" #include "ref.h" +#include "subscr.h" /** * struct subscriber - TIPC network topology subscriber - * @ref: object reference to subscriber object itself - * @lock: pointer to spinlock controlling access to subscriber object + * @port_ref: object reference to server port connecting to subscriber + * @lock: pointer to spinlock controlling access to subscriber's server port * @subscriber_list: adjacent subscribers in top. server's list of subscribers * @subscription_list: list of subscription objects for this subscriber - * @port_ref: object reference to port used to communicate with subscriber - * @swap: indicates if subscriber uses opposite endianness in its messages */ struct subscriber { - u32 ref; + u32 port_ref; spinlock_t *lock; struct list_head subscriber_list; struct list_head subscription_list; - u32 port_ref; - int swap; }; /** @@ -88,13 +85,14 @@ static struct top_srv topsrv = { 0 }; static u32 htohl(u32 in, int swap) { - char *c = (char *)∈ - - return swap ? ((c[3] << 3) + (c[2] << 2) + (c[1] << 1) + c[0]) : in; + return swap ? swab32(in) : in; } /** * subscr_send_event - send a message containing a tipc_event to the subscriber + * + * Note: Must not hold subscriber's server port lock, since tipc_send() will + * try to take the lock if the message is rejected and returned! */ static void subscr_send_event(struct subscription *sub, @@ -109,12 +107,12 @@ static void subscr_send_event(struct subscription *sub, msg_sect.iov_base = (void *)&sub->evt; msg_sect.iov_len = sizeof(struct tipc_event); - sub->evt.event = htohl(event, sub->owner->swap); - sub->evt.found_lower = htohl(found_lower, sub->owner->swap); - sub->evt.found_upper = htohl(found_upper, sub->owner->swap); - sub->evt.port.ref = htohl(port_ref, sub->owner->swap); - sub->evt.port.node = htohl(node, sub->owner->swap); - tipc_send(sub->owner->port_ref, 1, &msg_sect); + sub->evt.event = htohl(event, sub->swap); + sub->evt.found_lower = htohl(found_lower, sub->swap); + sub->evt.found_upper = htohl(found_upper, sub->swap); + sub->evt.port.ref = htohl(port_ref, sub->swap); + sub->evt.port.node = htohl(node, sub->swap); + tipc_send(sub->server_ref, 1, &msg_sect); } /** @@ -151,13 +149,12 @@ void tipc_subscr_report_overlap(struct subscription *sub, u32 node, int must) { - dbg("Rep overlap %u:%u,%u<->%u,%u\n", sub->seq.type, sub->seq.lower, - sub->seq.upper, found_lower, found_upper); if (!tipc_subscr_overlap(sub, found_lower, found_upper)) return; if (!must && !(sub->filter & TIPC_SUB_PORTS)) return; - subscr_send_event(sub, found_lower, found_upper, event, port_ref, node); + + sub->event_cb(sub, found_lower, found_upper, event, port_ref, node); } /** @@ -166,20 +163,18 @@ void tipc_subscr_report_overlap(struct subscription *sub, static void subscr_timeout(struct subscription *sub) { - struct subscriber *subscriber; - u32 subscriber_ref; + struct port *server_port; - /* Validate subscriber reference (in case subscriber is terminating) */ + /* Validate server port reference (in case subscriber is terminating) */ - subscriber_ref = sub->owner->ref; - subscriber = (struct subscriber *)tipc_ref_lock(subscriber_ref); - if (subscriber == NULL) + server_port = tipc_port_lock(sub->server_ref); + if (server_port == NULL) return; /* Validate timeout (in case subscription is being cancelled) */ if (sub->timeout == TIPC_WAIT_FOREVER) { - tipc_ref_unlock(subscriber_ref); + tipc_port_unlock(server_port); return; } @@ -187,19 +182,21 @@ static void subscr_timeout(struct subscription *sub) tipc_nametbl_unsubscribe(sub); - /* Notify subscriber of timeout, then unlink subscription */ + /* Unlink subscription from subscriber */ - subscr_send_event(sub, - sub->evt.s.seq.lower, - sub->evt.s.seq.upper, - TIPC_SUBSCR_TIMEOUT, - 0, - 0); list_del(&sub->subscription_list); + /* Release subscriber's server port */ + + tipc_port_unlock(server_port); + + /* Notify subscriber of timeout */ + + subscr_send_event(sub, sub->evt.s.seq.lower, sub->evt.s.seq.upper, + TIPC_SUBSCR_TIMEOUT, 0, 0); + /* Now destroy subscription */ - tipc_ref_unlock(subscriber_ref); k_term_timer(&sub->timer); kfree(sub); atomic_dec(&topsrv.subscription_count); @@ -208,7 +205,7 @@ static void subscr_timeout(struct subscription *sub) /** * subscr_del - delete a subscription within a subscription list * - * Called with subscriber locked. + * Called with subscriber port locked. */ static void subscr_del(struct subscription *sub) @@ -222,7 +219,7 @@ static void subscr_del(struct subscription *sub) /** * subscr_terminate - terminate communication with a subscriber * - * Called with subscriber locked. Routine must temporarily release this lock + * Called with subscriber port locked. Routine must temporarily release lock * to enable subscription timeout routine(s) to finish without deadlocking; * the lock is then reclaimed to allow caller to release it upon return. * (This should work even in the unlikely event some other thread creates @@ -232,14 +229,21 @@ static void subscr_del(struct subscription *sub) static void subscr_terminate(struct subscriber *subscriber) { + u32 port_ref; struct subscription *sub; struct subscription *sub_temp; /* Invalidate subscriber reference */ - tipc_ref_discard(subscriber->ref); + port_ref = subscriber->port_ref; + subscriber->port_ref = 0; spin_unlock_bh(subscriber->lock); + /* Sever connection to subscriber */ + + tipc_shutdown(port_ref); + tipc_deleteport(port_ref); + /* Destroy any existing subscriptions for subscriber */ list_for_each_entry_safe(sub, sub_temp, &subscriber->subscription_list, @@ -253,27 +257,25 @@ static void subscr_terminate(struct subscriber *subscriber) subscr_del(sub); } - /* Sever connection to subscriber */ - - tipc_shutdown(subscriber->port_ref); - tipc_deleteport(subscriber->port_ref); - /* Remove subscriber from topology server's subscriber list */ spin_lock_bh(&topsrv.lock); list_del(&subscriber->subscriber_list); spin_unlock_bh(&topsrv.lock); - /* Now destroy subscriber */ + /* Reclaim subscriber lock */ spin_lock_bh(subscriber->lock); + + /* Now destroy subscriber */ + kfree(subscriber); } /** * subscr_cancel - handle subscription cancellation request * - * Called with subscriber locked. Routine must temporarily release this lock + * Called with subscriber port locked. Routine must temporarily release lock * to enable the subscription timeout routine to finish without deadlocking; * the lock is then reclaimed to allow caller to release it upon return. * @@ -316,27 +318,25 @@ static void subscr_cancel(struct tipc_subscr *s, /** * subscr_subscribe - create subscription for subscriber * - * Called with subscriber locked + * Called with subscriber port locked. */ -static void subscr_subscribe(struct tipc_subscr *s, - struct subscriber *subscriber) +static struct subscription *subscr_subscribe(struct tipc_subscr *s, + struct subscriber *subscriber) { struct subscription *sub; + int swap; - /* Determine/update subscriber's endianness */ + /* Determine subscriber's endianness */ - if (s->filter & (TIPC_SUB_PORTS | TIPC_SUB_SERVICE)) - subscriber->swap = 0; - else - subscriber->swap = 1; + swap = !(s->filter & (TIPC_SUB_PORTS | TIPC_SUB_SERVICE)); /* Detect & process a subscription cancellation request */ - if (s->filter & htohl(TIPC_SUB_CANCEL, subscriber->swap)) { - s->filter &= ~htohl(TIPC_SUB_CANCEL, subscriber->swap); + if (s->filter & htohl(TIPC_SUB_CANCEL, swap)) { + s->filter &= ~htohl(TIPC_SUB_CANCEL, swap); subscr_cancel(s, subscriber); - return; + return NULL; } /* Refuse subscription if global limit exceeded */ @@ -345,63 +345,66 @@ static void subscr_subscribe(struct tipc_subscr *s, warn("Subscription rejected, subscription limit reached (%u)\n", tipc_max_subscriptions); subscr_terminate(subscriber); - return; + return NULL; } /* Allocate subscription object */ - sub = kzalloc(sizeof(*sub), GFP_ATOMIC); + sub = kmalloc(sizeof(*sub), GFP_ATOMIC); if (!sub) { warn("Subscription rejected, no memory\n"); subscr_terminate(subscriber); - return; + return NULL; } /* Initialize subscription object */ - sub->seq.type = htohl(s->seq.type, subscriber->swap); - sub->seq.lower = htohl(s->seq.lower, subscriber->swap); - sub->seq.upper = htohl(s->seq.upper, subscriber->swap); - sub->timeout = htohl(s->timeout, subscriber->swap); - sub->filter = htohl(s->filter, subscriber->swap); + sub->seq.type = htohl(s->seq.type, swap); + sub->seq.lower = htohl(s->seq.lower, swap); + sub->seq.upper = htohl(s->seq.upper, swap); + sub->timeout = htohl(s->timeout, swap); + sub->filter = htohl(s->filter, swap); if ((!(sub->filter & TIPC_SUB_PORTS) == !(sub->filter & TIPC_SUB_SERVICE)) || (sub->seq.lower > sub->seq.upper)) { warn("Subscription rejected, illegal request\n"); kfree(sub); subscr_terminate(subscriber); - return; + return NULL; } - memcpy(&sub->evt.s, s, sizeof(struct tipc_subscr)); - INIT_LIST_HEAD(&sub->subscription_list); + sub->event_cb = subscr_send_event; INIT_LIST_HEAD(&sub->nameseq_list); list_add(&sub->subscription_list, &subscriber->subscription_list); + sub->server_ref = subscriber->port_ref; + sub->swap = swap; + memcpy(&sub->evt.s, s, sizeof(struct tipc_subscr)); atomic_inc(&topsrv.subscription_count); if (sub->timeout != TIPC_WAIT_FOREVER) { k_init_timer(&sub->timer, (Handler)subscr_timeout, (unsigned long)sub); k_start_timer(&sub->timer, sub->timeout); } - sub->owner = subscriber; - tipc_nametbl_subscribe(sub); + + return sub; } /** * subscr_conn_shutdown_event - handle termination request from subscriber + * + * Called with subscriber's server port unlocked. */ static void subscr_conn_shutdown_event(void *usr_handle, - u32 portref, + u32 port_ref, struct sk_buff **buf, unsigned char const *data, unsigned int size, int reason) { - struct subscriber *subscriber; + struct subscriber *subscriber = usr_handle; spinlock_t *subscriber_lock; - subscriber = tipc_ref_lock((u32)(unsigned long)usr_handle); - if (subscriber == NULL) + if (tipc_port_lock(port_ref) == NULL) return; subscriber_lock = subscriber->lock; @@ -411,6 +414,8 @@ static void subscr_conn_shutdown_event(void *usr_handle, /** * subscr_conn_msg_event - handle new subscription request from subscriber + * + * Called with subscriber's server port unlocked. */ static void subscr_conn_msg_event(void *usr_handle, @@ -419,20 +424,46 @@ static void subscr_conn_msg_event(void *usr_handle, const unchar *data, u32 size) { - struct subscriber *subscriber; + struct subscriber *subscriber = usr_handle; spinlock_t *subscriber_lock; + struct subscription *sub; + + /* + * Lock subscriber's server port (& make a local copy of lock pointer, + * in case subscriber is deleted while processing subscription request) + */ - subscriber = tipc_ref_lock((u32)(unsigned long)usr_handle); - if (subscriber == NULL) + if (tipc_port_lock(port_ref) == NULL) return; subscriber_lock = subscriber->lock; - if (size != sizeof(struct tipc_subscr)) - subscr_terminate(subscriber); - else - subscr_subscribe((struct tipc_subscr *)data, subscriber); - spin_unlock_bh(subscriber_lock); + if (size != sizeof(struct tipc_subscr)) { + subscr_terminate(subscriber); + spin_unlock_bh(subscriber_lock); + } else { + sub = subscr_subscribe((struct tipc_subscr *)data, subscriber); + spin_unlock_bh(subscriber_lock); + if (sub != NULL) { + + /* + * We must release the server port lock before adding a + * subscription to the name table since TIPC needs to be + * able to (re)acquire the port lock if an event message + * issued by the subscription process is rejected and + * returned. The subscription cannot be deleted while + * it is being added to the name table because: + * a) the single-threading of the native API port code + * ensures the subscription cannot be cancelled and + * the subscriber connection cannot be broken, and + * b) the name table lock ensures the subscription + * timeout code cannot delete the subscription, + * so the subscription object is still protected. + */ + + tipc_nametbl_subscribe(sub); + } + } } /** @@ -448,16 +479,10 @@ static void subscr_named_msg_event(void *usr_handle, struct tipc_portid const *orig, struct tipc_name_seq const *dest) { - struct subscriber *subscriber; - struct iovec msg_sect = {NULL, 0}; - spinlock_t *subscriber_lock; + static struct iovec msg_sect = {NULL, 0}; - dbg("subscr_named_msg_event: orig = %x own = %x,\n", - orig->node, tipc_own_addr); - if (size && (size != sizeof(struct tipc_subscr))) { - warn("Subscriber rejected, invalid subscription size\n"); - return; - } + struct subscriber *subscriber; + u32 server_port_ref; /* Create subscriber object */ @@ -468,17 +493,11 @@ static void subscr_named_msg_event(void *usr_handle, } INIT_LIST_HEAD(&subscriber->subscription_list); INIT_LIST_HEAD(&subscriber->subscriber_list); - subscriber->ref = tipc_ref_acquire(subscriber, &subscriber->lock); - if (subscriber->ref == 0) { - warn("Subscriber rejected, reference table exhausted\n"); - kfree(subscriber); - return; - } - /* Establish a connection to subscriber */ + /* Create server port & establish connection to subscriber */ tipc_createport(topsrv.user_ref, - (void *)(unsigned long)subscriber->ref, + subscriber, importance, NULL, NULL, @@ -490,32 +509,36 @@ static void subscr_named_msg_event(void *usr_handle, &subscriber->port_ref); if (subscriber->port_ref == 0) { warn("Subscriber rejected, unable to create port\n"); - tipc_ref_discard(subscriber->ref); kfree(subscriber); return; } tipc_connect2port(subscriber->port_ref, orig); + /* Lock server port (& save lock address for future use) */ + + subscriber->lock = tipc_port_lock(subscriber->port_ref)->publ.lock; /* Add subscriber to topology server's subscriber list */ - tipc_ref_lock(subscriber->ref); spin_lock_bh(&topsrv.lock); list_add(&subscriber->subscriber_list, &topsrv.subscriber_list); spin_unlock_bh(&topsrv.lock); - /* - * Subscribe now if message contains a subscription, - * otherwise send an empty response to complete connection handshaking - */ + /* Unlock server port */ - subscriber_lock = subscriber->lock; - if (size) - subscr_subscribe((struct tipc_subscr *)data, subscriber); - else - tipc_send(subscriber->port_ref, 1, &msg_sect); + server_port_ref = subscriber->port_ref; + spin_unlock_bh(subscriber->lock); - spin_unlock_bh(subscriber_lock); + /* Send an ACK- to complete connection handshaking */ + + tipc_send(server_port_ref, 1, &msg_sect); + + /* Handle optional subscription request */ + + if (size != 0) { + subscr_conn_msg_event(subscriber, server_port_ref, + buf, data, size); + } } int tipc_subscr_start(void) @@ -574,8 +597,8 @@ void tipc_subscr_stop(void) list_for_each_entry_safe(subscriber, subscriber_temp, &topsrv.subscriber_list, subscriber_list) { - tipc_ref_lock(subscriber->ref); subscriber_lock = subscriber->lock; + spin_lock_bh(subscriber_lock); subscr_terminate(subscriber); spin_unlock_bh(subscriber_lock); } diff --git a/net/tipc/subscr.h b/net/tipc/subscr.h index 93a8e674fac1..45d89bf4d202 100644 --- a/net/tipc/subscr.h +++ b/net/tipc/subscr.h @@ -1,8 +1,8 @@ /* - * net/tipc/subscr.h: Include file for TIPC subscription service + * net/tipc/subscr.h: Include file for TIPC network topology service * * Copyright (c) 2003-2006, Ericsson AB - * Copyright (c) 2005, Wind River Systems + * Copyright (c) 2005-2007, Wind River Systems * All rights reserved. * * Redistribution and use in source and binary forms, with or without @@ -37,34 +37,44 @@ #ifndef _TIPC_SUBSCR_H #define _TIPC_SUBSCR_H +struct subscription; + +typedef void (*tipc_subscr_event) (struct subscription *sub, + u32 found_lower, u32 found_upper, + u32 event, u32 port_ref, u32 node); + /** * struct subscription - TIPC network topology subscription object * @seq: name sequence associated with subscription * @timeout: duration of subscription (in ms) * @filter: event filtering to be done for subscription - * @evt: template for events generated by subscription - * @subscription_list: adjacent subscriptions in subscriber's subscription list + * @event_cb: routine invoked when a subscription event is detected + * @timer: timer governing subscription duration (optional) * @nameseq_list: adjacent subscriptions in name sequence's subscription list - * @timer_ref: reference to timer governing subscription duration (may be NULL) - * @owner: pointer to subscriber object associated with this subscription + * @subscription_list: adjacent subscriptions in subscriber's subscription list + * @server_ref: object reference of server port associated with subscription + * @swap: indicates if subscriber uses opposite endianness in its messages + * @evt: template for events generated by subscription */ struct subscription { struct tipc_name_seq seq; u32 timeout; u32 filter; - struct tipc_event evt; - struct list_head subscription_list; - struct list_head nameseq_list; + tipc_subscr_event event_cb; struct timer_list timer; - struct subscriber *owner; + struct list_head nameseq_list; + struct list_head subscription_list; + u32 server_ref; + int swap; + struct tipc_event evt; }; -int tipc_subscr_overlap(struct subscription * sub, +int tipc_subscr_overlap(struct subscription *sub, u32 found_lower, u32 found_upper); -void tipc_subscr_report_overlap(struct subscription * sub, +void tipc_subscr_report_overlap(struct subscription *sub, u32 found_lower, u32 found_upper, u32 event, diff --git a/net/tipc/user_reg.c b/net/tipc/user_reg.c index 4146c40cd20b..506928803162 100644 --- a/net/tipc/user_reg.c +++ b/net/tipc/user_reg.c @@ -91,7 +91,7 @@ static int reg_init(void) } } spin_unlock_bh(®_lock); - return users ? TIPC_OK : -ENOMEM; + return users ? 0 : -ENOMEM; } /** @@ -129,7 +129,7 @@ int tipc_reg_start(void) tipc_k_signal((Handler)reg_callback, (unsigned long)&users[u]); } - return TIPC_OK; + return 0; } /** @@ -184,7 +184,7 @@ int tipc_attach(u32 *userid, tipc_mode_event cb, void *usr_handle) if (cb && (tipc_mode != TIPC_NOT_RUNNING)) tipc_k_signal((Handler)reg_callback, (unsigned long)user_ptr); - return TIPC_OK; + return 0; } /** @@ -230,7 +230,7 @@ int tipc_reg_add_port(struct user_port *up_ptr) struct tipc_user *user_ptr; if (up_ptr->user_ref == 0) - return TIPC_OK; + return 0; if (up_ptr->user_ref > MAX_USERID) return -EINVAL; if ((tipc_mode == TIPC_NOT_RUNNING) || !users ) @@ -240,7 +240,7 @@ int tipc_reg_add_port(struct user_port *up_ptr) user_ptr = &users[up_ptr->user_ref]; list_add(&up_ptr->uport_list, &user_ptr->ports); spin_unlock_bh(®_lock); - return TIPC_OK; + return 0; } /** @@ -250,7 +250,7 @@ int tipc_reg_add_port(struct user_port *up_ptr) int tipc_reg_remove_port(struct user_port *up_ptr) { if (up_ptr->user_ref == 0) - return TIPC_OK; + return 0; if (up_ptr->user_ref > MAX_USERID) return -EINVAL; if (!users ) @@ -259,6 +259,6 @@ int tipc_reg_remove_port(struct user_port *up_ptr) spin_lock_bh(®_lock); list_del_init(&up_ptr->uport_list); spin_unlock_bh(®_lock); - return TIPC_OK; + return 0; } diff --git a/net/tipc/zone.c b/net/tipc/zone.c index 3506f8563441..2c01ba2d86bf 100644 --- a/net/tipc/zone.c +++ b/net/tipc/zone.c @@ -111,10 +111,10 @@ void tipc_zone_send_external_routes(struct _zone *z_ptr, u32 dest) } } -struct node *tipc_zone_select_remote_node(struct _zone *z_ptr, u32 addr, u32 ref) +struct tipc_node *tipc_zone_select_remote_node(struct _zone *z_ptr, u32 addr, u32 ref) { struct cluster *c_ptr; - struct node *n_ptr; + struct tipc_node *n_ptr; u32 c_num; if (!z_ptr) diff --git a/net/tipc/zone.h b/net/tipc/zone.h index 6e7a08df8af5..7bdc3406ba9b 100644 --- a/net/tipc/zone.h +++ b/net/tipc/zone.h @@ -54,7 +54,7 @@ struct _zone { u32 links; }; -struct node *tipc_zone_select_remote_node(struct _zone *z_ptr, u32 addr, u32 ref); +struct tipc_node *tipc_zone_select_remote_node(struct _zone *z_ptr, u32 addr, u32 ref); u32 tipc_zone_select_router(struct _zone *z_ptr, u32 addr, u32 ref); void tipc_zone_remove_as_router(struct _zone *z_ptr, u32 router); void tipc_zone_send_external_routes(struct _zone *z_ptr, u32 dest); diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c index 657835f227d3..c647aab8d418 100644 --- a/net/unix/af_unix.c +++ b/net/unix/af_unix.c @@ -1,15 +1,13 @@ /* * NET4: Implementation of BSD Unix domain sockets. * - * Authors: Alan Cox, <alan.cox@linux.org> + * Authors: Alan Cox, <alan@lxorguk.ukuu.org.uk> * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public License * as published by the Free Software Foundation; either version * 2 of the License, or (at your option) any later version. * - * Version: $Id: af_unix.c,v 1.133 2002/02/08 03:57:19 davem Exp $ - * * Fixes: * Linus Torvalds : Assorted bug cures. * Niibe Yutaka : async I/O support. @@ -229,7 +227,7 @@ static void __unix_remove_socket(struct sock *sk) static void __unix_insert_socket(struct hlist_head *list, struct sock *sk) { - BUG_TRAP(sk_unhashed(sk)); + WARN_ON(!sk_unhashed(sk)); sk_add_node(sk, list); } @@ -352,9 +350,9 @@ static void unix_sock_destructor(struct sock *sk) skb_queue_purge(&sk->sk_receive_queue); - BUG_TRAP(!atomic_read(&sk->sk_wmem_alloc)); - BUG_TRAP(sk_unhashed(sk)); - BUG_TRAP(!sk->sk_socket); + WARN_ON(atomic_read(&sk->sk_wmem_alloc)); + WARN_ON(!sk_unhashed(sk)); + WARN_ON(sk->sk_socket); if (!sock_flag(sk, SOCK_DEAD)) { printk("Attempt to release alive unix socket: %p\n", sk); return; @@ -487,8 +485,8 @@ static int unix_socketpair(struct socket *, struct socket *); static int unix_accept(struct socket *, struct socket *, int); static int unix_getname(struct socket *, struct sockaddr *, int *, int); static unsigned int unix_poll(struct file *, struct socket *, poll_table *); -static unsigned int unix_datagram_poll(struct file *, struct socket *, - poll_table *); +static unsigned int unix_dgram_poll(struct file *, struct socket *, + poll_table *); static int unix_ioctl(struct socket *, unsigned int, unsigned long); static int unix_shutdown(struct socket *, int); static int unix_stream_sendmsg(struct kiocb *, struct socket *, @@ -534,7 +532,7 @@ static const struct proto_ops unix_dgram_ops = { .socketpair = unix_socketpair, .accept = sock_no_accept, .getname = unix_getname, - .poll = unix_datagram_poll, + .poll = unix_dgram_poll, .ioctl = unix_ioctl, .listen = sock_no_listen, .shutdown = unix_shutdown, @@ -555,7 +553,7 @@ static const struct proto_ops unix_seqpacket_ops = { .socketpair = unix_socketpair, .accept = unix_accept, .getname = unix_getname, - .poll = unix_datagram_poll, + .poll = unix_dgram_poll, .ioctl = unix_ioctl, .listen = unix_listen, .shutdown = unix_shutdown, @@ -605,7 +603,7 @@ static struct sock * unix_create1(struct net *net, struct socket *sock) u->dentry = NULL; u->mnt = NULL; spin_lock_init(&u->lock); - atomic_set(&u->inflight, 0); + atomic_long_set(&u->inflight, 0); INIT_LIST_HEAD(&u->link); mutex_init(&u->readlock); /* single task reading lock */ init_waitqueue_head(&u->peer_wait); @@ -1994,29 +1992,13 @@ static unsigned int unix_poll(struct file * file, struct socket *sock, poll_tabl return mask; } -static unsigned int unix_datagram_poll(struct file *file, struct socket *sock, - poll_table *wait) +static unsigned int unix_dgram_poll(struct file *file, struct socket *sock, + poll_table *wait) { - struct sock *sk = sock->sk, *peer; - unsigned int mask; + struct sock *sk = sock->sk, *other; + unsigned int mask, writable; poll_wait(file, sk->sk_sleep, wait); - - peer = unix_peer_get(sk); - if (peer) { - if (peer != sk) { - /* - * Writability of a connected socket additionally - * depends on the state of the receive queue of the - * peer. - */ - poll_wait(file, &unix_sk(peer)->peer_wait, wait); - } else { - sock_put(peer); - peer = NULL; - } - } - mask = 0; /* exceptional events? */ @@ -2042,14 +2024,26 @@ static unsigned int unix_datagram_poll(struct file *file, struct socket *sock, } /* writable? */ - if (unix_writable(sk) && !(peer && unix_recvq_full(peer))) + writable = unix_writable(sk); + if (writable) { + other = unix_peer_get(sk); + if (other) { + if (unix_peer(other) != sk) { + poll_wait(file, &unix_sk(other)->peer_wait, + wait); + if (unix_recvq_full(other)) + writable = 0; + } + + sock_put(other); + } + } + + if (writable) mask |= POLLOUT | POLLWRNORM | POLLWRBAND; else set_bit(SOCK_ASYNC_NOSPACE, &sk->sk_socket->flags); - if (peer) - sock_put(peer); - return mask; } diff --git a/net/unix/garbage.c b/net/unix/garbage.c index ebdff3d877a1..2a27b84f740b 100644 --- a/net/unix/garbage.c +++ b/net/unix/garbage.c @@ -127,7 +127,7 @@ void unix_inflight(struct file *fp) if(s) { struct unix_sock *u = unix_sk(s); spin_lock(&unix_gc_lock); - if (atomic_inc_return(&u->inflight) == 1) { + if (atomic_long_inc_return(&u->inflight) == 1) { BUG_ON(!list_empty(&u->link)); list_add_tail(&u->link, &gc_inflight_list); } else { @@ -145,7 +145,7 @@ void unix_notinflight(struct file *fp) struct unix_sock *u = unix_sk(s); spin_lock(&unix_gc_lock); BUG_ON(list_empty(&u->link)); - if (atomic_dec_and_test(&u->inflight)) + if (atomic_long_dec_and_test(&u->inflight)) list_del_init(&u->link); unix_tot_inflight--; spin_unlock(&unix_gc_lock); @@ -237,17 +237,17 @@ static void scan_children(struct sock *x, void (*func)(struct unix_sock *), static void dec_inflight(struct unix_sock *usk) { - atomic_dec(&usk->inflight); + atomic_long_dec(&usk->inflight); } static void inc_inflight(struct unix_sock *usk) { - atomic_inc(&usk->inflight); + atomic_long_inc(&usk->inflight); } static void inc_inflight_move_tail(struct unix_sock *u) { - atomic_inc(&u->inflight); + atomic_long_inc(&u->inflight); /* * If this is still a candidate, move it to the end of the * list, so that it's checked even if it was already passed @@ -288,11 +288,11 @@ void unix_gc(void) * before the detach without atomicity guarantees. */ list_for_each_entry_safe(u, next, &gc_inflight_list, link) { - int total_refs; - int inflight_refs; + long total_refs; + long inflight_refs; total_refs = file_count(u->sk.sk_socket->file); - inflight_refs = atomic_read(&u->inflight); + inflight_refs = atomic_long_read(&u->inflight); BUG_ON(inflight_refs < 1); BUG_ON(total_refs < inflight_refs); @@ -324,7 +324,7 @@ void unix_gc(void) /* Move cursor to after the current position. */ list_move(&cursor, &u->link); - if (atomic_read(&u->inflight) > 0) { + if (atomic_long_read(&u->inflight) > 0) { list_move_tail(&u->link, &gc_inflight_list); u->gc_candidate = 0; scan_children(&u->sk, inc_inflight_move_tail, NULL); diff --git a/net/wanrouter/Kconfig b/net/wanrouter/Kconfig index 1debe1cb054e..61ceae0b9566 100644 --- a/net/wanrouter/Kconfig +++ b/net/wanrouter/Kconfig @@ -20,8 +20,6 @@ config WAN_ROUTER wish to use your Linux box as a WAN router, say Y here and also to the WAN driver for your card, below. You will then need the wan-tools package which is available from <ftp://ftp.sangoma.com/>. - Read <file:Documentation/networking/wan-router.txt> for more - information. To compile WAN routing support as a module, choose M here: the module will be called wanrouter. diff --git a/net/wanrouter/wanmain.c b/net/wanrouter/wanmain.c index 9ab31a3ce3ad..7f07152bc109 100644 --- a/net/wanrouter/wanmain.c +++ b/net/wanrouter/wanmain.c @@ -57,7 +57,6 @@ #include <linux/vmalloc.h> /* vmalloc, vfree */ #include <asm/uaccess.h> /* copy_to/from_user */ #include <linux/init.h> /* __initfunc et al. */ -#include <net/syncppp.h> #define KMEM_SAFETYZONE 8 @@ -350,9 +349,9 @@ __be16 wanrouter_type_trans(struct sk_buff *skb, struct net_device *dev) * o execute requested action or pass command to the device driver */ -int wanrouter_ioctl(struct inode *inode, struct file *file, - unsigned int cmd, unsigned long arg) +long wanrouter_ioctl(struct file *file, unsigned int cmd, unsigned long arg) { + struct inode *inode = file->f_path.dentry->d_inode; int err = 0; struct proc_dir_entry *dent; struct wan_device *wandev; @@ -372,6 +371,7 @@ int wanrouter_ioctl(struct inode *inode, struct file *file, if (wandev->magic != ROUTER_MAGIC) return -EINVAL; + lock_kernel(); switch (cmd) { case ROUTER_SETUP: err = wanrouter_device_setup(wandev, data); @@ -403,6 +403,7 @@ int wanrouter_ioctl(struct inode *inode, struct file *file, err = wandev->ioctl(wandev, cmd, arg); else err = -EINVAL; } + unlock_kernel(); return err; } @@ -565,9 +566,6 @@ static int wanrouter_device_new_if(struct wan_device *wandev, { wanif_conf_t *cnf; struct net_device *dev = NULL; -#ifdef CONFIG_WANPIPE_MULTPPP - struct ppp_device *pppdev=NULL; -#endif int err; if ((wandev->state == WAN_UNCONFIGURED) || (wandev->new_if == NULL)) @@ -586,25 +584,10 @@ static int wanrouter_device_new_if(struct wan_device *wandev, goto out; if (cnf->config_id == WANCONFIG_MPPP) { -#ifdef CONFIG_WANPIPE_MULTPPP - pppdev = kzalloc(sizeof(struct ppp_device), GFP_KERNEL); - err = -ENOBUFS; - if (pppdev == NULL) - goto out; - pppdev->dev = kzalloc(sizeof(struct net_device), GFP_KERNEL); - if (pppdev->dev == NULL) { - kfree(pppdev); - err = -ENOBUFS; - goto out; - } - err = wandev->new_if(wandev, (struct net_device *)pppdev, cnf); - dev = pppdev->dev; -#else printk(KERN_INFO "%s: Wanpipe Mulit-Port PPP support has not been compiled in!\n", wandev->name); err = -EPROTONOSUPPORT; goto out; -#endif } else { dev = kzalloc(sizeof(struct net_device), GFP_KERNEL); err = -ENOBUFS; @@ -659,17 +642,9 @@ static int wanrouter_device_new_if(struct wan_device *wandev, kfree(dev->priv); dev->priv = NULL; -#ifdef CONFIG_WANPIPE_MULTPPP - if (cnf->config_id == WANCONFIG_MPPP) - kfree(pppdev); - else - kfree(dev); -#else /* Sync PPP is disabled */ if (cnf->config_id != WANCONFIG_MPPP) kfree(dev); -#endif - out: kfree(cnf); return err; diff --git a/net/wanrouter/wanproc.c b/net/wanrouter/wanproc.c index 5bebe40bf4e6..267f7ff49827 100644 --- a/net/wanrouter/wanproc.c +++ b/net/wanrouter/wanproc.c @@ -278,7 +278,7 @@ static const struct file_operations wandev_fops = { .read = seq_read, .llseek = seq_lseek, .release = single_release, - .ioctl = wanrouter_ioctl, + .unlocked_ioctl = wanrouter_ioctl, }; /* diff --git a/net/wireless/Kconfig b/net/wireless/Kconfig index 79270903bda6..7d82be07fa1d 100644 --- a/net/wireless/Kconfig +++ b/net/wireless/Kconfig @@ -14,6 +14,38 @@ config NL80211 If unsure, say Y. +config WIRELESS_OLD_REGULATORY + bool "Old wireless static regulatory definitions" + default n + ---help--- + This option enables the old static regulatory information + and uses it within the new framework. This is available + temporarily as an option to help prevent immediate issues + due to the switch to the new regulatory framework which + does require a new userspace application which has the + database of regulatory information (CRDA) and another for + setting regulatory domains (iw). + + For more information see: + + http://wireless.kernel.org/en/developers/Regulatory/CRDA + http://wireless.kernel.org/en/users/Documentation/iw + + It is important to note though that if you *do* have CRDA present + and if this option is enabled CRDA *will* be called to update the + regulatory domain (for US and JP only). Support for letting the user + set the regulatory domain through iw is also supported. This option + mainly exists to leave around for a kernel release some old static + regulatory domains that were defined and to keep around the old + ieee80211_regdom module parameter. This is being phased out and you + should stop using them ASAP. + + Say N unless you cannot install a new userspace application + or have one currently depending on the ieee80211_regdom module + parameter and cannot port it to use the new userspace interfaces. + + This is scheduled for removal for 2.6.29. + config WIRELESS_EXT bool "Wireless extensions" default n @@ -29,3 +61,15 @@ config WIRELESS_EXT Say N (if you can) unless you know you need wireless extensions for external modules. + +config WIRELESS_EXT_SYSFS + bool "Wireless extensions sysfs files" + default y + depends on WIRELESS_EXT && SYSFS + help + This option enables the deprecated wireless statistics + files in /sys/class/net/*/wireless/. The same information + is available via the ioctls as well. + + Say Y if you have programs using it, like old versions of + hal. diff --git a/net/wireless/core.c b/net/wireless/core.c index 80afacdae46c..24fdd4cd22cb 100644 --- a/net/wireless/core.c +++ b/net/wireless/core.c @@ -1,7 +1,7 @@ /* * This is the linux wireless configuration interface. * - * Copyright 2006, 2007 Johannes Berg <johannes@sipsolutions.net> + * Copyright 2006-2008 Johannes Berg <johannes@sipsolutions.net> */ #include <linux/if.h> @@ -19,6 +19,7 @@ #include "nl80211.h" #include "core.h" #include "sysfs.h" +#include "reg.h" /* name for sysfs, %d is appended */ #define PHY_NAME "phy" @@ -32,7 +33,6 @@ MODULE_DESCRIPTION("wireless configuration support"); * often because we need to do it for each command */ LIST_HEAD(cfg80211_drv_list); DEFINE_MUTEX(cfg80211_drv_mutex); -static int wiphy_counter; /* for debugfs */ static struct dentry *ieee80211_debugfs_dir; @@ -143,8 +143,11 @@ void cfg80211_put_dev(struct cfg80211_registered_device *drv) int cfg80211_dev_rename(struct cfg80211_registered_device *rdev, char *newname) { + struct cfg80211_registered_device *drv; int idx, taken = -1, result, digits; + mutex_lock(&cfg80211_drv_mutex); + /* prohibit calling the thing phy%d when %d is not its number */ sscanf(newname, PHY_NAME "%d%n", &idx, &taken); if (taken == strlen(newname) && idx != rdev->idx) { @@ -156,14 +159,30 @@ int cfg80211_dev_rename(struct cfg80211_registered_device *rdev, * deny the name if it is phy<idx> where <idx> is printed * without leading zeroes. taken == strlen(newname) here */ + result = -EINVAL; if (taken == strlen(PHY_NAME) + digits) - return -EINVAL; + goto out_unlock; } - /* this will check for collisions */ + + /* Ignore nop renames */ + result = 0; + if (strcmp(newname, dev_name(&rdev->wiphy.dev)) == 0) + goto out_unlock; + + /* Ensure another device does not already have this name. */ + list_for_each_entry(drv, &cfg80211_drv_list, list) { + result = -EINVAL; + if (strcmp(newname, dev_name(&drv->wiphy.dev)) == 0) + goto out_unlock; + } + + /* this will only check for collisions in sysfs + * which is not even always compiled in. + */ result = device_rename(&rdev->wiphy.dev, newname); if (result) - return result; + goto out_unlock; if (!debugfs_rename(rdev->wiphy.debugfsdir->d_parent, rdev->wiphy.debugfsdir, @@ -172,15 +191,21 @@ int cfg80211_dev_rename(struct cfg80211_registered_device *rdev, printk(KERN_ERR "cfg80211: failed to rename debugfs dir to %s!\n", newname); - nl80211_notify_dev_rename(rdev); + result = 0; +out_unlock: + mutex_unlock(&cfg80211_drv_mutex); + if (result == 0) + nl80211_notify_dev_rename(rdev); - return 0; + return result; } /* exported functions */ struct wiphy *wiphy_new(struct cfg80211_ops *ops, int sizeof_priv) { + static int wiphy_counter; + struct cfg80211_registered_device *drv; int alloc_size; @@ -197,21 +222,18 @@ struct wiphy *wiphy_new(struct cfg80211_ops *ops, int sizeof_priv) mutex_lock(&cfg80211_drv_mutex); - drv->idx = wiphy_counter; - - /* now increase counter for the next device unless - * it has wrapped previously */ - if (wiphy_counter >= 0) - wiphy_counter++; - - mutex_unlock(&cfg80211_drv_mutex); + drv->idx = wiphy_counter++; if (unlikely(drv->idx < 0)) { + wiphy_counter--; + mutex_unlock(&cfg80211_drv_mutex); /* ugh, wrapped! */ kfree(drv); return NULL; } + mutex_unlock(&cfg80211_drv_mutex); + /* give it a proper name */ snprintf(drv->wiphy.dev.bus_id, BUS_ID_SIZE, PHY_NAME "%d", drv->idx); @@ -236,6 +258,13 @@ int wiphy_register(struct wiphy *wiphy) struct ieee80211_supported_band *sband; bool have_band = false; int i; + u16 ifmodes = wiphy->interface_modes; + + /* sanity check ifmodes */ + WARN_ON(!ifmodes); + ifmodes &= ((1 << __NL80211_IFTYPE_AFTER_LAST) - 1) & ~1; + if (WARN_ON(ifmodes != wiphy->interface_modes)) + wiphy->interface_modes = ifmodes; /* sanity check supported bands/channels */ for (band = 0; band < IEEE80211_NUM_BANDS; band++) { @@ -272,7 +301,9 @@ int wiphy_register(struct wiphy *wiphy) ieee80211_set_bitrate_flags(wiphy); /* set up regulatory info */ - wiphy_update_regulatory(wiphy); + mutex_lock(&cfg80211_reg_mutex); + wiphy_update_regulatory(wiphy, REGDOM_SET_BY_CORE); + mutex_unlock(&cfg80211_reg_mutex); mutex_lock(&cfg80211_drv_mutex); @@ -350,6 +381,8 @@ static int cfg80211_netdev_notifier_call(struct notifier_block * nb, rdev = wiphy_to_dev(dev->ieee80211_ptr->wiphy); + WARN_ON(dev->ieee80211_ptr->iftype == NL80211_IFTYPE_UNSPECIFIED); + switch (state) { case NETDEV_REGISTER: mutex_lock(&rdev->devlist_mtx); @@ -381,7 +414,9 @@ static struct notifier_block cfg80211_netdev_notifier = { static int cfg80211_init(void) { - int err = wiphy_sysfs_init(); + int err; + + err = wiphy_sysfs_init(); if (err) goto out_fail_sysfs; @@ -395,8 +430,14 @@ static int cfg80211_init(void) ieee80211_debugfs_dir = debugfs_create_dir("ieee80211", NULL); + err = regulatory_init(); + if (err) + goto out_fail_reg; + return 0; +out_fail_reg: + debugfs_remove(ieee80211_debugfs_dir); out_fail_nl80211: unregister_netdevice_notifier(&cfg80211_netdev_notifier); out_fail_notifier: @@ -404,6 +445,7 @@ out_fail_notifier: out_fail_sysfs: return err; } + subsys_initcall(cfg80211_init); static void cfg80211_exit(void) @@ -412,5 +454,6 @@ static void cfg80211_exit(void) nl80211_exit(); unregister_netdevice_notifier(&cfg80211_netdev_notifier); wiphy_sysfs_exit(); + regulatory_exit(); } module_exit(cfg80211_exit); diff --git a/net/wireless/core.h b/net/wireless/core.h index 7a02c356d63d..771cc5cc7658 100644 --- a/net/wireless/core.h +++ b/net/wireless/core.h @@ -79,6 +79,6 @@ extern int cfg80211_dev_rename(struct cfg80211_registered_device *drv, char *newname); void ieee80211_set_bitrate_flags(struct wiphy *wiphy); -void wiphy_update_regulatory(struct wiphy *wiphy); +void wiphy_update_regulatory(struct wiphy *wiphy, enum reg_set_by setby); #endif /* __NET_WIRELESS_CORE_H */ diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c index fb75f265b39c..572793c8c7ab 100644 --- a/net/wireless/nl80211.c +++ b/net/wireless/nl80211.c @@ -18,6 +18,7 @@ #include <net/cfg80211.h> #include "core.h" #include "nl80211.h" +#include "reg.h" /* the netlink family */ static struct genl_family nl80211_fam = { @@ -29,16 +30,16 @@ static struct genl_family nl80211_fam = { }; /* internal helper: get drv and dev */ -static int get_drv_dev_by_info_ifindex(struct genl_info *info, +static int get_drv_dev_by_info_ifindex(struct nlattr **attrs, struct cfg80211_registered_device **drv, struct net_device **dev) { int ifindex; - if (!info->attrs[NL80211_ATTR_IFINDEX]) + if (!attrs[NL80211_ATTR_IFINDEX]) return -EINVAL; - ifindex = nla_get_u32(info->attrs[NL80211_ATTR_IFINDEX]); + ifindex = nla_get_u32(attrs[NL80211_ATTR_IFINDEX]); *dev = dev_get_by_index(&init_net, ifindex); if (!*dev) return -ENODEV; @@ -87,6 +88,16 @@ static struct nla_policy nl80211_policy[NL80211_ATTR_MAX+1] __read_mostly = { [NL80211_ATTR_MESH_ID] = { .type = NLA_BINARY, .len = IEEE80211_MAX_MESH_ID_LEN }, [NL80211_ATTR_MPATH_NEXT_HOP] = { .type = NLA_U32 }, + + [NL80211_ATTR_REG_ALPHA2] = { .type = NLA_STRING, .len = 2 }, + [NL80211_ATTR_REG_RULES] = { .type = NLA_NESTED }, + + [NL80211_ATTR_BSS_CTS_PROT] = { .type = NLA_U8 }, + [NL80211_ATTR_BSS_SHORT_PREAMBLE] = { .type = NLA_U8 }, + [NL80211_ATTR_BSS_SHORT_SLOT_TIME] = { .type = NLA_U8 }, + + [NL80211_ATTR_HT_CAPABILITY] = { .type = NLA_BINARY, + .len = NL80211_HT_CAPABILITY_LEN }, }; /* message building helper */ @@ -106,10 +117,12 @@ static int nl80211_send_wiphy(struct sk_buff *msg, u32 pid, u32 seq, int flags, struct nlattr *nl_bands, *nl_band; struct nlattr *nl_freqs, *nl_freq; struct nlattr *nl_rates, *nl_rate; + struct nlattr *nl_modes; enum ieee80211_band band; struct ieee80211_channel *chan; struct ieee80211_rate *rate; int i; + u16 ifmodes = dev->wiphy.interface_modes; hdr = nl80211hdr_put(msg, pid, seq, flags, NL80211_CMD_NEW_WIPHY); if (!hdr) @@ -118,6 +131,20 @@ static int nl80211_send_wiphy(struct sk_buff *msg, u32 pid, u32 seq, int flags, NLA_PUT_U32(msg, NL80211_ATTR_WIPHY, dev->idx); NLA_PUT_STRING(msg, NL80211_ATTR_WIPHY_NAME, wiphy_name(&dev->wiphy)); + nl_modes = nla_nest_start(msg, NL80211_ATTR_SUPPORTED_IFTYPES); + if (!nl_modes) + goto nla_put_failure; + + i = 0; + while (ifmodes) { + if (ifmodes & 1) + NLA_PUT_FLAG(msg, i); + ifmodes >>= 1; + i++; + } + + nla_nest_end(msg, nl_modes); + nl_bands = nla_nest_start(msg, NL80211_ATTR_WIPHY_BANDS); if (!nl_bands) goto nla_put_failure; @@ -199,12 +226,14 @@ static int nl80211_dump_wiphy(struct sk_buff *skb, struct netlink_callback *cb) mutex_lock(&cfg80211_drv_mutex); list_for_each_entry(dev, &cfg80211_drv_list, list) { - if (++idx < start) + if (++idx <= start) continue; if (nl80211_send_wiphy(skb, NETLINK_CB(cb->skb).pid, cb->nlh->nlmsg_seq, NLM_F_MULTI, - dev) < 0) + dev) < 0) { + idx--; break; + } } mutex_unlock(&cfg80211_drv_mutex); @@ -270,7 +299,7 @@ static int nl80211_send_iface(struct sk_buff *msg, u32 pid, u32 seq, int flags, NLA_PUT_U32(msg, NL80211_ATTR_IFINDEX, dev->ifindex); NLA_PUT_STRING(msg, NL80211_ATTR_IFNAME, dev->name); - /* TODO: interface type */ + NLA_PUT_U32(msg, NL80211_ATTR_IFTYPE, dev->ieee80211_ptr->iftype); return genlmsg_end(msg, hdr); nla_put_failure: @@ -289,21 +318,31 @@ static int nl80211_dump_interface(struct sk_buff *skb, struct netlink_callback * mutex_lock(&cfg80211_drv_mutex); list_for_each_entry(dev, &cfg80211_drv_list, list) { - if (++wp_idx < wp_start) + if (wp_idx < wp_start) { + wp_idx++; continue; + } if_idx = 0; mutex_lock(&dev->devlist_mtx); list_for_each_entry(wdev, &dev->netdev_list, list) { - if (++if_idx < if_start) + if (if_idx < if_start) { + if_idx++; continue; + } if (nl80211_send_iface(skb, NETLINK_CB(cb->skb).pid, cb->nlh->nlmsg_seq, NLM_F_MULTI, - wdev->netdev) < 0) - break; + wdev->netdev) < 0) { + mutex_unlock(&dev->devlist_mtx); + goto out; + } + if_idx++; } mutex_unlock(&dev->devlist_mtx); + + wp_idx++; } + out: mutex_unlock(&cfg80211_drv_mutex); cb->args[0] = wp_idx; @@ -319,7 +358,7 @@ static int nl80211_get_interface(struct sk_buff *skb, struct genl_info *info) struct net_device *netdev; int err; - err = get_drv_dev_by_info_ifindex(info, &dev, &netdev); + err = get_drv_dev_by_info_ifindex(info->attrs, &dev, &netdev); if (err) return err; @@ -379,40 +418,56 @@ static int nl80211_set_interface(struct sk_buff *skb, struct genl_info *info) int err, ifindex; enum nl80211_iftype type; struct net_device *dev; - u32 flags; + u32 _flags, *flags = NULL; memset(¶ms, 0, sizeof(params)); - if (info->attrs[NL80211_ATTR_IFTYPE]) { - type = nla_get_u32(info->attrs[NL80211_ATTR_IFTYPE]); - if (type > NL80211_IFTYPE_MAX) - return -EINVAL; - } else - return -EINVAL; - - err = get_drv_dev_by_info_ifindex(info, &drv, &dev); + err = get_drv_dev_by_info_ifindex(info->attrs, &drv, &dev); if (err) return err; ifindex = dev->ifindex; + type = dev->ieee80211_ptr->iftype; dev_put(dev); - if (!drv->ops->change_virtual_intf) { + err = -EINVAL; + if (info->attrs[NL80211_ATTR_IFTYPE]) { + type = nla_get_u32(info->attrs[NL80211_ATTR_IFTYPE]); + if (type > NL80211_IFTYPE_MAX) + goto unlock; + } + + if (!drv->ops->change_virtual_intf || + !(drv->wiphy.interface_modes & (1 << type))) { err = -EOPNOTSUPP; goto unlock; } - if (type == NL80211_IFTYPE_MESH_POINT && - info->attrs[NL80211_ATTR_MESH_ID]) { + if (info->attrs[NL80211_ATTR_MESH_ID]) { + if (type != NL80211_IFTYPE_MESH_POINT) { + err = -EINVAL; + goto unlock; + } params.mesh_id = nla_data(info->attrs[NL80211_ATTR_MESH_ID]); params.mesh_id_len = nla_len(info->attrs[NL80211_ATTR_MESH_ID]); } + if (info->attrs[NL80211_ATTR_MNTR_FLAGS]) { + if (type != NL80211_IFTYPE_MONITOR) { + err = -EINVAL; + goto unlock; + } + err = parse_monitor_flags(info->attrs[NL80211_ATTR_MNTR_FLAGS], + &_flags); + if (!err) + flags = &_flags; + } rtnl_lock(); - err = parse_monitor_flags(type == NL80211_IFTYPE_MONITOR ? - info->attrs[NL80211_ATTR_MNTR_FLAGS] : NULL, - &flags); err = drv->ops->change_virtual_intf(&drv->wiphy, ifindex, - type, err ? NULL : &flags, ¶ms); + type, flags, ¶ms); + + dev = __dev_get_by_index(&init_net, ifindex); + WARN_ON(!dev || (!err && dev->ieee80211_ptr->iftype != type)); + rtnl_unlock(); unlock: @@ -443,7 +498,8 @@ static int nl80211_new_interface(struct sk_buff *skb, struct genl_info *info) if (IS_ERR(drv)) return PTR_ERR(drv); - if (!drv->ops->add_virtual_intf) { + if (!drv->ops->add_virtual_intf || + !(drv->wiphy.interface_modes & (1 << type))) { err = -EOPNOTSUPP; goto unlock; } @@ -475,7 +531,7 @@ static int nl80211_del_interface(struct sk_buff *skb, struct genl_info *info) int ifindex, err; struct net_device *dev; - err = get_drv_dev_by_info_ifindex(info, &drv, &dev); + err = get_drv_dev_by_info_ifindex(info->attrs, &drv, &dev); if (err) return err; ifindex = dev->ifindex; @@ -543,7 +599,7 @@ static int nl80211_get_key(struct sk_buff *skb, struct genl_info *info) if (info->attrs[NL80211_ATTR_MAC]) mac_addr = nla_data(info->attrs[NL80211_ATTR_MAC]); - err = get_drv_dev_by_info_ifindex(info, &drv, &dev); + err = get_drv_dev_by_info_ifindex(info->attrs, &drv, &dev); if (err) return err; @@ -616,7 +672,7 @@ static int nl80211_set_key(struct sk_buff *skb, struct genl_info *info) if (!info->attrs[NL80211_ATTR_KEY_DEFAULT]) return -EINVAL; - err = get_drv_dev_by_info_ifindex(info, &drv, &dev); + err = get_drv_dev_by_info_ifindex(info->attrs, &drv, &dev); if (err) return err; @@ -697,7 +753,7 @@ static int nl80211_new_key(struct sk_buff *skb, struct genl_info *info) return -EINVAL; } - err = get_drv_dev_by_info_ifindex(info, &drv, &dev); + err = get_drv_dev_by_info_ifindex(info->attrs, &drv, &dev); if (err) return err; @@ -733,7 +789,7 @@ static int nl80211_del_key(struct sk_buff *skb, struct genl_info *info) if (info->attrs[NL80211_ATTR_MAC]) mac_addr = nla_data(info->attrs[NL80211_ATTR_MAC]); - err = get_drv_dev_by_info_ifindex(info, &drv, &dev); + err = get_drv_dev_by_info_ifindex(info->attrs, &drv, &dev); if (err) return err; @@ -762,7 +818,7 @@ static int nl80211_addset_beacon(struct sk_buff *skb, struct genl_info *info) struct beacon_parameters params; int haveinfo = 0; - err = get_drv_dev_by_info_ifindex(info, &drv, &dev); + err = get_drv_dev_by_info_ifindex(info->attrs, &drv, &dev); if (err) return err; @@ -841,7 +897,7 @@ static int nl80211_del_beacon(struct sk_buff *skb, struct genl_info *info) int err; struct net_device *dev; - err = get_drv_dev_by_info_ifindex(info, &drv, &dev); + err = get_drv_dev_by_info_ifindex(info->attrs, &drv, &dev); if (err) return err; @@ -935,67 +991,78 @@ static int nl80211_send_station(struct sk_buff *msg, u32 pid, u32 seq, } static int nl80211_dump_station(struct sk_buff *skb, - struct netlink_callback *cb) + struct netlink_callback *cb) { - int wp_idx = 0; - int if_idx = 0; - int sta_idx = cb->args[2]; - int wp_start = cb->args[0]; - int if_start = cb->args[1]; struct station_info sinfo; struct cfg80211_registered_device *dev; - struct wireless_dev *wdev; + struct net_device *netdev; u8 mac_addr[ETH_ALEN]; + int ifidx = cb->args[0]; + int sta_idx = cb->args[1]; int err; - int exit = 0; - /* TODO: filter by device */ - mutex_lock(&cfg80211_drv_mutex); - list_for_each_entry(dev, &cfg80211_drv_list, list) { - if (exit) + if (!ifidx) { + err = nlmsg_parse(cb->nlh, GENL_HDRLEN + nl80211_fam.hdrsize, + nl80211_fam.attrbuf, nl80211_fam.maxattr, + nl80211_policy); + if (err) + return err; + + if (!nl80211_fam.attrbuf[NL80211_ATTR_IFINDEX]) + return -EINVAL; + + ifidx = nla_get_u32(nl80211_fam.attrbuf[NL80211_ATTR_IFINDEX]); + if (!ifidx) + return -EINVAL; + } + + netdev = dev_get_by_index(&init_net, ifidx); + if (!netdev) + return -ENODEV; + + dev = cfg80211_get_dev_from_ifindex(ifidx); + if (IS_ERR(dev)) { + err = PTR_ERR(dev); + goto out_put_netdev; + } + + if (!dev->ops->dump_station) { + err = -ENOSYS; + goto out_err; + } + + rtnl_lock(); + + while (1) { + err = dev->ops->dump_station(&dev->wiphy, netdev, sta_idx, + mac_addr, &sinfo); + if (err == -ENOENT) break; - if (++wp_idx < wp_start) - continue; - if_idx = 0; + if (err) + goto out_err_rtnl; - mutex_lock(&dev->devlist_mtx); - list_for_each_entry(wdev, &dev->netdev_list, list) { - if (exit) - break; - if (++if_idx < if_start) - continue; - if (!dev->ops->dump_station) - continue; + if (nl80211_send_station(skb, + NETLINK_CB(cb->skb).pid, + cb->nlh->nlmsg_seq, NLM_F_MULTI, + netdev, mac_addr, + &sinfo) < 0) + goto out; - for (;; ++sta_idx) { - rtnl_lock(); - err = dev->ops->dump_station(&dev->wiphy, - wdev->netdev, sta_idx, mac_addr, - &sinfo); - rtnl_unlock(); - if (err) { - sta_idx = 0; - break; - } - if (nl80211_send_station(skb, - NETLINK_CB(cb->skb).pid, - cb->nlh->nlmsg_seq, NLM_F_MULTI, - wdev->netdev, mac_addr, - &sinfo) < 0) { - exit = 1; - break; - } - } - } - mutex_unlock(&dev->devlist_mtx); + sta_idx++; } - mutex_unlock(&cfg80211_drv_mutex); - cb->args[0] = wp_idx; - cb->args[1] = if_idx; - cb->args[2] = sta_idx; - return skb->len; + out: + cb->args[1] = sta_idx; + err = skb->len; + out_err_rtnl: + rtnl_unlock(); + out_err: + cfg80211_put_dev(dev); + out_put_netdev: + dev_put(netdev); + + return err; } static int nl80211_get_station(struct sk_buff *skb, struct genl_info *info) @@ -1014,7 +1081,7 @@ static int nl80211_get_station(struct sk_buff *skb, struct genl_info *info) mac_addr = nla_data(info->attrs[NL80211_ATTR_MAC]); - err = get_drv_dev_by_info_ifindex(info, &drv, &dev); + err = get_drv_dev_by_info_ifindex(info->attrs, &drv, &dev); if (err) return err; @@ -1102,6 +1169,10 @@ static int nl80211_set_station(struct sk_buff *skb, struct genl_info *info) params.listen_interval = nla_get_u16(info->attrs[NL80211_ATTR_STA_LISTEN_INTERVAL]); + if (info->attrs[NL80211_ATTR_HT_CAPABILITY]) + params.ht_capa = + nla_data(info->attrs[NL80211_ATTR_HT_CAPABILITY]); + if (parse_station_flags(info->attrs[NL80211_ATTR_STA_FLAGS], ¶ms.station_flags)) return -EINVAL; @@ -1110,7 +1181,7 @@ static int nl80211_set_station(struct sk_buff *skb, struct genl_info *info) params.plink_action = nla_get_u8(info->attrs[NL80211_ATTR_STA_PLINK_ACTION]); - err = get_drv_dev_by_info_ifindex(info, &drv, &dev); + err = get_drv_dev_by_info_ifindex(info->attrs, &drv, &dev); if (err) return err; @@ -1165,12 +1236,15 @@ static int nl80211_new_station(struct sk_buff *skb, struct genl_info *info) params.listen_interval = nla_get_u16(info->attrs[NL80211_ATTR_STA_LISTEN_INTERVAL]); params.aid = nla_get_u16(info->attrs[NL80211_ATTR_STA_AID]); + if (info->attrs[NL80211_ATTR_HT_CAPABILITY]) + params.ht_capa = + nla_data(info->attrs[NL80211_ATTR_HT_CAPABILITY]); if (parse_station_flags(info->attrs[NL80211_ATTR_STA_FLAGS], ¶ms.station_flags)) return -EINVAL; - err = get_drv_dev_by_info_ifindex(info, &drv, &dev); + err = get_drv_dev_by_info_ifindex(info->attrs, &drv, &dev); if (err) return err; @@ -1205,7 +1279,7 @@ static int nl80211_del_station(struct sk_buff *skb, struct genl_info *info) if (info->attrs[NL80211_ATTR_MAC]) mac_addr = nla_data(info->attrs[NL80211_ATTR_MAC]); - err = get_drv_dev_by_info_ifindex(info, &drv, &dev); + err = get_drv_dev_by_info_ifindex(info->attrs, &drv, &dev); if (err) return err; @@ -1275,68 +1349,78 @@ static int nl80211_send_mpath(struct sk_buff *msg, u32 pid, u32 seq, } static int nl80211_dump_mpath(struct sk_buff *skb, - struct netlink_callback *cb) + struct netlink_callback *cb) { - int wp_idx = 0; - int if_idx = 0; - int sta_idx = cb->args[2]; - int wp_start = cb->args[0]; - int if_start = cb->args[1]; struct mpath_info pinfo; struct cfg80211_registered_device *dev; - struct wireless_dev *wdev; + struct net_device *netdev; u8 dst[ETH_ALEN]; u8 next_hop[ETH_ALEN]; + int ifidx = cb->args[0]; + int path_idx = cb->args[1]; int err; - int exit = 0; - /* TODO: filter by device */ - mutex_lock(&cfg80211_drv_mutex); - list_for_each_entry(dev, &cfg80211_drv_list, list) { - if (exit) + if (!ifidx) { + err = nlmsg_parse(cb->nlh, GENL_HDRLEN + nl80211_fam.hdrsize, + nl80211_fam.attrbuf, nl80211_fam.maxattr, + nl80211_policy); + if (err) + return err; + + if (!nl80211_fam.attrbuf[NL80211_ATTR_IFINDEX]) + return -EINVAL; + + ifidx = nla_get_u32(nl80211_fam.attrbuf[NL80211_ATTR_IFINDEX]); + if (!ifidx) + return -EINVAL; + } + + netdev = dev_get_by_index(&init_net, ifidx); + if (!netdev) + return -ENODEV; + + dev = cfg80211_get_dev_from_ifindex(ifidx); + if (IS_ERR(dev)) { + err = PTR_ERR(dev); + goto out_put_netdev; + } + + if (!dev->ops->dump_mpath) { + err = -ENOSYS; + goto out_err; + } + + rtnl_lock(); + + while (1) { + err = dev->ops->dump_mpath(&dev->wiphy, netdev, path_idx, + dst, next_hop, &pinfo); + if (err == -ENOENT) break; - if (++wp_idx < wp_start) - continue; - if_idx = 0; + if (err) + goto out_err_rtnl; - mutex_lock(&dev->devlist_mtx); - list_for_each_entry(wdev, &dev->netdev_list, list) { - if (exit) - break; - if (++if_idx < if_start) - continue; - if (!dev->ops->dump_mpath) - continue; + if (nl80211_send_mpath(skb, NETLINK_CB(cb->skb).pid, + cb->nlh->nlmsg_seq, NLM_F_MULTI, + netdev, dst, next_hop, + &pinfo) < 0) + goto out; - for (;; ++sta_idx) { - rtnl_lock(); - err = dev->ops->dump_mpath(&dev->wiphy, - wdev->netdev, sta_idx, dst, - next_hop, &pinfo); - rtnl_unlock(); - if (err) { - sta_idx = 0; - break; - } - if (nl80211_send_mpath(skb, - NETLINK_CB(cb->skb).pid, - cb->nlh->nlmsg_seq, NLM_F_MULTI, - wdev->netdev, dst, next_hop, - &pinfo) < 0) { - exit = 1; - break; - } - } - } - mutex_unlock(&dev->devlist_mtx); + path_idx++; } - mutex_unlock(&cfg80211_drv_mutex); - cb->args[0] = wp_idx; - cb->args[1] = if_idx; - cb->args[2] = sta_idx; - return skb->len; + out: + cb->args[1] = path_idx; + err = skb->len; + out_err_rtnl: + rtnl_unlock(); + out_err: + cfg80211_put_dev(dev); + out_put_netdev: + dev_put(netdev); + + return err; } static int nl80211_get_mpath(struct sk_buff *skb, struct genl_info *info) @@ -1356,7 +1440,7 @@ static int nl80211_get_mpath(struct sk_buff *skb, struct genl_info *info) dst = nla_data(info->attrs[NL80211_ATTR_MAC]); - err = get_drv_dev_by_info_ifindex(info, &drv, &dev); + err = get_drv_dev_by_info_ifindex(info->attrs, &drv, &dev); if (err) return err; @@ -1409,7 +1493,7 @@ static int nl80211_set_mpath(struct sk_buff *skb, struct genl_info *info) dst = nla_data(info->attrs[NL80211_ATTR_MAC]); next_hop = nla_data(info->attrs[NL80211_ATTR_MPATH_NEXT_HOP]); - err = get_drv_dev_by_info_ifindex(info, &drv, &dev); + err = get_drv_dev_by_info_ifindex(info->attrs, &drv, &dev); if (err) return err; @@ -1444,7 +1528,7 @@ static int nl80211_new_mpath(struct sk_buff *skb, struct genl_info *info) dst = nla_data(info->attrs[NL80211_ATTR_MAC]); next_hop = nla_data(info->attrs[NL80211_ATTR_MPATH_NEXT_HOP]); - err = get_drv_dev_by_info_ifindex(info, &drv, &dev); + err = get_drv_dev_by_info_ifindex(info->attrs, &drv, &dev); if (err) return err; @@ -1473,7 +1557,7 @@ static int nl80211_del_mpath(struct sk_buff *skb, struct genl_info *info) if (info->attrs[NL80211_ATTR_MAC]) dst = nla_data(info->attrs[NL80211_ATTR_MAC]); - err = get_drv_dev_by_info_ifindex(info, &drv, &dev); + err = get_drv_dev_by_info_ifindex(info->attrs, &drv, &dev); if (err) return err; @@ -1492,6 +1576,183 @@ static int nl80211_del_mpath(struct sk_buff *skb, struct genl_info *info) return err; } +static int nl80211_set_bss(struct sk_buff *skb, struct genl_info *info) +{ + struct cfg80211_registered_device *drv; + int err; + struct net_device *dev; + struct bss_parameters params; + + memset(¶ms, 0, sizeof(params)); + /* default to not changing parameters */ + params.use_cts_prot = -1; + params.use_short_preamble = -1; + params.use_short_slot_time = -1; + + if (info->attrs[NL80211_ATTR_BSS_CTS_PROT]) + params.use_cts_prot = + nla_get_u8(info->attrs[NL80211_ATTR_BSS_CTS_PROT]); + if (info->attrs[NL80211_ATTR_BSS_SHORT_PREAMBLE]) + params.use_short_preamble = + nla_get_u8(info->attrs[NL80211_ATTR_BSS_SHORT_PREAMBLE]); + if (info->attrs[NL80211_ATTR_BSS_SHORT_SLOT_TIME]) + params.use_short_slot_time = + nla_get_u8(info->attrs[NL80211_ATTR_BSS_SHORT_SLOT_TIME]); + + err = get_drv_dev_by_info_ifindex(info->attrs, &drv, &dev); + if (err) + return err; + + if (!drv->ops->change_bss) { + err = -EOPNOTSUPP; + goto out; + } + + rtnl_lock(); + err = drv->ops->change_bss(&drv->wiphy, dev, ¶ms); + rtnl_unlock(); + + out: + cfg80211_put_dev(drv); + dev_put(dev); + return err; +} + +static const struct nla_policy + reg_rule_policy[NL80211_REG_RULE_ATTR_MAX + 1] = { + [NL80211_ATTR_REG_RULE_FLAGS] = { .type = NLA_U32 }, + [NL80211_ATTR_FREQ_RANGE_START] = { .type = NLA_U32 }, + [NL80211_ATTR_FREQ_RANGE_END] = { .type = NLA_U32 }, + [NL80211_ATTR_FREQ_RANGE_MAX_BW] = { .type = NLA_U32 }, + [NL80211_ATTR_POWER_RULE_MAX_ANT_GAIN] = { .type = NLA_U32 }, + [NL80211_ATTR_POWER_RULE_MAX_EIRP] = { .type = NLA_U32 }, +}; + +static int parse_reg_rule(struct nlattr *tb[], + struct ieee80211_reg_rule *reg_rule) +{ + struct ieee80211_freq_range *freq_range = ®_rule->freq_range; + struct ieee80211_power_rule *power_rule = ®_rule->power_rule; + + if (!tb[NL80211_ATTR_REG_RULE_FLAGS]) + return -EINVAL; + if (!tb[NL80211_ATTR_FREQ_RANGE_START]) + return -EINVAL; + if (!tb[NL80211_ATTR_FREQ_RANGE_END]) + return -EINVAL; + if (!tb[NL80211_ATTR_FREQ_RANGE_MAX_BW]) + return -EINVAL; + if (!tb[NL80211_ATTR_POWER_RULE_MAX_EIRP]) + return -EINVAL; + + reg_rule->flags = nla_get_u32(tb[NL80211_ATTR_REG_RULE_FLAGS]); + + freq_range->start_freq_khz = + nla_get_u32(tb[NL80211_ATTR_FREQ_RANGE_START]); + freq_range->end_freq_khz = + nla_get_u32(tb[NL80211_ATTR_FREQ_RANGE_END]); + freq_range->max_bandwidth_khz = + nla_get_u32(tb[NL80211_ATTR_FREQ_RANGE_MAX_BW]); + + power_rule->max_eirp = + nla_get_u32(tb[NL80211_ATTR_POWER_RULE_MAX_EIRP]); + + if (tb[NL80211_ATTR_POWER_RULE_MAX_ANT_GAIN]) + power_rule->max_antenna_gain = + nla_get_u32(tb[NL80211_ATTR_POWER_RULE_MAX_ANT_GAIN]); + + return 0; +} + +static int nl80211_req_set_reg(struct sk_buff *skb, struct genl_info *info) +{ + int r; + char *data = NULL; + + if (!info->attrs[NL80211_ATTR_REG_ALPHA2]) + return -EINVAL; + + data = nla_data(info->attrs[NL80211_ATTR_REG_ALPHA2]); + +#ifdef CONFIG_WIRELESS_OLD_REGULATORY + /* We ignore world regdom requests with the old regdom setup */ + if (is_world_regdom(data)) + return -EINVAL; +#endif + mutex_lock(&cfg80211_drv_mutex); + r = __regulatory_hint(NULL, REGDOM_SET_BY_USER, data, NULL); + mutex_unlock(&cfg80211_drv_mutex); + return r; +} + +static int nl80211_set_reg(struct sk_buff *skb, struct genl_info *info) +{ + struct nlattr *tb[NL80211_REG_RULE_ATTR_MAX + 1]; + struct nlattr *nl_reg_rule; + char *alpha2 = NULL; + int rem_reg_rules = 0, r = 0; + u32 num_rules = 0, rule_idx = 0, size_of_regd; + struct ieee80211_regdomain *rd = NULL; + + if (!info->attrs[NL80211_ATTR_REG_ALPHA2]) + return -EINVAL; + + if (!info->attrs[NL80211_ATTR_REG_RULES]) + return -EINVAL; + + alpha2 = nla_data(info->attrs[NL80211_ATTR_REG_ALPHA2]); + + nla_for_each_nested(nl_reg_rule, info->attrs[NL80211_ATTR_REG_RULES], + rem_reg_rules) { + num_rules++; + if (num_rules > NL80211_MAX_SUPP_REG_RULES) + goto bad_reg; + } + + if (!reg_is_valid_request(alpha2)) + return -EINVAL; + + size_of_regd = sizeof(struct ieee80211_regdomain) + + (num_rules * sizeof(struct ieee80211_reg_rule)); + + rd = kzalloc(size_of_regd, GFP_KERNEL); + if (!rd) + return -ENOMEM; + + rd->n_reg_rules = num_rules; + rd->alpha2[0] = alpha2[0]; + rd->alpha2[1] = alpha2[1]; + + nla_for_each_nested(nl_reg_rule, info->attrs[NL80211_ATTR_REG_RULES], + rem_reg_rules) { + nla_parse(tb, NL80211_REG_RULE_ATTR_MAX, + nla_data(nl_reg_rule), nla_len(nl_reg_rule), + reg_rule_policy); + r = parse_reg_rule(tb, &rd->reg_rules[rule_idx]); + if (r) + goto bad_reg; + + rule_idx++; + + if (rule_idx > NL80211_MAX_SUPP_REG_RULES) + goto bad_reg; + } + + BUG_ON(rule_idx != num_rules); + + mutex_lock(&cfg80211_drv_mutex); + r = set_regdom(rd); + mutex_unlock(&cfg80211_drv_mutex); + if (r) + goto bad_reg; + + return r; + +bad_reg: + kfree(rd); + return -EINVAL; +} + static struct genl_ops nl80211_ops[] = { { .cmd = NL80211_CMD_GET_WIPHY, @@ -1623,6 +1884,24 @@ static struct genl_ops nl80211_ops[] = { .policy = nl80211_policy, .flags = GENL_ADMIN_PERM, }, + { + .cmd = NL80211_CMD_SET_BSS, + .doit = nl80211_set_bss, + .policy = nl80211_policy, + .flags = GENL_ADMIN_PERM, + }, + { + .cmd = NL80211_CMD_SET_REG, + .doit = nl80211_set_reg, + .policy = nl80211_policy, + .flags = GENL_ADMIN_PERM, + }, + { + .cmd = NL80211_CMD_REQ_SET_REG, + .doit = nl80211_req_set_reg, + .policy = nl80211_policy, + .flags = GENL_ADMIN_PERM, + }, }; /* multicast groups */ diff --git a/net/wireless/radiotap.c b/net/wireless/radiotap.c index 28fbd0b0b568..f591871a7b4f 100644 --- a/net/wireless/radiotap.c +++ b/net/wireless/radiotap.c @@ -59,23 +59,21 @@ int ieee80211_radiotap_iterator_init( return -EINVAL; /* sanity check for allowed length and radiotap length field */ - if (max_length < le16_to_cpu(get_unaligned(&radiotap_header->it_len))) + if (max_length < get_unaligned_le16(&radiotap_header->it_len)) return -EINVAL; iterator->rtheader = radiotap_header; - iterator->max_length = le16_to_cpu(get_unaligned( - &radiotap_header->it_len)); + iterator->max_length = get_unaligned_le16(&radiotap_header->it_len); iterator->arg_index = 0; - iterator->bitmap_shifter = le32_to_cpu(get_unaligned( - &radiotap_header->it_present)); + iterator->bitmap_shifter = get_unaligned_le32(&radiotap_header->it_present); iterator->arg = (u8 *)radiotap_header + sizeof(*radiotap_header); iterator->this_arg = NULL; /* find payload start allowing for extended bitmap(s) */ if (unlikely(iterator->bitmap_shifter & (1<<IEEE80211_RADIOTAP_EXT))) { - while (le32_to_cpu(get_unaligned((__le32 *)iterator->arg)) & - (1<<IEEE80211_RADIOTAP_EXT)) { + while (get_unaligned_le32(iterator->arg) & + (1 << IEEE80211_RADIOTAP_EXT)) { iterator->arg += sizeof(u32); /* @@ -241,8 +239,8 @@ int ieee80211_radiotap_iterator_next( if (iterator->bitmap_shifter & 1) { /* b31 was set, there is more */ /* move to next u32 bitmap */ - iterator->bitmap_shifter = le32_to_cpu( - get_unaligned(iterator->next_bitmap)); + iterator->bitmap_shifter = + get_unaligned_le32(iterator->next_bitmap); iterator->next_bitmap++; } else /* no more bitmaps: end */ diff --git a/net/wireless/reg.c b/net/wireless/reg.c index 185488da2466..626dbb688499 100644 --- a/net/wireless/reg.c +++ b/net/wireless/reg.c @@ -2,161 +2,871 @@ * Copyright 2002-2005, Instant802 Networks, Inc. * Copyright 2005-2006, Devicescape Software, Inc. * Copyright 2007 Johannes Berg <johannes@sipsolutions.net> + * Copyright 2008 Luis R. Rodriguez <lrodriguz@atheros.com> * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License version 2 as * published by the Free Software Foundation. */ -/* - * This regulatory domain control implementation is highly incomplete, it - * only exists for the purpose of not regressing mac80211. - * - * For now, drivers can restrict the set of allowed channels by either - * not registering those channels or setting the IEEE80211_CHAN_DISABLED - * flag; that flag will only be *set* by this code, never *cleared. +/** + * DOC: Wireless regulatory infrastructure * * The usual implementation is for a driver to read a device EEPROM to * determine which regulatory domain it should be operating under, then * looking up the allowable channels in a driver-local table and finally * registering those channels in the wiphy structure. * - * Alternatively, drivers that trust the regulatory domain control here - * will register a complete set of capabilities and the control code - * will restrict the set by setting the IEEE80211_CHAN_* flags. + * Another set of compliance enforcement is for drivers to use their + * own compliance limits which can be stored on the EEPROM. The host + * driver or firmware may ensure these are used. + * + * In addition to all this we provide an extra layer of regulatory + * conformance. For drivers which do not have any regulatory + * information CRDA provides the complete regulatory solution. + * For others it provides a community effort on further restrictions + * to enhance compliance. + * + * Note: When number of rules --> infinity we will not be able to + * index on alpha2 any more, instead we'll probably have to + * rely on some SHA1 checksum of the regdomain for example. + * */ #include <linux/kernel.h> +#include <linux/list.h> +#include <linux/random.h> +#include <linux/nl80211.h> +#include <linux/platform_device.h> #include <net/wireless.h> +#include <net/cfg80211.h> #include "core.h" +#include "reg.h" -static char *ieee80211_regdom = "US"; -module_param(ieee80211_regdom, charp, 0444); -MODULE_PARM_DESC(ieee80211_regdom, "IEEE 802.11 regulatory domain code"); - -struct ieee80211_channel_range { - short start_freq; - short end_freq; - int max_power; - int max_antenna_gain; - u32 flags; +/* wiphy is set if this request's initiator is REGDOM_SET_BY_DRIVER */ +struct regulatory_request { + struct list_head list; + struct wiphy *wiphy; + int granted; + enum reg_set_by initiator; + char alpha2[2]; }; -struct ieee80211_regdomain { - const char *code; - const struct ieee80211_channel_range *ranges; - int n_ranges; +static LIST_HEAD(regulatory_requests); +DEFINE_MUTEX(cfg80211_reg_mutex); + +/* To trigger userspace events */ +static struct platform_device *reg_pdev; + +/* Keep the ordering from large to small */ +static u32 supported_bandwidths[] = { + MHZ_TO_KHZ(40), + MHZ_TO_KHZ(20), }; -#define RANGE_PWR(_start, _end, _pwr, _ag, _flags) \ - { _start, _end, _pwr, _ag, _flags } +static struct list_head regulatory_requests; +/* Central wireless core regulatory domains, we only need two, + * the current one and a world regulatory domain in case we have no + * information to give us an alpha2 */ +static const struct ieee80211_regdomain *cfg80211_regdomain; -/* - * Ideally, in the future, these definitions will be loaded from a - * userspace table via some daemon. - */ -static const struct ieee80211_channel_range ieee80211_US_channels[] = { - /* IEEE 802.11b/g, channels 1..11 */ - RANGE_PWR(2412, 2462, 27, 6, 0), - /* IEEE 802.11a, channel 36*/ - RANGE_PWR(5180, 5180, 23, 6, 0), - /* IEEE 802.11a, channel 40*/ - RANGE_PWR(5200, 5200, 23, 6, 0), - /* IEEE 802.11a, channel 44*/ - RANGE_PWR(5220, 5220, 23, 6, 0), - /* IEEE 802.11a, channels 48..64 */ - RANGE_PWR(5240, 5320, 23, 6, 0), - /* IEEE 802.11a, channels 149..165, outdoor */ - RANGE_PWR(5745, 5825, 30, 6, 0), +/* We keep a static world regulatory domain in case of the absence of CRDA */ +static const struct ieee80211_regdomain world_regdom = { + .n_reg_rules = 1, + .alpha2 = "00", + .reg_rules = { + REG_RULE(2412-10, 2462+10, 40, 6, 20, + NL80211_RRF_PASSIVE_SCAN | + NL80211_RRF_NO_IBSS), + } }; -static const struct ieee80211_channel_range ieee80211_JP_channels[] = { - /* IEEE 802.11b/g, channels 1..14 */ - RANGE_PWR(2412, 2484, 20, 6, 0), - /* IEEE 802.11a, channels 34..48 */ - RANGE_PWR(5170, 5240, 20, 6, IEEE80211_CHAN_PASSIVE_SCAN), - /* IEEE 802.11a, channels 52..64 */ - RANGE_PWR(5260, 5320, 20, 6, IEEE80211_CHAN_NO_IBSS | - IEEE80211_CHAN_RADAR), +static const struct ieee80211_regdomain *cfg80211_world_regdom = + &world_regdom; + +#ifdef CONFIG_WIRELESS_OLD_REGULATORY +static char *ieee80211_regdom = "US"; +module_param(ieee80211_regdom, charp, 0444); +MODULE_PARM_DESC(ieee80211_regdom, "IEEE 802.11 regulatory domain code"); + +/* We assume 40 MHz bandwidth for the old regulatory work. + * We make emphasis we are using the exact same frequencies + * as before */ + +static const struct ieee80211_regdomain us_regdom = { + .n_reg_rules = 6, + .alpha2 = "US", + .reg_rules = { + /* IEEE 802.11b/g, channels 1..11 */ + REG_RULE(2412-10, 2462+10, 40, 6, 27, 0), + /* IEEE 802.11a, channel 36 */ + REG_RULE(5180-10, 5180+10, 40, 6, 23, 0), + /* IEEE 802.11a, channel 40 */ + REG_RULE(5200-10, 5200+10, 40, 6, 23, 0), + /* IEEE 802.11a, channel 44 */ + REG_RULE(5220-10, 5220+10, 40, 6, 23, 0), + /* IEEE 802.11a, channels 48..64 */ + REG_RULE(5240-10, 5320+10, 40, 6, 23, 0), + /* IEEE 802.11a, channels 149..165, outdoor */ + REG_RULE(5745-10, 5825+10, 40, 6, 30, 0), + } }; -#define REGDOM(_code) \ - { \ - .code = __stringify(_code), \ - .ranges = ieee80211_ ##_code## _channels, \ - .n_ranges = ARRAY_SIZE(ieee80211_ ##_code## _channels), \ +static const struct ieee80211_regdomain jp_regdom = { + .n_reg_rules = 3, + .alpha2 = "JP", + .reg_rules = { + /* IEEE 802.11b/g, channels 1..14 */ + REG_RULE(2412-10, 2484+10, 40, 6, 20, 0), + /* IEEE 802.11a, channels 34..48 */ + REG_RULE(5170-10, 5240+10, 40, 6, 20, + NL80211_RRF_PASSIVE_SCAN), + /* IEEE 802.11a, channels 52..64 */ + REG_RULE(5260-10, 5320+10, 40, 6, 20, + NL80211_RRF_NO_IBSS | + NL80211_RRF_DFS), } +}; -static const struct ieee80211_regdomain ieee80211_regdoms[] = { - REGDOM(US), - REGDOM(JP), +static const struct ieee80211_regdomain eu_regdom = { + .n_reg_rules = 6, + /* This alpha2 is bogus, we leave it here just for stupid + * backward compatibility */ + .alpha2 = "EU", + .reg_rules = { + /* IEEE 802.11b/g, channels 1..13 */ + REG_RULE(2412-10, 2472+10, 40, 6, 20, 0), + /* IEEE 802.11a, channel 36 */ + REG_RULE(5180-10, 5180+10, 40, 6, 23, + NL80211_RRF_PASSIVE_SCAN), + /* IEEE 802.11a, channel 40 */ + REG_RULE(5200-10, 5200+10, 40, 6, 23, + NL80211_RRF_PASSIVE_SCAN), + /* IEEE 802.11a, channel 44 */ + REG_RULE(5220-10, 5220+10, 40, 6, 23, + NL80211_RRF_PASSIVE_SCAN), + /* IEEE 802.11a, channels 48..64 */ + REG_RULE(5240-10, 5320+10, 40, 6, 20, + NL80211_RRF_NO_IBSS | + NL80211_RRF_DFS), + /* IEEE 802.11a, channels 100..140 */ + REG_RULE(5500-10, 5700+10, 40, 6, 30, + NL80211_RRF_NO_IBSS | + NL80211_RRF_DFS), + } }; +static const struct ieee80211_regdomain *static_regdom(char *alpha2) +{ + if (alpha2[0] == 'U' && alpha2[1] == 'S') + return &us_regdom; + if (alpha2[0] == 'J' && alpha2[1] == 'P') + return &jp_regdom; + if (alpha2[0] == 'E' && alpha2[1] == 'U') + return &eu_regdom; + /* Default, as per the old rules */ + return &us_regdom; +} + +static bool is_old_static_regdom(const struct ieee80211_regdomain *rd) +{ + if (rd == &us_regdom || rd == &jp_regdom || rd == &eu_regdom) + return true; + return false; +} +#else +static inline bool is_old_static_regdom(const struct ieee80211_regdomain *rd) +{ + return false; +} +#endif -static const struct ieee80211_regdomain *get_regdom(void) +static void reset_regdomains(void) { - static const struct ieee80211_channel_range - ieee80211_world_channels[] = { - /* IEEE 802.11b/g, channels 1..11 */ - RANGE_PWR(2412, 2462, 27, 6, 0), + /* avoid freeing static information or freeing something twice */ + if (cfg80211_regdomain == cfg80211_world_regdom) + cfg80211_regdomain = NULL; + if (cfg80211_world_regdom == &world_regdom) + cfg80211_world_regdom = NULL; + if (cfg80211_regdomain == &world_regdom) + cfg80211_regdomain = NULL; + if (is_old_static_regdom(cfg80211_regdomain)) + cfg80211_regdomain = NULL; + + kfree(cfg80211_regdomain); + kfree(cfg80211_world_regdom); + + cfg80211_world_regdom = &world_regdom; + cfg80211_regdomain = NULL; +} + +/* Dynamic world regulatory domain requested by the wireless + * core upon initialization */ +static void update_world_regdomain(const struct ieee80211_regdomain *rd) +{ + BUG_ON(list_empty(®ulatory_requests)); + + reset_regdomains(); + + cfg80211_world_regdom = rd; + cfg80211_regdomain = rd; +} + +bool is_world_regdom(const char *alpha2) +{ + if (!alpha2) + return false; + if (alpha2[0] == '0' && alpha2[1] == '0') + return true; + return false; +} + +static bool is_alpha2_set(const char *alpha2) +{ + if (!alpha2) + return false; + if (alpha2[0] != 0 && alpha2[1] != 0) + return true; + return false; +} + +static bool is_alpha_upper(char letter) +{ + /* ASCII A - Z */ + if (letter >= 65 && letter <= 90) + return true; + return false; +} + +static bool is_unknown_alpha2(const char *alpha2) +{ + if (!alpha2) + return false; + /* Special case where regulatory domain was built by driver + * but a specific alpha2 cannot be determined */ + if (alpha2[0] == '9' && alpha2[1] == '9') + return true; + return false; +} + +static bool is_an_alpha2(const char *alpha2) +{ + if (!alpha2) + return false; + if (is_alpha_upper(alpha2[0]) && is_alpha_upper(alpha2[1])) + return true; + return false; +} + +static bool alpha2_equal(const char *alpha2_x, const char *alpha2_y) +{ + if (!alpha2_x || !alpha2_y) + return false; + if (alpha2_x[0] == alpha2_y[0] && + alpha2_x[1] == alpha2_y[1]) + return true; + return false; +} + +static bool regdom_changed(const char *alpha2) +{ + if (!cfg80211_regdomain) + return true; + if (alpha2_equal(cfg80211_regdomain->alpha2, alpha2)) + return false; + return true; +} + +/* This lets us keep regulatory code which is updated on a regulatory + * basis in userspace. */ +static int call_crda(const char *alpha2) +{ + char country_env[9 + 2] = "COUNTRY="; + char *envp[] = { + country_env, + NULL }; - static const struct ieee80211_regdomain regdom_world = REGDOM(world); - int i; - for (i = 0; i < ARRAY_SIZE(ieee80211_regdoms); i++) - if (strcmp(ieee80211_regdom, ieee80211_regdoms[i].code) == 0) - return &ieee80211_regdoms[i]; + if (!is_world_regdom((char *) alpha2)) + printk(KERN_INFO "cfg80211: Calling CRDA for country: %c%c\n", + alpha2[0], alpha2[1]); + else + printk(KERN_INFO "cfg80211: Calling CRDA to update world " + "regulatory domain\n"); + + country_env[8] = alpha2[0]; + country_env[9] = alpha2[1]; - return ®dom_world; + return kobject_uevent_env(®_pdev->dev.kobj, KOBJ_CHANGE, envp); } +/* This has the logic which determines when a new request + * should be ignored. */ +static int ignore_request(struct wiphy *wiphy, enum reg_set_by set_by, + char *alpha2, struct ieee80211_regdomain *rd) +{ + struct regulatory_request *last_request = NULL; + + /* All initial requests are respected */ + if (list_empty(®ulatory_requests)) + return 0; + + last_request = list_first_entry(®ulatory_requests, + struct regulatory_request, list); -static void handle_channel(struct ieee80211_channel *chan, - const struct ieee80211_regdomain *rd) + switch (set_by) { + case REGDOM_SET_BY_INIT: + return -EINVAL; + case REGDOM_SET_BY_CORE: + /* Always respect new wireless core hints, should only + * come in for updating the world regulatory domain at init + * anyway */ + return 0; + case REGDOM_SET_BY_COUNTRY_IE: + if (last_request->initiator == set_by) { + if (last_request->wiphy != wiphy) { + /* Two cards with two APs claiming different + * different Country IE alpha2s! + * You're special!! */ + if (!alpha2_equal(last_request->alpha2, + cfg80211_regdomain->alpha2)) { + /* XXX: Deal with conflict, consider + * building a new one out of the + * intersection */ + WARN_ON(1); + return -EOPNOTSUPP; + } + return -EALREADY; + } + /* Two consecutive Country IE hints on the same wiphy */ + if (!alpha2_equal(cfg80211_regdomain->alpha2, alpha2)) + return 0; + return -EALREADY; + } + if (WARN_ON(!is_alpha2_set(alpha2) || !is_an_alpha2(alpha2)), + "Invalid Country IE regulatory hint passed " + "to the wireless core\n") + return -EINVAL; + /* We ignore Country IE hints for now, as we haven't yet + * added the dot11MultiDomainCapabilityEnabled flag + * for wiphys */ + return 1; + case REGDOM_SET_BY_DRIVER: + BUG_ON(!wiphy); + if (last_request->initiator == set_by) { + /* Two separate drivers hinting different things, + * this is possible if you have two devices present + * on a system with different EEPROM regulatory + * readings. XXX: Do intersection, we support only + * the first regulatory hint for now */ + if (last_request->wiphy != wiphy) + return -EALREADY; + if (rd) + return -EALREADY; + /* Driver should not be trying to hint different + * regulatory domains! */ + BUG_ON(!alpha2_equal(alpha2, + cfg80211_regdomain->alpha2)); + return -EALREADY; + } + if (last_request->initiator == REGDOM_SET_BY_CORE) + return 0; + /* XXX: Handle intersection, and add the + * dot11MultiDomainCapabilityEnabled flag to wiphy. For now + * we assume the driver has this set to false, following the + * 802.11d dot11MultiDomainCapabilityEnabled documentation */ + if (last_request->initiator == REGDOM_SET_BY_COUNTRY_IE) + return 0; + return 0; + case REGDOM_SET_BY_USER: + if (last_request->initiator == set_by || + last_request->initiator == REGDOM_SET_BY_CORE) + return 0; + /* Drivers can use their wiphy's reg_notifier() + * to override any information */ + if (last_request->initiator == REGDOM_SET_BY_DRIVER) + return 0; + /* XXX: Handle intersection */ + if (last_request->initiator == REGDOM_SET_BY_COUNTRY_IE) + return -EOPNOTSUPP; + return 0; + default: + return -EINVAL; + } +} + +static bool __reg_is_valid_request(const char *alpha2, + struct regulatory_request **request) +{ + struct regulatory_request *req; + if (list_empty(®ulatory_requests)) + return false; + list_for_each_entry(req, ®ulatory_requests, list) { + if (alpha2_equal(req->alpha2, alpha2)) { + *request = req; + return true; + } + } + return false; +} + +/* Used by nl80211 before kmalloc'ing our regulatory domain */ +bool reg_is_valid_request(const char *alpha2) +{ + struct regulatory_request *request = NULL; + return __reg_is_valid_request(alpha2, &request); +} + +/* Sanity check on a regulatory rule */ +static bool is_valid_reg_rule(const struct ieee80211_reg_rule *rule) +{ + const struct ieee80211_freq_range *freq_range = &rule->freq_range; + u32 freq_diff; + + if (freq_range->start_freq_khz == 0 || freq_range->end_freq_khz == 0) + return false; + + if (freq_range->start_freq_khz > freq_range->end_freq_khz) + return false; + + freq_diff = freq_range->end_freq_khz - freq_range->start_freq_khz; + + if (freq_range->max_bandwidth_khz > freq_diff) + return false; + + return true; +} + +static bool is_valid_rd(const struct ieee80211_regdomain *rd) +{ + const struct ieee80211_reg_rule *reg_rule = NULL; + unsigned int i; + + if (!rd->n_reg_rules) + return false; + + for (i = 0; i < rd->n_reg_rules; i++) { + reg_rule = &rd->reg_rules[i]; + if (!is_valid_reg_rule(reg_rule)) + return false; + } + + return true; +} + +/* Returns value in KHz */ +static u32 freq_max_bandwidth(const struct ieee80211_freq_range *freq_range, + u32 freq) +{ + unsigned int i; + for (i = 0; i < ARRAY_SIZE(supported_bandwidths); i++) { + u32 start_freq_khz = freq - supported_bandwidths[i]/2; + u32 end_freq_khz = freq + supported_bandwidths[i]/2; + if (start_freq_khz >= freq_range->start_freq_khz && + end_freq_khz <= freq_range->end_freq_khz) + return supported_bandwidths[i]; + } + return 0; +} + +/* XXX: add support for the rest of enum nl80211_reg_rule_flags, we may + * want to just have the channel structure use these */ +static u32 map_regdom_flags(u32 rd_flags) +{ + u32 channel_flags = 0; + if (rd_flags & NL80211_RRF_PASSIVE_SCAN) + channel_flags |= IEEE80211_CHAN_PASSIVE_SCAN; + if (rd_flags & NL80211_RRF_NO_IBSS) + channel_flags |= IEEE80211_CHAN_NO_IBSS; + if (rd_flags & NL80211_RRF_DFS) + channel_flags |= IEEE80211_CHAN_RADAR; + return channel_flags; +} + +/** + * freq_reg_info - get regulatory information for the given frequency + * @center_freq: Frequency in KHz for which we want regulatory information for + * @bandwidth: the bandwidth requirement you have in KHz, if you do not have one + * you can set this to 0. If this frequency is allowed we then set + * this value to the maximum allowed bandwidth. + * @reg_rule: the regulatory rule which we have for this frequency + * + * Use this function to get the regulatory rule for a specific frequency. + */ +static int freq_reg_info(u32 center_freq, u32 *bandwidth, + const struct ieee80211_reg_rule **reg_rule) { int i; - u32 flags = chan->orig_flags; - const struct ieee80211_channel_range *rg = NULL; + u32 max_bandwidth = 0; - for (i = 0; i < rd->n_ranges; i++) { - if (rd->ranges[i].start_freq <= chan->center_freq && - chan->center_freq <= rd->ranges[i].end_freq) { - rg = &rd->ranges[i]; + if (!cfg80211_regdomain) + return -EINVAL; + + for (i = 0; i < cfg80211_regdomain->n_reg_rules; i++) { + const struct ieee80211_reg_rule *rr; + const struct ieee80211_freq_range *fr = NULL; + const struct ieee80211_power_rule *pr = NULL; + + rr = &cfg80211_regdomain->reg_rules[i]; + fr = &rr->freq_range; + pr = &rr->power_rule; + max_bandwidth = freq_max_bandwidth(fr, center_freq); + if (max_bandwidth && *bandwidth <= max_bandwidth) { + *reg_rule = rr; + *bandwidth = max_bandwidth; break; } } - if (!rg) { - /* not found */ + return !max_bandwidth; +} + +static void handle_channel(struct ieee80211_channel *chan) +{ + int r; + u32 flags = chan->orig_flags; + u32 max_bandwidth = 0; + const struct ieee80211_reg_rule *reg_rule = NULL; + const struct ieee80211_power_rule *power_rule = NULL; + + r = freq_reg_info(MHZ_TO_KHZ(chan->center_freq), + &max_bandwidth, ®_rule); + + if (r) { flags |= IEEE80211_CHAN_DISABLED; chan->flags = flags; return; } - chan->flags = flags; + power_rule = ®_rule->power_rule; + + chan->flags = flags | map_regdom_flags(reg_rule->flags); chan->max_antenna_gain = min(chan->orig_mag, - rg->max_antenna_gain); + (int) MBI_TO_DBI(power_rule->max_antenna_gain)); + chan->max_bandwidth = KHZ_TO_MHZ(max_bandwidth); if (chan->orig_mpwr) - chan->max_power = min(chan->orig_mpwr, rg->max_power); + chan->max_power = min(chan->orig_mpwr, + (int) MBM_TO_DBM(power_rule->max_eirp)); else - chan->max_power = rg->max_power; + chan->max_power = (int) MBM_TO_DBM(power_rule->max_eirp); } -static void handle_band(struct ieee80211_supported_band *sband, - const struct ieee80211_regdomain *rd) +static void handle_band(struct ieee80211_supported_band *sband) { int i; for (i = 0; i < sband->n_channels; i++) - handle_channel(&sband->channels[i], rd); + handle_channel(&sband->channels[i]); } -void wiphy_update_regulatory(struct wiphy *wiphy) +static void update_all_wiphy_regulatory(enum reg_set_by setby) { - enum ieee80211_band band; - const struct ieee80211_regdomain *rd = get_regdom(); + struct cfg80211_registered_device *drv; + + list_for_each_entry(drv, &cfg80211_drv_list, list) + wiphy_update_regulatory(&drv->wiphy, setby); +} - for (band = 0; band < IEEE80211_NUM_BANDS; band++) +void wiphy_update_regulatory(struct wiphy *wiphy, enum reg_set_by setby) +{ + enum ieee80211_band band; + for (band = 0; band < IEEE80211_NUM_BANDS; band++) { if (wiphy->bands[band]) - handle_band(wiphy->bands[band], rd); + handle_band(wiphy->bands[band]); + if (wiphy->reg_notifier) + wiphy->reg_notifier(wiphy, setby); + } +} + +/* Caller must hold &cfg80211_drv_mutex */ +int __regulatory_hint(struct wiphy *wiphy, enum reg_set_by set_by, + const char *alpha2, struct ieee80211_regdomain *rd) +{ + struct regulatory_request *request; + char *rd_alpha2; + int r = 0; + + r = ignore_request(wiphy, set_by, (char *) alpha2, rd); + if (r) + return r; + + if (rd) + rd_alpha2 = rd->alpha2; + else + rd_alpha2 = (char *) alpha2; + + switch (set_by) { + case REGDOM_SET_BY_CORE: + case REGDOM_SET_BY_COUNTRY_IE: + case REGDOM_SET_BY_DRIVER: + case REGDOM_SET_BY_USER: + request = kzalloc(sizeof(struct regulatory_request), + GFP_KERNEL); + if (!request) + return -ENOMEM; + + request->alpha2[0] = rd_alpha2[0]; + request->alpha2[1] = rd_alpha2[1]; + request->initiator = set_by; + request->wiphy = wiphy; + + list_add_tail(&request->list, ®ulatory_requests); + if (rd) + break; + r = call_crda(alpha2); +#ifndef CONFIG_WIRELESS_OLD_REGULATORY + if (r) + printk(KERN_ERR "cfg80211: Failed calling CRDA\n"); +#endif + break; + default: + r = -ENOTSUPP; + break; + } + + return r; +} + +/* If rd is not NULL and if this call fails the caller must free it */ +int regulatory_hint(struct wiphy *wiphy, const char *alpha2, + struct ieee80211_regdomain *rd) +{ + int r; + BUG_ON(!rd && !alpha2); + + mutex_lock(&cfg80211_drv_mutex); + + r = __regulatory_hint(wiphy, REGDOM_SET_BY_DRIVER, alpha2, rd); + if (r || !rd) + goto unlock_and_exit; + + /* If the driver passed a regulatory domain we skipped asking + * userspace for one so we can now go ahead and set it */ + r = set_regdom(rd); + +unlock_and_exit: + mutex_unlock(&cfg80211_drv_mutex); + return r; +} +EXPORT_SYMBOL(regulatory_hint); + + +static void print_rd_rules(const struct ieee80211_regdomain *rd) +{ + unsigned int i; + const struct ieee80211_reg_rule *reg_rule = NULL; + const struct ieee80211_freq_range *freq_range = NULL; + const struct ieee80211_power_rule *power_rule = NULL; + + printk(KERN_INFO "\t(start_freq - end_freq @ bandwidth), " + "(max_antenna_gain, max_eirp)\n"); + + for (i = 0; i < rd->n_reg_rules; i++) { + reg_rule = &rd->reg_rules[i]; + freq_range = ®_rule->freq_range; + power_rule = ®_rule->power_rule; + + /* There may not be documentation for max antenna gain + * in certain regions */ + if (power_rule->max_antenna_gain) + printk(KERN_INFO "\t(%d KHz - %d KHz @ %d KHz), " + "(%d mBi, %d mBm)\n", + freq_range->start_freq_khz, + freq_range->end_freq_khz, + freq_range->max_bandwidth_khz, + power_rule->max_antenna_gain, + power_rule->max_eirp); + else + printk(KERN_INFO "\t(%d KHz - %d KHz @ %d KHz), " + "(N/A, %d mBm)\n", + freq_range->start_freq_khz, + freq_range->end_freq_khz, + freq_range->max_bandwidth_khz, + power_rule->max_eirp); + } +} + +static void print_regdomain(const struct ieee80211_regdomain *rd) +{ + + if (is_world_regdom(rd->alpha2)) + printk(KERN_INFO "cfg80211: World regulatory " + "domain updated:\n"); + else { + if (is_unknown_alpha2(rd->alpha2)) + printk(KERN_INFO "cfg80211: Regulatory domain " + "changed to driver built-in settings " + "(unknown country)\n"); + else + printk(KERN_INFO "cfg80211: Regulatory domain " + "changed to country: %c%c\n", + rd->alpha2[0], rd->alpha2[1]); + } + print_rd_rules(rd); +} + +void print_regdomain_info(const struct ieee80211_regdomain *rd) +{ + printk(KERN_INFO "cfg80211: Regulatory domain: %c%c\n", + rd->alpha2[0], rd->alpha2[1]); + print_rd_rules(rd); +} + +static int __set_regdom(const struct ieee80211_regdomain *rd) +{ + struct regulatory_request *request = NULL; + + /* Some basic sanity checks first */ + + if (is_world_regdom(rd->alpha2)) { + if (WARN_ON(!__reg_is_valid_request(rd->alpha2, &request))) + return -EINVAL; + update_world_regdomain(rd); + return 0; + } + + if (!is_alpha2_set(rd->alpha2) && !is_an_alpha2(rd->alpha2) && + !is_unknown_alpha2(rd->alpha2)) + return -EINVAL; + + if (list_empty(®ulatory_requests)) + return -EINVAL; + + /* allow overriding the static definitions if CRDA is present */ + if (!is_old_static_regdom(cfg80211_regdomain) && + !regdom_changed(rd->alpha2)) + return -EINVAL; + + /* Now lets set the regulatory domain, update all driver channels + * and finally inform them of what we have done, in case they want + * to review or adjust their own settings based on their own + * internal EEPROM data */ + + if (WARN_ON(!__reg_is_valid_request(rd->alpha2, &request))) + return -EINVAL; + + reset_regdomains(); + + /* Country IE parsing coming soon */ + switch (request->initiator) { + case REGDOM_SET_BY_CORE: + case REGDOM_SET_BY_DRIVER: + case REGDOM_SET_BY_USER: + if (!is_valid_rd(rd)) { + printk(KERN_ERR "cfg80211: Invalid " + "regulatory domain detected:\n"); + print_regdomain_info(rd); + return -EINVAL; + } + break; + case REGDOM_SET_BY_COUNTRY_IE: /* Not yet */ + WARN_ON(1); + default: + return -EOPNOTSUPP; + } + + /* Tada! */ + cfg80211_regdomain = rd; + request->granted = 1; + + return 0; +} + + +/* Use this call to set the current regulatory domain. Conflicts with + * multiple drivers can be ironed out later. Caller must've already + * kmalloc'd the rd structure. If this calls fails you should kfree() + * the passed rd. Caller must hold cfg80211_drv_mutex */ +int set_regdom(const struct ieee80211_regdomain *rd) +{ + struct regulatory_request *this_request = NULL, *prev_request = NULL; + int r; + + if (!list_empty(®ulatory_requests)) + prev_request = list_first_entry(®ulatory_requests, + struct regulatory_request, list); + + /* Note that this doesn't update the wiphys, this is done below */ + r = __set_regdom(rd); + if (r) + return r; + + BUG_ON((!__reg_is_valid_request(rd->alpha2, &this_request))); + + /* The initial standard core update of the world regulatory domain, no + * need to keep that request info around if it didn't fail. */ + if (is_world_regdom(rd->alpha2) && + this_request->initiator == REGDOM_SET_BY_CORE && + this_request->granted) { + list_del(&this_request->list); + kfree(this_request); + this_request = NULL; + } + + /* Remove old requests, we only leave behind the last one */ + if (prev_request) { + list_del(&prev_request->list); + kfree(prev_request); + prev_request = NULL; + } + + /* This would make this whole thing pointless */ + BUG_ON(rd != cfg80211_regdomain); + + /* update all wiphys now with the new established regulatory domain */ + update_all_wiphy_regulatory(this_request->initiator); + + print_regdomain(rd); + + return r; +} + +int regulatory_init(void) +{ + int err; + + reg_pdev = platform_device_register_simple("regulatory", 0, NULL, 0); + if (IS_ERR(reg_pdev)) + return PTR_ERR(reg_pdev); + +#ifdef CONFIG_WIRELESS_OLD_REGULATORY + cfg80211_regdomain = static_regdom(ieee80211_regdom); + + printk(KERN_INFO "cfg80211: Using static regulatory domain info\n"); + print_regdomain_info(cfg80211_regdomain); + /* The old code still requests for a new regdomain and if + * you have CRDA you get it updated, otherwise you get + * stuck with the static values. We ignore "EU" code as + * that is not a valid ISO / IEC 3166 alpha2 */ + if (ieee80211_regdom[0] != 'E' && ieee80211_regdom[1] != 'U') + err = __regulatory_hint(NULL, REGDOM_SET_BY_CORE, + ieee80211_regdom, NULL); +#else + cfg80211_regdomain = cfg80211_world_regdom; + + err = __regulatory_hint(NULL, REGDOM_SET_BY_CORE, "00", NULL); + if (err) + printk(KERN_ERR "cfg80211: calling CRDA failed - " + "unable to update world regulatory domain, " + "using static definition\n"); +#endif + + return 0; +} + +void regulatory_exit(void) +{ + struct regulatory_request *req, *req_tmp; + + mutex_lock(&cfg80211_drv_mutex); + + reset_regdomains(); + + list_for_each_entry_safe(req, req_tmp, ®ulatory_requests, list) { + list_del(&req->list); + kfree(req); + } + platform_device_unregister(reg_pdev); + + mutex_unlock(&cfg80211_drv_mutex); } diff --git a/net/wireless/reg.h b/net/wireless/reg.h new file mode 100644 index 000000000000..a33362872f3c --- /dev/null +++ b/net/wireless/reg.h @@ -0,0 +1,13 @@ +#ifndef __NET_WIRELESS_REG_H +#define __NET_WIRELESS_REG_H + +extern struct mutex cfg80211_reg_mutex; +bool is_world_regdom(const char *alpha2); +bool reg_is_valid_request(const char *alpha2); + +int regulatory_init(void); +void regulatory_exit(void); + +int set_regdom(const struct ieee80211_regdomain *rd); + +#endif /* __NET_WIRELESS_REG_H */ diff --git a/net/wireless/wext.c b/net/wireless/wext.c index 947188a5b937..d98ffb75119a 100644 --- a/net/wireless/wext.c +++ b/net/wireless/wext.c @@ -500,7 +500,7 @@ static int call_commit_handler(struct net_device *dev) /* * Calculate size of private arguments */ -static inline int get_priv_size(__u16 args) +static int get_priv_size(__u16 args) { int num = args & IW_PRIV_SIZE_MASK; int type = (args & IW_PRIV_TYPE_MASK) >> 12; @@ -512,10 +512,9 @@ static inline int get_priv_size(__u16 args) /* * Re-calculate the size of private arguments */ -static inline int adjust_priv_size(__u16 args, - union iwreq_data * wrqu) +static int adjust_priv_size(__u16 args, struct iw_point *iwp) { - int num = wrqu->data.length; + int num = iwp->length; int max = args & IW_PRIV_SIZE_MASK; int type = (args & IW_PRIV_TYPE_MASK) >> 12; @@ -695,19 +694,150 @@ void wext_proc_exit(struct net *net) */ /* ---------------------------------------------------------------- */ +static int ioctl_standard_iw_point(struct iw_point *iwp, unsigned int cmd, + const struct iw_ioctl_description *descr, + iw_handler handler, struct net_device *dev, + struct iw_request_info *info) +{ + int err, extra_size, user_length = 0, essid_compat = 0; + char *extra; + + /* Calculate space needed by arguments. Always allocate + * for max space. + */ + extra_size = descr->max_tokens * descr->token_size; + + /* Check need for ESSID compatibility for WE < 21 */ + switch (cmd) { + case SIOCSIWESSID: + case SIOCGIWESSID: + case SIOCSIWNICKN: + case SIOCGIWNICKN: + if (iwp->length == descr->max_tokens + 1) + essid_compat = 1; + else if (IW_IS_SET(cmd) && (iwp->length != 0)) { + char essid[IW_ESSID_MAX_SIZE + 1]; + + err = copy_from_user(essid, iwp->pointer, + iwp->length * + descr->token_size); + if (err) + return -EFAULT; + + if (essid[iwp->length - 1] == '\0') + essid_compat = 1; + } + break; + default: + break; + } + + iwp->length -= essid_compat; + + /* Check what user space is giving us */ + if (IW_IS_SET(cmd)) { + /* Check NULL pointer */ + if (!iwp->pointer && iwp->length != 0) + return -EFAULT; + /* Check if number of token fits within bounds */ + if (iwp->length > descr->max_tokens) + return -E2BIG; + if (iwp->length < descr->min_tokens) + return -EINVAL; + } else { + /* Check NULL pointer */ + if (!iwp->pointer) + return -EFAULT; + /* Save user space buffer size for checking */ + user_length = iwp->length; + + /* Don't check if user_length > max to allow forward + * compatibility. The test user_length < min is + * implied by the test at the end. + */ + + /* Support for very large requests */ + if ((descr->flags & IW_DESCR_FLAG_NOMAX) && + (user_length > descr->max_tokens)) { + /* Allow userspace to GET more than max so + * we can support any size GET requests. + * There is still a limit : -ENOMEM. + */ + extra_size = user_length * descr->token_size; + + /* Note : user_length is originally a __u16, + * and token_size is controlled by us, + * so extra_size won't get negative and + * won't overflow... + */ + } + } + + /* kzalloc() ensures NULL-termination for essid_compat. */ + extra = kzalloc(extra_size, GFP_KERNEL); + if (!extra) + return -ENOMEM; + + /* If it is a SET, get all the extra data in here */ + if (IW_IS_SET(cmd) && (iwp->length != 0)) { + if (copy_from_user(extra, iwp->pointer, + iwp->length * + descr->token_size)) { + err = -EFAULT; + goto out; + } + } + + err = handler(dev, info, (union iwreq_data *) iwp, extra); + + iwp->length += essid_compat; + + /* If we have something to return to the user */ + if (!err && IW_IS_GET(cmd)) { + /* Check if there is enough buffer up there */ + if (user_length < iwp->length) { + err = -E2BIG; + goto out; + } + + if (copy_to_user(iwp->pointer, extra, + iwp->length * + descr->token_size)) { + err = -EFAULT; + goto out; + } + } + + /* Generate an event to notify listeners of the change */ + if ((descr->flags & IW_DESCR_FLAG_EVENT) && err == -EIWCOMMIT) { + union iwreq_data *data = (union iwreq_data *) iwp; + + if (descr->flags & IW_DESCR_FLAG_RESTRICT) + /* If the event is restricted, don't + * export the payload. + */ + wireless_send_event(dev, cmd, data, NULL); + else + wireless_send_event(dev, cmd, data, extra); + } + +out: + kfree(extra); + return err; +} + /* * Wrapper to call a standard Wireless Extension handler. * We do various checks and also take care of moving data between * user space and kernel space. */ static int ioctl_standard_call(struct net_device * dev, - struct ifreq * ifr, + struct iwreq *iwr, unsigned int cmd, + struct iw_request_info *info, iw_handler handler) { - struct iwreq * iwr = (struct iwreq *) ifr; const struct iw_ioctl_description * descr; - struct iw_request_info info; int ret = -EINVAL; /* Get the description of the IOCTL */ @@ -715,145 +845,19 @@ static int ioctl_standard_call(struct net_device * dev, return -EOPNOTSUPP; descr = &(standard_ioctl[cmd - SIOCIWFIRST]); - /* Prepare the call */ - info.cmd = cmd; - info.flags = 0; - /* Check if we have a pointer to user space data or not */ if (descr->header_type != IW_HEADER_TYPE_POINT) { /* No extra arguments. Trivial to handle */ - ret = handler(dev, &info, &(iwr->u), NULL); + ret = handler(dev, info, &(iwr->u), NULL); /* Generate an event to notify listeners of the change */ if ((descr->flags & IW_DESCR_FLAG_EVENT) && ((ret == 0) || (ret == -EIWCOMMIT))) wireless_send_event(dev, cmd, &(iwr->u), NULL); } else { - char * extra; - int extra_size; - int user_length = 0; - int err; - int essid_compat = 0; - - /* Calculate space needed by arguments. Always allocate - * for max space. Easier, and won't last long... */ - extra_size = descr->max_tokens * descr->token_size; - - /* Check need for ESSID compatibility for WE < 21 */ - switch (cmd) { - case SIOCSIWESSID: - case SIOCGIWESSID: - case SIOCSIWNICKN: - case SIOCGIWNICKN: - if (iwr->u.data.length == descr->max_tokens + 1) - essid_compat = 1; - else if (IW_IS_SET(cmd) && (iwr->u.data.length != 0)) { - char essid[IW_ESSID_MAX_SIZE + 1]; - - err = copy_from_user(essid, iwr->u.data.pointer, - iwr->u.data.length * - descr->token_size); - if (err) - return -EFAULT; - - if (essid[iwr->u.data.length - 1] == '\0') - essid_compat = 1; - } - break; - default: - break; - } - - iwr->u.data.length -= essid_compat; - - /* Check what user space is giving us */ - if (IW_IS_SET(cmd)) { - /* Check NULL pointer */ - if ((iwr->u.data.pointer == NULL) && - (iwr->u.data.length != 0)) - return -EFAULT; - /* Check if number of token fits within bounds */ - if (iwr->u.data.length > descr->max_tokens) - return -E2BIG; - if (iwr->u.data.length < descr->min_tokens) - return -EINVAL; - } else { - /* Check NULL pointer */ - if (iwr->u.data.pointer == NULL) - return -EFAULT; - /* Save user space buffer size for checking */ - user_length = iwr->u.data.length; - - /* Don't check if user_length > max to allow forward - * compatibility. The test user_length < min is - * implied by the test at the end. */ - - /* Support for very large requests */ - if ((descr->flags & IW_DESCR_FLAG_NOMAX) && - (user_length > descr->max_tokens)) { - /* Allow userspace to GET more than max so - * we can support any size GET requests. - * There is still a limit : -ENOMEM. */ - extra_size = user_length * descr->token_size; - /* Note : user_length is originally a __u16, - * and token_size is controlled by us, - * so extra_size won't get negative and - * won't overflow... */ - } - } - - /* Create the kernel buffer */ - /* kzalloc ensures NULL-termination for essid_compat */ - extra = kzalloc(extra_size, GFP_KERNEL); - if (extra == NULL) - return -ENOMEM; - - /* If it is a SET, get all the extra data in here */ - if (IW_IS_SET(cmd) && (iwr->u.data.length != 0)) { - err = copy_from_user(extra, iwr->u.data.pointer, - iwr->u.data.length * - descr->token_size); - if (err) { - kfree(extra); - return -EFAULT; - } - } - - /* Call the handler */ - ret = handler(dev, &info, &(iwr->u), extra); - - iwr->u.data.length += essid_compat; - - /* If we have something to return to the user */ - if (!ret && IW_IS_GET(cmd)) { - /* Check if there is enough buffer up there */ - if (user_length < iwr->u.data.length) { - kfree(extra); - return -E2BIG; - } - - err = copy_to_user(iwr->u.data.pointer, extra, - iwr->u.data.length * - descr->token_size); - if (err) - ret = -EFAULT; - } - - /* Generate an event to notify listeners of the change */ - if ((descr->flags & IW_DESCR_FLAG_EVENT) && - ((ret == 0) || (ret == -EIWCOMMIT))) { - if (descr->flags & IW_DESCR_FLAG_RESTRICT) - /* If the event is restricted, don't - * export the payload */ - wireless_send_event(dev, cmd, &(iwr->u), NULL); - else - wireless_send_event(dev, cmd, &(iwr->u), - extra); - } - - /* Cleanup - I told you it wasn't that long ;-) */ - kfree(extra); + ret = ioctl_standard_iw_point(&iwr->u.data, cmd, descr, + handler, dev, info); } /* Call commit handler if needed and defined */ @@ -881,25 +885,22 @@ static int ioctl_standard_call(struct net_device * dev, * a iw_handler but process it in your ioctl handler (i.e. use the * old driver API). */ -static int ioctl_private_call(struct net_device *dev, struct ifreq *ifr, - unsigned int cmd, iw_handler handler) +static int get_priv_descr_and_size(struct net_device *dev, unsigned int cmd, + const struct iw_priv_args **descrp) { - struct iwreq * iwr = (struct iwreq *) ifr; - const struct iw_priv_args * descr = NULL; - struct iw_request_info info; - int extra_size = 0; - int i; - int ret = -EINVAL; + const struct iw_priv_args *descr; + int i, extra_size; - /* Get the description of the IOCTL */ - for (i = 0; i < dev->wireless_handlers->num_private_args; i++) + descr = NULL; + for (i = 0; i < dev->wireless_handlers->num_private_args; i++) { if (cmd == dev->wireless_handlers->private_args[i].cmd) { - descr = &(dev->wireless_handlers->private_args[i]); + descr = &dev->wireless_handlers->private_args[i]; break; } + } - /* Compute the size of the set/get arguments */ - if (descr != NULL) { + extra_size = 0; + if (descr) { if (IW_IS_SET(cmd)) { int offset = 0; /* For sub-ioctls */ /* Check for sub-ioctl handler */ @@ -924,72 +925,77 @@ static int ioctl_private_call(struct net_device *dev, struct ifreq *ifr, extra_size = 0; } } + *descrp = descr; + return extra_size; +} - /* Prepare the call */ - info.cmd = cmd; - info.flags = 0; +static int ioctl_private_iw_point(struct iw_point *iwp, unsigned int cmd, + const struct iw_priv_args *descr, + iw_handler handler, struct net_device *dev, + struct iw_request_info *info, int extra_size) +{ + char *extra; + int err; - /* Check if we have a pointer to user space data or not. */ - if (extra_size == 0) { - /* No extra arguments. Trivial to handle */ - ret = handler(dev, &info, &(iwr->u), (char *) &(iwr->u)); - } else { - char * extra; - int err; + /* Check what user space is giving us */ + if (IW_IS_SET(cmd)) { + if (!iwp->pointer && iwp->length != 0) + return -EFAULT; - /* Check what user space is giving us */ - if (IW_IS_SET(cmd)) { - /* Check NULL pointer */ - if ((iwr->u.data.pointer == NULL) && - (iwr->u.data.length != 0)) - return -EFAULT; + if (iwp->length > (descr->set_args & IW_PRIV_SIZE_MASK)) + return -E2BIG; + } else if (!iwp->pointer) + return -EFAULT; - /* Does it fits within bounds ? */ - if (iwr->u.data.length > (descr->set_args & - IW_PRIV_SIZE_MASK)) - return -E2BIG; - } else if (iwr->u.data.pointer == NULL) - return -EFAULT; + extra = kmalloc(extra_size, GFP_KERNEL); + if (!extra) + return -ENOMEM; - /* Always allocate for max space. Easier, and won't last - * long... */ - extra = kmalloc(extra_size, GFP_KERNEL); - if (extra == NULL) - return -ENOMEM; - - /* If it is a SET, get all the extra data in here */ - if (IW_IS_SET(cmd) && (iwr->u.data.length != 0)) { - err = copy_from_user(extra, iwr->u.data.pointer, - extra_size); - if (err) { - kfree(extra); - return -EFAULT; - } + /* If it is a SET, get all the extra data in here */ + if (IW_IS_SET(cmd) && (iwp->length != 0)) { + if (copy_from_user(extra, iwp->pointer, extra_size)) { + err = -EFAULT; + goto out; } + } - /* Call the handler */ - ret = handler(dev, &info, &(iwr->u), extra); + /* Call the handler */ + err = handler(dev, info, (union iwreq_data *) iwp, extra); - /* If we have something to return to the user */ - if (!ret && IW_IS_GET(cmd)) { + /* If we have something to return to the user */ + if (!err && IW_IS_GET(cmd)) { + /* Adjust for the actual length if it's variable, + * avoid leaking kernel bits outside. + */ + if (!(descr->get_args & IW_PRIV_SIZE_FIXED)) + extra_size = adjust_priv_size(descr->get_args, iwp); - /* Adjust for the actual length if it's variable, - * avoid leaking kernel bits outside. */ - if (!(descr->get_args & IW_PRIV_SIZE_FIXED)) { - extra_size = adjust_priv_size(descr->get_args, - &(iwr->u)); - } + if (copy_to_user(iwp->pointer, extra, extra_size)) + err = -EFAULT; + } - err = copy_to_user(iwr->u.data.pointer, extra, - extra_size); - if (err) - ret = -EFAULT; - } +out: + kfree(extra); + return err; +} - /* Cleanup - I told you it wasn't that long ;-) */ - kfree(extra); - } +static int ioctl_private_call(struct net_device *dev, struct iwreq *iwr, + unsigned int cmd, struct iw_request_info *info, + iw_handler handler) +{ + int extra_size = 0, ret = -EINVAL; + const struct iw_priv_args *descr; + extra_size = get_priv_descr_and_size(dev, cmd, &descr); + + /* Check if we have a pointer to user space data or not. */ + if (extra_size == 0) { + /* No extra arguments. Trivial to handle */ + ret = handler(dev, info, &(iwr->u), (char *) &(iwr->u)); + } else { + ret = ioctl_private_iw_point(&iwr->u.data, cmd, descr, + handler, dev, info, extra_size); + } /* Call commit handler if needed and defined */ if (ret == -EIWCOMMIT) @@ -999,12 +1005,21 @@ static int ioctl_private_call(struct net_device *dev, struct ifreq *ifr, } /* ---------------------------------------------------------------- */ +typedef int (*wext_ioctl_func)(struct net_device *, struct iwreq *, + unsigned int, struct iw_request_info *, + iw_handler); + /* * Main IOCTl dispatcher. * Check the type of IOCTL and call the appropriate wrapper... */ -static int wireless_process_ioctl(struct net *net, struct ifreq *ifr, unsigned int cmd) +static int wireless_process_ioctl(struct net *net, struct ifreq *ifr, + unsigned int cmd, + struct iw_request_info *info, + wext_ioctl_func standard, + wext_ioctl_func private) { + struct iwreq *iwr = (struct iwreq *) ifr; struct net_device *dev; iw_handler handler; @@ -1019,12 +1034,12 @@ static int wireless_process_ioctl(struct net *net, struct ifreq *ifr, unsigned i * Note that 'cmd' is already filtered in dev_ioctl() with * (cmd >= SIOCIWFIRST && cmd <= SIOCIWLAST) */ if (cmd == SIOCGIWSTATS) - return ioctl_standard_call(dev, ifr, cmd, - &iw_handler_get_iwstats); + return standard(dev, iwr, cmd, info, + &iw_handler_get_iwstats); if (cmd == SIOCGIWPRIV && dev->wireless_handlers) - return ioctl_standard_call(dev, ifr, cmd, - &iw_handler_get_private); + return standard(dev, iwr, cmd, info, + &iw_handler_get_private); /* Basic check */ if (!netif_device_present(dev)) @@ -1035,9 +1050,9 @@ static int wireless_process_ioctl(struct net *net, struct ifreq *ifr, unsigned i if (handler) { /* Standard and private are not the same */ if (cmd < SIOCIWFIRSTPRIV) - return ioctl_standard_call(dev, ifr, cmd, handler); + return standard(dev, iwr, cmd, info, handler); else - return ioctl_private_call(dev, ifr, cmd, handler); + return private(dev, iwr, cmd, info, handler); } /* Old driver API : call driver ioctl handler */ if (dev->do_ioctl) @@ -1045,27 +1060,154 @@ static int wireless_process_ioctl(struct net *net, struct ifreq *ifr, unsigned i return -EOPNOTSUPP; } -/* entry point from dev ioctl */ -int wext_handle_ioctl(struct net *net, struct ifreq *ifr, unsigned int cmd, - void __user *arg) +/* If command is `set a parameter', or `get the encoding parameters', + * check if the user has the right to do it. + */ +static int wext_permission_check(unsigned int cmd) { - int ret; - - /* If command is `set a parameter', or - * `get the encoding parameters', check if - * the user has the right to do it */ if ((IW_IS_SET(cmd) || cmd == SIOCGIWENCODE || cmd == SIOCGIWENCODEEXT) && !capable(CAP_NET_ADMIN)) return -EPERM; + return 0; +} + +/* entry point from dev ioctl */ +static int wext_ioctl_dispatch(struct net *net, struct ifreq *ifr, + unsigned int cmd, struct iw_request_info *info, + wext_ioctl_func standard, + wext_ioctl_func private) +{ + int ret = wext_permission_check(cmd); + + if (ret) + return ret; + dev_load(net, ifr->ifr_name); rtnl_lock(); - ret = wireless_process_ioctl(net, ifr, cmd); + ret = wireless_process_ioctl(net, ifr, cmd, info, standard, private); rtnl_unlock(); - if (IW_IS_GET(cmd) && copy_to_user(arg, ifr, sizeof(struct iwreq))) + + return ret; +} + +int wext_handle_ioctl(struct net *net, struct ifreq *ifr, unsigned int cmd, + void __user *arg) +{ + struct iw_request_info info = { .cmd = cmd, .flags = 0 }; + int ret; + + ret = wext_ioctl_dispatch(net, ifr, cmd, &info, + ioctl_standard_call, + ioctl_private_call); + if (ret >= 0 && + IW_IS_GET(cmd) && + copy_to_user(arg, ifr, sizeof(struct iwreq))) + return -EFAULT; + + return ret; +} + +#ifdef CONFIG_COMPAT +static int compat_standard_call(struct net_device *dev, + struct iwreq *iwr, + unsigned int cmd, + struct iw_request_info *info, + iw_handler handler) +{ + const struct iw_ioctl_description *descr; + struct compat_iw_point *iwp_compat; + struct iw_point iwp; + int err; + + descr = standard_ioctl + (cmd - SIOCIWFIRST); + + if (descr->header_type != IW_HEADER_TYPE_POINT) + return ioctl_standard_call(dev, iwr, cmd, info, handler); + + iwp_compat = (struct compat_iw_point *) &iwr->u.data; + iwp.pointer = compat_ptr(iwp_compat->pointer); + iwp.length = iwp_compat->length; + iwp.flags = iwp_compat->flags; + + err = ioctl_standard_iw_point(&iwp, cmd, descr, handler, dev, info); + + iwp_compat->pointer = ptr_to_compat(iwp.pointer); + iwp_compat->length = iwp.length; + iwp_compat->flags = iwp.flags; + + return err; +} + +static int compat_private_call(struct net_device *dev, struct iwreq *iwr, + unsigned int cmd, struct iw_request_info *info, + iw_handler handler) +{ + const struct iw_priv_args *descr; + int ret, extra_size; + + extra_size = get_priv_descr_and_size(dev, cmd, &descr); + + /* Check if we have a pointer to user space data or not. */ + if (extra_size == 0) { + /* No extra arguments. Trivial to handle */ + ret = handler(dev, info, &(iwr->u), (char *) &(iwr->u)); + } else { + struct compat_iw_point *iwp_compat; + struct iw_point iwp; + + iwp_compat = (struct compat_iw_point *) &iwr->u.data; + iwp.pointer = compat_ptr(iwp_compat->pointer); + iwp.length = iwp_compat->length; + iwp.flags = iwp_compat->flags; + + ret = ioctl_private_iw_point(&iwp, cmd, descr, + handler, dev, info, extra_size); + + iwp_compat->pointer = ptr_to_compat(iwp.pointer); + iwp_compat->length = iwp.length; + iwp_compat->flags = iwp.flags; + } + + /* Call commit handler if needed and defined */ + if (ret == -EIWCOMMIT) + ret = call_commit_handler(dev); + + return ret; +} + +int compat_wext_handle_ioctl(struct net *net, unsigned int cmd, + unsigned long arg) +{ + void __user *argp = (void __user *)arg; + struct iw_request_info info; + struct iwreq iwr; + char *colon; + int ret; + + if (copy_from_user(&iwr, argp, sizeof(struct iwreq))) + return -EFAULT; + + iwr.ifr_name[IFNAMSIZ-1] = 0; + colon = strchr(iwr.ifr_name, ':'); + if (colon) + *colon = 0; + + info.cmd = cmd; + info.flags = IW_REQUEST_FLAG_COMPAT; + + ret = wext_ioctl_dispatch(net, (struct ifreq *) &iwr, cmd, &info, + compat_standard_call, + compat_private_call); + + if (ret >= 0 && + IW_IS_GET(cmd) && + copy_to_user(argp, &iwr, sizeof(struct iwreq))) return -EFAULT; + return ret; } +#endif /************************* EVENT PROCESSING *************************/ /* @@ -1135,6 +1277,7 @@ static int rtnetlink_fill_iwinfo(struct sk_buff *skb, struct net_device *dev, r->ifi_flags = dev_get_flags(dev); r->ifi_change = 0; /* Wireless changes don't affect those flags */ + NLA_PUT_STRING(skb, IFLA_IFNAME, dev->name); /* Add the wireless events in the netlink packet */ NLA_PUT(skb, IFLA_WIRELESS, event_len, event); @@ -1157,7 +1300,7 @@ static void rtmsg_iwinfo(struct net_device *dev, char *event, int event_len) struct sk_buff *skb; int err; - if (dev_net(dev) != &init_net) + if (!net_eq(dev_net(dev), &init_net)) return; skb = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_ATOMIC); diff --git a/net/x25/af_x25.c b/net/x25/af_x25.c index 6ba67c523c16..9fc5b023d111 100644 --- a/net/x25/af_x25.c +++ b/net/x25/af_x25.c @@ -191,7 +191,7 @@ static int x25_device_event(struct notifier_block *this, unsigned long event, struct net_device *dev = ptr; struct x25_neigh *nb; - if (dev_net(dev) != &init_net) + if (!net_eq(dev_net(dev), &init_net)) return NOTIFY_DONE; if (dev->type == ARPHRD_X25 @@ -555,13 +555,11 @@ static struct sock *x25_make_new(struct sock *osk) x25 = x25_sk(sk); sk->sk_type = osk->sk_type; - sk->sk_socket = osk->sk_socket; sk->sk_priority = osk->sk_priority; sk->sk_protocol = osk->sk_protocol; sk->sk_rcvbuf = osk->sk_rcvbuf; sk->sk_sndbuf = osk->sk_sndbuf; sk->sk_state = TCP_ESTABLISHED; - sk->sk_sleep = osk->sk_sleep; sk->sk_backlog_rcv = osk->sk_backlog_rcv; sock_copy_flags(sk, osk); @@ -614,8 +612,7 @@ static int x25_release(struct socket *sock) break; } - sock->sk = NULL; - sk->sk_socket = NULL; /* Not used, but we should do this */ + sock_orphan(sk); out: return 0; } @@ -808,14 +805,12 @@ static int x25_accept(struct socket *sock, struct socket *newsock, int flags) if (!skb->sk) goto out2; newsk = skb->sk; - newsk->sk_socket = newsock; - newsk->sk_sleep = &newsock->wait; + sock_graft(newsk, newsock); /* Now attach up the new socket */ skb->sk = NULL; kfree_skb(skb); sk->sk_ack_backlog--; - newsock->sk = newsk; newsock->state = SS_CONNECTED; rc = 0; out2: diff --git a/net/x25/x25_dev.c b/net/x25/x25_dev.c index 3ff206c0ae94..3e1efe534645 100644 --- a/net/x25/x25_dev.c +++ b/net/x25/x25_dev.c @@ -95,7 +95,7 @@ int x25_lapb_receive_frame(struct sk_buff *skb, struct net_device *dev, struct sk_buff *nskb; struct x25_neigh *nb; - if (dev_net(dev) != &init_net) + if (!net_eq(dev_net(dev), &init_net)) goto drop; nskb = skb_copy(skb, GFP_ATOMIC); diff --git a/net/xfrm/Kconfig b/net/xfrm/Kconfig index 9201ef8ad90e..6d081674515f 100644 --- a/net/xfrm/Kconfig +++ b/net/xfrm/Kconfig @@ -46,6 +46,12 @@ config XFRM_STATISTICS If unsure, say N. +config XFRM_IPCOMP + tristate + select XFRM + select CRYPTO + select CRYPTO_DEFLATE + config NET_KEY tristate "PF_KEY sockets" select XFRM diff --git a/net/xfrm/Makefile b/net/xfrm/Makefile index 332cfb0ff566..0f439a72ccab 100644 --- a/net/xfrm/Makefile +++ b/net/xfrm/Makefile @@ -6,4 +6,5 @@ obj-$(CONFIG_XFRM) := xfrm_policy.o xfrm_state.o xfrm_hash.o \ xfrm_input.o xfrm_output.o xfrm_algo.o obj-$(CONFIG_XFRM_STATISTICS) += xfrm_proc.o obj-$(CONFIG_XFRM_USER) += xfrm_user.o +obj-$(CONFIG_XFRM_IPCOMP) += xfrm_ipcomp.o diff --git a/net/xfrm/xfrm_algo.c b/net/xfrm/xfrm_algo.c index 23a2cc04b8cd..96036cf2216d 100644 --- a/net/xfrm/xfrm_algo.c +++ b/net/xfrm/xfrm_algo.c @@ -718,7 +718,7 @@ int skb_icv_walk(const struct sk_buff *skb, struct hash_desc *desc, for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) { int end; - BUG_TRAP(start <= offset + len); + WARN_ON(start > offset + len); end = start + skb_shinfo(skb)->frags[i].size; if ((copy = end - offset) > 0) { @@ -748,7 +748,7 @@ int skb_icv_walk(const struct sk_buff *skb, struct hash_desc *desc, for (; list; list = list->next) { int end; - BUG_TRAP(start <= offset + len); + WARN_ON(start > offset + len); end = start + list->len; if ((copy = end - offset) > 0) { diff --git a/net/xfrm/xfrm_ipcomp.c b/net/xfrm/xfrm_ipcomp.c new file mode 100644 index 000000000000..c609a4b98e15 --- /dev/null +++ b/net/xfrm/xfrm_ipcomp.c @@ -0,0 +1,384 @@ +/* + * IP Payload Compression Protocol (IPComp) - RFC3173. + * + * Copyright (c) 2003 James Morris <jmorris@intercode.com.au> + * Copyright (c) 2003-2008 Herbert Xu <herbert@gondor.apana.org.au> + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the Free + * Software Foundation; either version 2 of the License, or (at your option) + * any later version. + * + * Todo: + * - Tunable compression parameters. + * - Compression stats. + * - Adaptive compression. + */ + +#include <linux/crypto.h> +#include <linux/err.h> +#include <linux/gfp.h> +#include <linux/list.h> +#include <linux/module.h> +#include <linux/mutex.h> +#include <linux/percpu.h> +#include <linux/smp.h> +#include <linux/vmalloc.h> +#include <net/ip.h> +#include <net/ipcomp.h> +#include <net/xfrm.h> + +struct ipcomp_tfms { + struct list_head list; + struct crypto_comp **tfms; + int users; +}; + +static DEFINE_MUTEX(ipcomp_resource_mutex); +static void **ipcomp_scratches; +static int ipcomp_scratch_users; +static LIST_HEAD(ipcomp_tfms_list); + +static int ipcomp_decompress(struct xfrm_state *x, struct sk_buff *skb) +{ + struct ipcomp_data *ipcd = x->data; + const int plen = skb->len; + int dlen = IPCOMP_SCRATCH_SIZE; + const u8 *start = skb->data; + const int cpu = get_cpu(); + u8 *scratch = *per_cpu_ptr(ipcomp_scratches, cpu); + struct crypto_comp *tfm = *per_cpu_ptr(ipcd->tfms, cpu); + int err = crypto_comp_decompress(tfm, start, plen, scratch, &dlen); + int len; + + if (err) + goto out; + + if (dlen < (plen + sizeof(struct ip_comp_hdr))) { + err = -EINVAL; + goto out; + } + + len = dlen - plen; + if (len > skb_tailroom(skb)) + len = skb_tailroom(skb); + + skb->truesize += len; + __skb_put(skb, len); + + len += plen; + skb_copy_to_linear_data(skb, scratch, len); + + while ((scratch += len, dlen -= len) > 0) { + skb_frag_t *frag; + + err = -EMSGSIZE; + if (WARN_ON(skb_shinfo(skb)->nr_frags >= MAX_SKB_FRAGS)) + goto out; + + frag = skb_shinfo(skb)->frags + skb_shinfo(skb)->nr_frags; + frag->page = alloc_page(GFP_ATOMIC); + + err = -ENOMEM; + if (!frag->page) + goto out; + + len = PAGE_SIZE; + if (dlen < len) + len = dlen; + + memcpy(page_address(frag->page), scratch, len); + + frag->page_offset = 0; + frag->size = len; + skb->truesize += len; + skb->data_len += len; + skb->len += len; + + skb_shinfo(skb)->nr_frags++; + } + + err = 0; + +out: + put_cpu(); + return err; +} + +int ipcomp_input(struct xfrm_state *x, struct sk_buff *skb) +{ + int nexthdr; + int err = -ENOMEM; + struct ip_comp_hdr *ipch; + + if (skb_linearize_cow(skb)) + goto out; + + skb->ip_summed = CHECKSUM_NONE; + + /* Remove ipcomp header and decompress original payload */ + ipch = (void *)skb->data; + nexthdr = ipch->nexthdr; + + skb->transport_header = skb->network_header + sizeof(*ipch); + __skb_pull(skb, sizeof(*ipch)); + err = ipcomp_decompress(x, skb); + if (err) + goto out; + + err = nexthdr; + +out: + return err; +} +EXPORT_SYMBOL_GPL(ipcomp_input); + +static int ipcomp_compress(struct xfrm_state *x, struct sk_buff *skb) +{ + struct ipcomp_data *ipcd = x->data; + const int plen = skb->len; + int dlen = IPCOMP_SCRATCH_SIZE; + u8 *start = skb->data; + const int cpu = get_cpu(); + u8 *scratch = *per_cpu_ptr(ipcomp_scratches, cpu); + struct crypto_comp *tfm = *per_cpu_ptr(ipcd->tfms, cpu); + int err; + + local_bh_disable(); + err = crypto_comp_compress(tfm, start, plen, scratch, &dlen); + local_bh_enable(); + if (err) + goto out; + + if ((dlen + sizeof(struct ip_comp_hdr)) >= plen) { + err = -EMSGSIZE; + goto out; + } + + memcpy(start + sizeof(struct ip_comp_hdr), scratch, dlen); + put_cpu(); + + pskb_trim(skb, dlen + sizeof(struct ip_comp_hdr)); + return 0; + +out: + put_cpu(); + return err; +} + +int ipcomp_output(struct xfrm_state *x, struct sk_buff *skb) +{ + int err; + struct ip_comp_hdr *ipch; + struct ipcomp_data *ipcd = x->data; + + if (skb->len < ipcd->threshold) { + /* Don't bother compressing */ + goto out_ok; + } + + if (skb_linearize_cow(skb)) + goto out_ok; + + err = ipcomp_compress(x, skb); + + if (err) { + goto out_ok; + } + + /* Install ipcomp header, convert into ipcomp datagram. */ + ipch = ip_comp_hdr(skb); + ipch->nexthdr = *skb_mac_header(skb); + ipch->flags = 0; + ipch->cpi = htons((u16 )ntohl(x->id.spi)); + *skb_mac_header(skb) = IPPROTO_COMP; +out_ok: + skb_push(skb, -skb_network_offset(skb)); + return 0; +} +EXPORT_SYMBOL_GPL(ipcomp_output); + +static void ipcomp_free_scratches(void) +{ + int i; + void **scratches; + + if (--ipcomp_scratch_users) + return; + + scratches = ipcomp_scratches; + if (!scratches) + return; + + for_each_possible_cpu(i) + vfree(*per_cpu_ptr(scratches, i)); + + free_percpu(scratches); +} + +static void **ipcomp_alloc_scratches(void) +{ + int i; + void **scratches; + + if (ipcomp_scratch_users++) + return ipcomp_scratches; + + scratches = alloc_percpu(void *); + if (!scratches) + return NULL; + + ipcomp_scratches = scratches; + + for_each_possible_cpu(i) { + void *scratch = vmalloc(IPCOMP_SCRATCH_SIZE); + if (!scratch) + return NULL; + *per_cpu_ptr(scratches, i) = scratch; + } + + return scratches; +} + +static void ipcomp_free_tfms(struct crypto_comp **tfms) +{ + struct ipcomp_tfms *pos; + int cpu; + + list_for_each_entry(pos, &ipcomp_tfms_list, list) { + if (pos->tfms == tfms) + break; + } + + WARN_ON(!pos); + + if (--pos->users) + return; + + list_del(&pos->list); + kfree(pos); + + if (!tfms) + return; + + for_each_possible_cpu(cpu) { + struct crypto_comp *tfm = *per_cpu_ptr(tfms, cpu); + crypto_free_comp(tfm); + } + free_percpu(tfms); +} + +static struct crypto_comp **ipcomp_alloc_tfms(const char *alg_name) +{ + struct ipcomp_tfms *pos; + struct crypto_comp **tfms; + int cpu; + + /* This can be any valid CPU ID so we don't need locking. */ + cpu = raw_smp_processor_id(); + + list_for_each_entry(pos, &ipcomp_tfms_list, list) { + struct crypto_comp *tfm; + + tfms = pos->tfms; + tfm = *per_cpu_ptr(tfms, cpu); + + if (!strcmp(crypto_comp_name(tfm), alg_name)) { + pos->users++; + return tfms; + } + } + + pos = kmalloc(sizeof(*pos), GFP_KERNEL); + if (!pos) + return NULL; + + pos->users = 1; + INIT_LIST_HEAD(&pos->list); + list_add(&pos->list, &ipcomp_tfms_list); + + pos->tfms = tfms = alloc_percpu(struct crypto_comp *); + if (!tfms) + goto error; + + for_each_possible_cpu(cpu) { + struct crypto_comp *tfm = crypto_alloc_comp(alg_name, 0, + CRYPTO_ALG_ASYNC); + if (IS_ERR(tfm)) + goto error; + *per_cpu_ptr(tfms, cpu) = tfm; + } + + return tfms; + +error: + ipcomp_free_tfms(tfms); + return NULL; +} + +static void ipcomp_free_data(struct ipcomp_data *ipcd) +{ + if (ipcd->tfms) + ipcomp_free_tfms(ipcd->tfms); + ipcomp_free_scratches(); +} + +void ipcomp_destroy(struct xfrm_state *x) +{ + struct ipcomp_data *ipcd = x->data; + if (!ipcd) + return; + xfrm_state_delete_tunnel(x); + mutex_lock(&ipcomp_resource_mutex); + ipcomp_free_data(ipcd); + mutex_unlock(&ipcomp_resource_mutex); + kfree(ipcd); +} +EXPORT_SYMBOL_GPL(ipcomp_destroy); + +int ipcomp_init_state(struct xfrm_state *x) +{ + int err; + struct ipcomp_data *ipcd; + struct xfrm_algo_desc *calg_desc; + + err = -EINVAL; + if (!x->calg) + goto out; + + if (x->encap) + goto out; + + err = -ENOMEM; + ipcd = kzalloc(sizeof(*ipcd), GFP_KERNEL); + if (!ipcd) + goto out; + + mutex_lock(&ipcomp_resource_mutex); + if (!ipcomp_alloc_scratches()) + goto error; + + ipcd->tfms = ipcomp_alloc_tfms(x->calg->alg_name); + if (!ipcd->tfms) + goto error; + mutex_unlock(&ipcomp_resource_mutex); + + calg_desc = xfrm_calg_get_byname(x->calg->alg_name, 0); + BUG_ON(!calg_desc); + ipcd->threshold = calg_desc->uinfo.comp.threshold; + x->data = ipcd; + err = 0; +out: + return err; + +error: + ipcomp_free_data(ipcd); + mutex_unlock(&ipcomp_resource_mutex); + kfree(ipcd); + goto out; +} +EXPORT_SYMBOL_GPL(ipcomp_init_state); + +MODULE_LICENSE("GPL"); +MODULE_DESCRIPTION("IP Payload Compression Protocol (IPComp) - RFC3173"); +MODULE_AUTHOR("James Morris <jmorris@intercode.com.au>"); diff --git a/net/xfrm/xfrm_output.c b/net/xfrm/xfrm_output.c index 3f964db908a7..dc50f1e71f76 100644 --- a/net/xfrm/xfrm_output.c +++ b/net/xfrm/xfrm_output.c @@ -27,10 +27,14 @@ static int xfrm_state_check_space(struct xfrm_state *x, struct sk_buff *skb) - skb_headroom(skb); int ntail = dst->dev->needed_tailroom - skb_tailroom(skb); - if (nhead > 0 || ntail > 0) - return pskb_expand_head(skb, nhead, ntail, GFP_ATOMIC); - - return 0; + if (nhead <= 0) { + if (ntail <= 0) + return 0; + nhead = 0; + } else if (ntail < 0) + ntail = 0; + + return pskb_expand_head(skb, nhead, ntail, GFP_ATOMIC); } static int xfrm_output_one(struct sk_buff *skb, int err) @@ -112,16 +116,13 @@ error_nolock: int xfrm_output_resume(struct sk_buff *skb, int err) { while (likely((err = xfrm_output_one(skb, err)) == 0)) { - struct xfrm_state *x; - nf_reset(skb); err = skb->dst->ops->local_out(skb); if (unlikely(err != 1)) goto out; - x = skb->dst->xfrm; - if (!x) + if (!skb->dst->xfrm) return dst_output(skb); err = nf_hook(skb->dst->ops->family, diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c index cae9fd815543..832b47c1de80 100644 --- a/net/xfrm/xfrm_policy.c +++ b/net/xfrm/xfrm_policy.c @@ -34,7 +34,7 @@ #include "xfrm_hash.h" -int sysctl_xfrm_larval_drop __read_mostly; +int sysctl_xfrm_larval_drop __read_mostly = 1; #ifdef CONFIG_XFRM_STATISTICS DEFINE_SNMP_STAT(struct linux_xfrm_mib, xfrm_statistics) __read_mostly; @@ -46,7 +46,7 @@ EXPORT_SYMBOL(xfrm_cfg_mutex); static DEFINE_RWLOCK(xfrm_policy_lock); -static struct list_head xfrm_policy_bytype[XFRM_POLICY_TYPE_MAX]; +static struct list_head xfrm_policy_all; unsigned int xfrm_policy_count[XFRM_POLICY_MAX*2]; EXPORT_SYMBOL(xfrm_policy_count); @@ -164,7 +164,7 @@ static void xfrm_policy_timer(unsigned long data) read_lock(&xp->lock); - if (xp->dead) + if (xp->walk.dead) goto out; dir = xfrm_policy_id2dir(xp->index); @@ -236,7 +236,7 @@ struct xfrm_policy *xfrm_policy_alloc(gfp_t gfp) policy = kzalloc(sizeof(struct xfrm_policy), gfp); if (policy) { - INIT_LIST_HEAD(&policy->bytype); + INIT_LIST_HEAD(&policy->walk.all); INIT_HLIST_NODE(&policy->bydst); INIT_HLIST_NODE(&policy->byidx); rwlock_init(&policy->lock); @@ -252,17 +252,13 @@ EXPORT_SYMBOL(xfrm_policy_alloc); void xfrm_policy_destroy(struct xfrm_policy *policy) { - BUG_ON(!policy->dead); + BUG_ON(!policy->walk.dead); BUG_ON(policy->bundles); if (del_timer(&policy->timer)) BUG(); - write_lock_bh(&xfrm_policy_lock); - list_del(&policy->bytype); - write_unlock_bh(&xfrm_policy_lock); - security_xfrm_policy_free(policy->security); kfree(policy); } @@ -310,8 +306,8 @@ static void xfrm_policy_kill(struct xfrm_policy *policy) int dead; write_lock_bh(&policy->lock); - dead = policy->dead; - policy->dead = 1; + dead = policy->walk.dead; + policy->walk.dead = 1; write_unlock_bh(&policy->lock); if (unlikely(dead)) { @@ -609,6 +605,7 @@ int xfrm_policy_insert(int dir, struct xfrm_policy *policy, int excl) if (delpol) { hlist_del(&delpol->bydst); hlist_del(&delpol->byidx); + list_del(&delpol->walk.all); xfrm_policy_count[dir]--; } policy->index = delpol ? delpol->index : xfrm_gen_index(policy->type, dir); @@ -617,7 +614,7 @@ int xfrm_policy_insert(int dir, struct xfrm_policy *policy, int excl) policy->curlft.use_time = 0; if (!mod_timer(&policy->timer, jiffies + HZ)) xfrm_pol_hold(policy); - list_add_tail(&policy->bytype, &xfrm_policy_bytype[policy->type]); + list_add(&policy->walk.all, &xfrm_policy_all); write_unlock_bh(&xfrm_policy_lock); if (delpol) @@ -684,6 +681,7 @@ struct xfrm_policy *xfrm_policy_bysel_ctx(u8 type, int dir, } hlist_del(&pol->bydst); hlist_del(&pol->byidx); + list_del(&pol->walk.all); xfrm_policy_count[dir]--; } ret = pol; @@ -727,6 +725,7 @@ struct xfrm_policy *xfrm_policy_byid(u8 type, int dir, u32 id, int delete, } hlist_del(&pol->bydst); hlist_del(&pol->byidx); + list_del(&pol->walk.all); xfrm_policy_count[dir]--; } ret = pol; @@ -840,6 +839,7 @@ int xfrm_policy_flush(u8 type, struct xfrm_audit *audit_info) continue; hlist_del(&pol->bydst); hlist_del(&pol->byidx); + list_del(&pol->walk.all); write_unlock_bh(&xfrm_policy_lock); xfrm_audit_policy_delete(pol, 1, @@ -867,60 +867,68 @@ int xfrm_policy_walk(struct xfrm_policy_walk *walk, int (*func)(struct xfrm_policy *, int, int, void*), void *data) { - struct xfrm_policy *old, *pol, *last = NULL; + struct xfrm_policy *pol; + struct xfrm_policy_walk_entry *x; int error = 0; if (walk->type >= XFRM_POLICY_TYPE_MAX && walk->type != XFRM_POLICY_TYPE_ANY) return -EINVAL; - if (walk->policy == NULL && walk->count != 0) + if (list_empty(&walk->walk.all) && walk->seq != 0) return 0; - old = pol = walk->policy; - walk->policy = NULL; - read_lock_bh(&xfrm_policy_lock); - - for (; walk->cur_type < XFRM_POLICY_TYPE_MAX; walk->cur_type++) { - if (walk->type != walk->cur_type && - walk->type != XFRM_POLICY_TYPE_ANY) + write_lock_bh(&xfrm_policy_lock); + if (list_empty(&walk->walk.all)) + x = list_first_entry(&xfrm_policy_all, struct xfrm_policy_walk_entry, all); + else + x = list_entry(&walk->walk.all, struct xfrm_policy_walk_entry, all); + list_for_each_entry_from(x, &xfrm_policy_all, all) { + if (x->dead) continue; - - if (pol == NULL) { - pol = list_first_entry(&xfrm_policy_bytype[walk->cur_type], - struct xfrm_policy, bytype); - } - list_for_each_entry_from(pol, &xfrm_policy_bytype[walk->cur_type], bytype) { - if (pol->dead) - continue; - if (last) { - error = func(last, xfrm_policy_id2dir(last->index), - walk->count, data); - if (error) { - xfrm_pol_hold(last); - walk->policy = last; - goto out; - } - } - last = pol; - walk->count++; + pol = container_of(x, struct xfrm_policy, walk); + if (walk->type != XFRM_POLICY_TYPE_ANY && + walk->type != pol->type) + continue; + error = func(pol, xfrm_policy_id2dir(pol->index), + walk->seq, data); + if (error) { + list_move_tail(&walk->walk.all, &x->all); + goto out; } - pol = NULL; + walk->seq++; } - if (walk->count == 0) { + if (walk->seq == 0) { error = -ENOENT; goto out; } - if (last) - error = func(last, xfrm_policy_id2dir(last->index), 0, data); + list_del_init(&walk->walk.all); out: - read_unlock_bh(&xfrm_policy_lock); - if (old != NULL) - xfrm_pol_put(old); + write_unlock_bh(&xfrm_policy_lock); return error; } EXPORT_SYMBOL(xfrm_policy_walk); +void xfrm_policy_walk_init(struct xfrm_policy_walk *walk, u8 type) +{ + INIT_LIST_HEAD(&walk->walk.all); + walk->walk.dead = 1; + walk->type = type; + walk->seq = 0; +} +EXPORT_SYMBOL(xfrm_policy_walk_init); + +void xfrm_policy_walk_done(struct xfrm_policy_walk *walk) +{ + if (list_empty(&walk->walk.all)) + return; + + write_lock_bh(&xfrm_policy_lock); + list_del(&walk->walk.all); + write_unlock_bh(&xfrm_policy_lock); +} +EXPORT_SYMBOL(xfrm_policy_walk_done); + /* * Find policy to apply to this flow. * @@ -1077,6 +1085,7 @@ static void __xfrm_policy_link(struct xfrm_policy *pol, int dir) struct hlist_head *chain = policy_hash_bysel(&pol->selector, pol->family, dir); + list_add(&pol->walk.all, &xfrm_policy_all); hlist_add_head(&pol->bydst, chain); hlist_add_head(&pol->byidx, xfrm_policy_byidx+idx_hash(pol->index)); xfrm_policy_count[dir]++; @@ -1094,6 +1103,7 @@ static struct xfrm_policy *__xfrm_policy_unlink(struct xfrm_policy *pol, hlist_del(&pol->bydst); hlist_del(&pol->byidx); + list_del(&pol->walk.all); xfrm_policy_count[dir]--; return pol; @@ -1719,7 +1729,7 @@ restart: for (pi = 0; pi < npols; pi++) { read_lock_bh(&pols[pi]->lock); - pol_dead |= pols[pi]->dead; + pol_dead |= pols[pi]->walk.dead; read_unlock_bh(&pols[pi]->lock); } @@ -1731,8 +1741,7 @@ restart: * We can't enlist stable bundles either. */ write_unlock_bh(&policy->lock); - if (dst) - dst_free(dst); + dst_free(dst); if (pol_dead) XFRM_INC_STATS(LINUX_MIB_XFRMOUTPOLDEAD); @@ -1748,8 +1757,7 @@ restart: err = xfrm_dst_update_origin(dst, fl); if (unlikely(err)) { write_unlock_bh(&policy->lock); - if (dst) - dst_free(dst); + dst_free(dst); XFRM_INC_STATS(LINUX_MIB_XFRMOUTBUNDLECHECKERROR); goto error; } @@ -2360,7 +2368,7 @@ static int xfrm_dev_event(struct notifier_block *this, unsigned long event, void { struct net_device *dev = ptr; - if (dev_net(dev) != &init_net) + if (!net_eq(dev_net(dev), &init_net)) return NOTIFY_DONE; switch (event) { @@ -2416,9 +2424,7 @@ static void __init xfrm_policy_init(void) panic("XFRM: failed to allocate bydst hash\n"); } - for (dir = 0; dir < XFRM_POLICY_TYPE_MAX; dir++) - INIT_LIST_HEAD(&xfrm_policy_bytype[dir]); - + INIT_LIST_HEAD(&xfrm_policy_all); INIT_WORK(&xfrm_policy_gc_work, xfrm_policy_gc_task); register_netdevice_notifier(&xfrm_dev_notifier); } @@ -2602,7 +2608,7 @@ static int xfrm_policy_migrate(struct xfrm_policy *pol, int i, j, n = 0; write_lock_bh(&pol->lock); - if (unlikely(pol->dead)) { + if (unlikely(pol->walk.dead)) { /* target policy has been deleted */ write_unlock_bh(&pol->lock); return -ENOENT; @@ -2673,7 +2679,8 @@ static int xfrm_migrate_check(struct xfrm_migrate *m, int num_migrate) } int xfrm_migrate(struct xfrm_selector *sel, u8 dir, u8 type, - struct xfrm_migrate *m, int num_migrate) + struct xfrm_migrate *m, int num_migrate, + struct xfrm_kmaddress *k) { int i, err, nx_cur = 0, nx_new = 0; struct xfrm_policy *pol = NULL; @@ -2717,7 +2724,7 @@ int xfrm_migrate(struct xfrm_selector *sel, u8 dir, u8 type, } /* Stage 5 - announce */ - km_migrate(sel, dir, type, m, num_migrate); + km_migrate(sel, dir, type, m, num_migrate, k); xfrm_pol_put(pol); diff --git a/net/xfrm/xfrm_state.c b/net/xfrm/xfrm_state.c index 72fddafd891a..508337f97249 100644 --- a/net/xfrm/xfrm_state.c +++ b/net/xfrm/xfrm_state.c @@ -408,11 +408,10 @@ static void xfrm_state_gc_task(struct work_struct *data) struct hlist_head gc_list; spin_lock_bh(&xfrm_state_gc_lock); - gc_list.first = xfrm_state_gc_list.first; - INIT_HLIST_HEAD(&xfrm_state_gc_list); + hlist_move_list(&xfrm_state_gc_list, &gc_list); spin_unlock_bh(&xfrm_state_gc_lock); - hlist_for_each_entry_safe(x, entry, tmp, &gc_list, bydst) + hlist_for_each_entry_safe(x, entry, tmp, &gc_list, gclist) xfrm_state_gc_destroy(x); wake_up(&km_waitq); @@ -514,7 +513,7 @@ struct xfrm_state *xfrm_state_alloc(void) if (x) { atomic_set(&x->refcnt, 1); atomic_set(&x->tunnel_users, 0); - INIT_LIST_HEAD(&x->all); + INIT_LIST_HEAD(&x->km.all); INIT_HLIST_NODE(&x->bydst); INIT_HLIST_NODE(&x->bysrc); INIT_HLIST_NODE(&x->byspi); @@ -538,14 +537,10 @@ EXPORT_SYMBOL(xfrm_state_alloc); void __xfrm_state_destroy(struct xfrm_state *x) { - BUG_TRAP(x->km.state == XFRM_STATE_DEAD); - - spin_lock_bh(&xfrm_state_lock); - list_del(&x->all); - spin_unlock_bh(&xfrm_state_lock); + WARN_ON(x->km.state != XFRM_STATE_DEAD); spin_lock_bh(&xfrm_state_gc_lock); - hlist_add_head(&x->bydst, &xfrm_state_gc_list); + hlist_add_head(&x->gclist, &xfrm_state_gc_list); spin_unlock_bh(&xfrm_state_gc_lock); schedule_work(&xfrm_state_gc_work); } @@ -558,6 +553,7 @@ int __xfrm_state_delete(struct xfrm_state *x) if (x->km.state != XFRM_STATE_DEAD) { x->km.state = XFRM_STATE_DEAD; spin_lock(&xfrm_state_lock); + list_del(&x->km.all); hlist_del(&x->bydst); hlist_del(&x->bysrc); if (x->id.spi) @@ -780,11 +776,13 @@ xfrm_state_find(xfrm_address_t *daddr, xfrm_address_t *saddr, { unsigned int h; struct hlist_node *entry; - struct xfrm_state *x, *x0; + struct xfrm_state *x, *x0, *to_put; int acquire_in_progress = 0; int error = 0; struct xfrm_state *best = NULL; + to_put = NULL; + spin_lock_bh(&xfrm_state_lock); h = xfrm_dst_hash(daddr, saddr, tmpl->reqid, family); hlist_for_each_entry(x, entry, xfrm_state_bydst+h, bydst) { @@ -833,7 +831,7 @@ xfrm_state_find(xfrm_address_t *daddr, xfrm_address_t *saddr, if (tmpl->id.spi && (x0 = __xfrm_state_lookup(daddr, tmpl->id.spi, tmpl->id.proto, family)) != NULL) { - xfrm_state_put(x0); + to_put = x0; error = -EEXIST; goto out; } @@ -849,13 +847,14 @@ xfrm_state_find(xfrm_address_t *daddr, xfrm_address_t *saddr, error = security_xfrm_state_alloc_acquire(x, pol->security, fl->secid); if (error) { x->km.state = XFRM_STATE_DEAD; - xfrm_state_put(x); + to_put = x; x = NULL; goto out; } if (km_query(x, tmpl, pol) == 0) { x->km.state = XFRM_STATE_ACQ; + list_add(&x->km.all, &xfrm_state_all); hlist_add_head(&x->bydst, xfrm_state_bydst+h); h = xfrm_src_hash(daddr, saddr, family); hlist_add_head(&x->bysrc, xfrm_state_bysrc+h); @@ -870,7 +869,7 @@ xfrm_state_find(xfrm_address_t *daddr, xfrm_address_t *saddr, xfrm_hash_grow_check(x->bydst.next != NULL); } else { x->km.state = XFRM_STATE_DEAD; - xfrm_state_put(x); + to_put = x; x = NULL; error = -ESRCH; } @@ -881,6 +880,8 @@ out: else *err = acquire_in_progress ? -EAGAIN : error; spin_unlock_bh(&xfrm_state_lock); + if (to_put) + xfrm_state_put(to_put); return x; } @@ -922,7 +923,7 @@ static void __xfrm_state_insert(struct xfrm_state *x) x->genid = ++xfrm_state_genid; - list_add_tail(&x->all, &xfrm_state_all); + list_add(&x->km.all, &xfrm_state_all); h = xfrm_dst_hash(&x->id.daddr, &x->props.saddr, x->props.reqid, x->props.family); @@ -1051,6 +1052,7 @@ static struct xfrm_state *__find_acq_core(unsigned short family, u8 mode, u32 re xfrm_state_hold(x); x->timer.expires = jiffies + sysctl_xfrm_acq_expires*HZ; add_timer(&x->timer); + list_add(&x->km.all, &xfrm_state_all); hlist_add_head(&x->bydst, xfrm_state_bydst+h); h = xfrm_src_hash(daddr, saddr, family); hlist_add_head(&x->bysrc, xfrm_state_bysrc+h); @@ -1067,18 +1069,20 @@ static struct xfrm_state *__xfrm_find_acq_byseq(u32 seq); int xfrm_state_add(struct xfrm_state *x) { - struct xfrm_state *x1; + struct xfrm_state *x1, *to_put; int family; int err; int use_spi = xfrm_id_proto_match(x->id.proto, IPSEC_PROTO_ANY); family = x->props.family; + to_put = NULL; + spin_lock_bh(&xfrm_state_lock); x1 = __xfrm_state_locate(x, use_spi, family); if (x1) { - xfrm_state_put(x1); + to_put = x1; x1 = NULL; err = -EEXIST; goto out; @@ -1088,7 +1092,7 @@ int xfrm_state_add(struct xfrm_state *x) x1 = __xfrm_find_acq_byseq(x->km.seq); if (x1 && ((x1->id.proto != x->id.proto) || xfrm_addr_cmp(&x1->id.daddr, &x->id.daddr, family))) { - xfrm_state_put(x1); + to_put = x1; x1 = NULL; } } @@ -1110,6 +1114,9 @@ out: xfrm_state_put(x1); } + if (to_put) + xfrm_state_put(to_put); + return err; } EXPORT_SYMBOL(xfrm_state_add); @@ -1269,10 +1276,12 @@ EXPORT_SYMBOL(xfrm_state_migrate); int xfrm_state_update(struct xfrm_state *x) { - struct xfrm_state *x1; + struct xfrm_state *x1, *to_put; int err; int use_spi = xfrm_id_proto_match(x->id.proto, IPSEC_PROTO_ANY); + to_put = NULL; + spin_lock_bh(&xfrm_state_lock); x1 = __xfrm_state_locate(x, use_spi, x->props.family); @@ -1281,7 +1290,7 @@ int xfrm_state_update(struct xfrm_state *x) goto out; if (xfrm_state_kern(x1)) { - xfrm_state_put(x1); + to_put = x1; err = -EEXIST; goto out; } @@ -1295,6 +1304,9 @@ int xfrm_state_update(struct xfrm_state *x) out: spin_unlock_bh(&xfrm_state_lock); + if (to_put) + xfrm_state_put(to_put); + if (err) return err; @@ -1537,47 +1549,62 @@ int xfrm_state_walk(struct xfrm_state_walk *walk, int (*func)(struct xfrm_state *, int, void*), void *data) { - struct xfrm_state *old, *x, *last = NULL; + struct xfrm_state *state; + struct xfrm_state_walk *x; int err = 0; - if (walk->state == NULL && walk->count != 0) + if (walk->seq != 0 && list_empty(&walk->all)) return 0; - old = x = walk->state; - walk->state = NULL; spin_lock_bh(&xfrm_state_lock); - if (x == NULL) - x = list_first_entry(&xfrm_state_all, struct xfrm_state, all); + if (list_empty(&walk->all)) + x = list_first_entry(&xfrm_state_all, struct xfrm_state_walk, all); + else + x = list_entry(&walk->all, struct xfrm_state_walk, all); list_for_each_entry_from(x, &xfrm_state_all, all) { - if (x->km.state == XFRM_STATE_DEAD) + if (x->state == XFRM_STATE_DEAD) continue; - if (!xfrm_id_proto_match(x->id.proto, walk->proto)) + state = container_of(x, struct xfrm_state, km); + if (!xfrm_id_proto_match(state->id.proto, walk->proto)) continue; - if (last) { - err = func(last, walk->count, data); - if (err) { - xfrm_state_hold(last); - walk->state = last; - goto out; - } + err = func(state, walk->seq, data); + if (err) { + list_move_tail(&walk->all, &x->all); + goto out; } - last = x; - walk->count++; + walk->seq++; } - if (walk->count == 0) { + if (walk->seq == 0) { err = -ENOENT; goto out; } - if (last) - err = func(last, 0, data); + list_del_init(&walk->all); out: spin_unlock_bh(&xfrm_state_lock); - if (old != NULL) - xfrm_state_put(old); return err; } EXPORT_SYMBOL(xfrm_state_walk); +void xfrm_state_walk_init(struct xfrm_state_walk *walk, u8 proto) +{ + INIT_LIST_HEAD(&walk->all); + walk->proto = proto; + walk->state = XFRM_STATE_DEAD; + walk->seq = 0; +} +EXPORT_SYMBOL(xfrm_state_walk_init); + +void xfrm_state_walk_done(struct xfrm_state_walk *walk) +{ + if (list_empty(&walk->all)) + return; + + spin_lock_bh(&xfrm_state_lock); + list_del(&walk->all); + spin_lock_bh(&xfrm_state_lock); +} +EXPORT_SYMBOL(xfrm_state_walk_done); + void xfrm_replay_notify(struct xfrm_state *x, int event) { @@ -1787,7 +1814,8 @@ EXPORT_SYMBOL(km_policy_expired); #ifdef CONFIG_XFRM_MIGRATE int km_migrate(struct xfrm_selector *sel, u8 dir, u8 type, - struct xfrm_migrate *m, int num_migrate) + struct xfrm_migrate *m, int num_migrate, + struct xfrm_kmaddress *k) { int err = -EINVAL; int ret; @@ -1796,7 +1824,7 @@ int km_migrate(struct xfrm_selector *sel, u8 dir, u8 type, read_lock(&xfrm_km_lock); list_for_each_entry(km, &xfrm_km_list, list) { if (km->migrate) { - ret = km->migrate(sel, dir, type, m, num_migrate); + ret = km->migrate(sel, dir, type, m, num_migrate, k); if (!ret) err = ret; } diff --git a/net/xfrm/xfrm_user.c b/net/xfrm/xfrm_user.c index b976d9ed10e4..4a8a1abb59ee 100644 --- a/net/xfrm/xfrm_user.c +++ b/net/xfrm/xfrm_user.c @@ -277,9 +277,8 @@ static void copy_from_user_state(struct xfrm_state *x, struct xfrm_usersa_info * memcpy(&x->props.saddr, &p->saddr, sizeof(x->props.saddr)); x->props.flags = p->flags; - if (!x->sel.family) + if (!x->sel.family && !(p->flags & XFRM_STATE_AF_UNSPEC)) x->sel.family = p->family; - } /* @@ -1103,7 +1102,7 @@ static struct xfrm_policy *xfrm_policy_construct(struct xfrm_userpolicy_info *p, return xp; error: *errp = err; - xp->dead = 1; + xp->walk.dead = 1; xfrm_policy_destroy(xp); return NULL; } @@ -1596,7 +1595,7 @@ static int xfrm_add_pol_expire(struct sk_buff *skb, struct nlmsghdr *nlh, return -ENOENT; read_lock(&xp->lock); - if (xp->dead) { + if (xp->walk.dead) { read_unlock(&xp->lock); goto out; } @@ -1711,12 +1710,23 @@ static int xfrm_add_acquire(struct sk_buff *skb, struct nlmsghdr *nlh, #ifdef CONFIG_XFRM_MIGRATE static int copy_from_user_migrate(struct xfrm_migrate *ma, + struct xfrm_kmaddress *k, struct nlattr **attrs, int *num) { struct nlattr *rt = attrs[XFRMA_MIGRATE]; struct xfrm_user_migrate *um; int i, num_migrate; + if (k != NULL) { + struct xfrm_user_kmaddress *uk; + + uk = nla_data(attrs[XFRMA_KMADDRESS]); + memcpy(&k->local, &uk->local, sizeof(k->local)); + memcpy(&k->remote, &uk->remote, sizeof(k->remote)); + k->family = uk->family; + k->reserved = uk->reserved; + } + um = nla_data(rt); num_migrate = nla_len(rt) / sizeof(*um); @@ -1746,6 +1756,7 @@ static int xfrm_do_migrate(struct sk_buff *skb, struct nlmsghdr *nlh, { struct xfrm_userpolicy_id *pi = nlmsg_data(nlh); struct xfrm_migrate m[XFRM_MAX_DEPTH]; + struct xfrm_kmaddress km, *kmp; u8 type; int err; int n = 0; @@ -1753,19 +1764,20 @@ static int xfrm_do_migrate(struct sk_buff *skb, struct nlmsghdr *nlh, if (attrs[XFRMA_MIGRATE] == NULL) return -EINVAL; + kmp = attrs[XFRMA_KMADDRESS] ? &km : NULL; + err = copy_from_user_policy_type(&type, attrs); if (err) return err; - err = copy_from_user_migrate((struct xfrm_migrate *)m, - attrs, &n); + err = copy_from_user_migrate((struct xfrm_migrate *)m, kmp, attrs, &n); if (err) return err; if (!n) return 0; - xfrm_migrate(&pi->sel, pi->dir, type, m, n); + xfrm_migrate(&pi->sel, pi->dir, type, m, n, kmp); return 0; } @@ -1796,16 +1808,30 @@ static int copy_to_user_migrate(struct xfrm_migrate *m, struct sk_buff *skb) return nla_put(skb, XFRMA_MIGRATE, sizeof(um), &um); } -static inline size_t xfrm_migrate_msgsize(int num_migrate) +static int copy_to_user_kmaddress(struct xfrm_kmaddress *k, struct sk_buff *skb) +{ + struct xfrm_user_kmaddress uk; + + memset(&uk, 0, sizeof(uk)); + uk.family = k->family; + uk.reserved = k->reserved; + memcpy(&uk.local, &k->local, sizeof(uk.local)); + memcpy(&uk.remote, &k->local, sizeof(uk.remote)); + + return nla_put(skb, XFRMA_KMADDRESS, sizeof(uk), &uk); +} + +static inline size_t xfrm_migrate_msgsize(int num_migrate, int with_kma) { return NLMSG_ALIGN(sizeof(struct xfrm_userpolicy_id)) - + nla_total_size(sizeof(struct xfrm_user_migrate) * num_migrate) - + userpolicy_type_attrsize(); + + (with_kma ? nla_total_size(sizeof(struct xfrm_kmaddress)) : 0) + + nla_total_size(sizeof(struct xfrm_user_migrate) * num_migrate) + + userpolicy_type_attrsize(); } static int build_migrate(struct sk_buff *skb, struct xfrm_migrate *m, - int num_migrate, struct xfrm_selector *sel, - u8 dir, u8 type) + int num_migrate, struct xfrm_kmaddress *k, + struct xfrm_selector *sel, u8 dir, u8 type) { struct xfrm_migrate *mp; struct xfrm_userpolicy_id *pol_id; @@ -1822,6 +1848,9 @@ static int build_migrate(struct sk_buff *skb, struct xfrm_migrate *m, memcpy(&pol_id->sel, sel, sizeof(pol_id->sel)); pol_id->dir = dir; + if (k != NULL && (copy_to_user_kmaddress(k, skb) < 0)) + goto nlmsg_failure; + if (copy_to_user_policy_type(type, skb) < 0) goto nlmsg_failure; @@ -1837,23 +1866,25 @@ nlmsg_failure: } static int xfrm_send_migrate(struct xfrm_selector *sel, u8 dir, u8 type, - struct xfrm_migrate *m, int num_migrate) + struct xfrm_migrate *m, int num_migrate, + struct xfrm_kmaddress *k) { struct sk_buff *skb; - skb = nlmsg_new(xfrm_migrate_msgsize(num_migrate), GFP_ATOMIC); + skb = nlmsg_new(xfrm_migrate_msgsize(num_migrate, !!k), GFP_ATOMIC); if (skb == NULL) return -ENOMEM; /* build migrate */ - if (build_migrate(skb, m, num_migrate, sel, dir, type) < 0) + if (build_migrate(skb, m, num_migrate, k, sel, dir, type) < 0) BUG(); return nlmsg_multicast(xfrm_nl, skb, 0, XFRMNLGRP_MIGRATE, GFP_ATOMIC); } #else static int xfrm_send_migrate(struct xfrm_selector *sel, u8 dir, u8 type, - struct xfrm_migrate *m, int num_migrate) + struct xfrm_migrate *m, int num_migrate, + struct xfrm_kmaddress *k) { return -ENOPROTOOPT; } @@ -1902,6 +1933,7 @@ static const struct nla_policy xfrma_policy[XFRMA_MAX+1] = { [XFRMA_COADDR] = { .len = sizeof(xfrm_address_t) }, [XFRMA_POLICY_TYPE] = { .len = sizeof(struct xfrm_userpolicy_type)}, [XFRMA_MIGRATE] = { .len = sizeof(struct xfrm_user_migrate) }, + [XFRMA_KMADDRESS] = { .len = sizeof(struct xfrm_user_kmaddress) }, }; static struct xfrm_link { |