diff options
author | Justin Waters <justin.waters@timesys.com> | 2008-02-26 13:07:02 -0500 |
---|---|---|
committer | Justin Waters <justin.waters@timesys.com> | 2008-02-26 13:07:02 -0500 |
commit | b80a32b9cc634adfa8eaef33ec981e7febf2ade2 (patch) | |
tree | f256bce13ba11f514a388160df84e1410bedbe2b /net | |
parent | 594133ef22fae0d737bd1b57352cf3f48a192c63 (diff) |
Update the i.MX31 Kernel to 2.6.232.6.23-mx31ads-2008022618072.6.23-mx31-200802261807
This is the result of a brute-force attempt to update the kernel to 2.6.23.
Now that we have a git tree, our effort will be a little nicer in the future.
Signed-off-by: Justin Waters <justin.waters@timesys.com>
Diffstat (limited to 'net')
441 files changed, 10312 insertions, 9848 deletions
diff --git a/net/802/tr.c b/net/802/tr.c index 0ba1946211c9..e56e61a7f545 100644 --- a/net/802/tr.c +++ b/net/802/tr.c @@ -567,7 +567,7 @@ static int rif_seq_show(struct seq_file *seq, void *v) } -static struct seq_operations rif_seq_ops = { +static const struct seq_operations rif_seq_ops = { .start = rif_seq_start, .next = rif_seq_next, .stop = rif_seq_stop, diff --git a/net/8021q/Makefile b/net/8021q/Makefile index 97feb44dbdce..10ca7f486c3a 100644 --- a/net/8021q/Makefile +++ b/net/8021q/Makefile @@ -4,7 +4,7 @@ obj-$(CONFIG_VLAN_8021Q) += 8021q.o -8021q-objs := vlan.o vlan_dev.o +8021q-objs := vlan.o vlan_dev.o vlan_netlink.o ifeq ($(CONFIG_PROC_FS),y) 8021q-objs += vlanproc.o diff --git a/net/8021q/vlan.c b/net/8021q/vlan.c index de78c9dd713b..2a546919d6fb 100644 --- a/net/8021q/vlan.c +++ b/net/8021q/vlan.c @@ -97,35 +97,22 @@ static int __init vlan_proto_init(void) /* Register us to receive netdevice events */ err = register_netdevice_notifier(&vlan_notifier_block); - if (err < 0) { - dev_remove_pack(&vlan_packet_type); - vlan_proc_cleanup(); - return err; - } + if (err < 0) + goto err1; - vlan_ioctl_set(vlan_ioctl_handler); + err = vlan_netlink_init(); + if (err < 0) + goto err2; + vlan_ioctl_set(vlan_ioctl_handler); return 0; -} - -/* Cleanup all vlan devices - * Note: devices that have been registered that but not - * brought up will exist but have no module ref count. - */ -static void __exit vlan_cleanup_devices(void) -{ - struct net_device *dev, *nxt; - - rtnl_lock(); - for_each_netdev_safe(dev, nxt) { - if (dev->priv_flags & IFF_802_1Q_VLAN) { - unregister_vlan_dev(VLAN_DEV_INFO(dev)->real_dev, - VLAN_DEV_INFO(dev)->vlan_id); - unregister_netdevice(dev); - } - } - rtnl_unlock(); +err2: + unregister_netdevice_notifier(&vlan_notifier_block); +err1: + vlan_proc_cleanup(); + dev_remove_pack(&vlan_packet_type); + return err; } /* @@ -136,13 +123,13 @@ static void __exit vlan_cleanup_module(void) { int i; + vlan_netlink_fini(); vlan_ioctl_set(NULL); /* Un-register us from receiving netdevice events */ unregister_netdevice_notifier(&vlan_notifier_block); dev_remove_pack(&vlan_packet_type); - vlan_cleanup_devices(); /* This table must be empty if there are no module * references left. @@ -197,6 +184,34 @@ static void vlan_group_free(struct vlan_group *grp) kfree(grp); } +static struct vlan_group *vlan_group_alloc(int ifindex) +{ + struct vlan_group *grp; + unsigned int size; + unsigned int i; + + grp = kzalloc(sizeof(struct vlan_group), GFP_KERNEL); + if (!grp) + return NULL; + + size = sizeof(struct net_device *) * VLAN_GROUP_ARRAY_PART_LEN; + + for (i = 0; i < VLAN_GROUP_ARRAY_SPLIT_PARTS; i++) { + grp->vlan_devices_arrays[i] = kzalloc(size, GFP_KERNEL); + if (!grp->vlan_devices_arrays[i]) + goto err; + } + + grp->real_dev_ifindex = ifindex; + hlist_add_head_rcu(&grp->hlist, + &vlan_group_hash[vlan_grp_hashfn(ifindex)]); + return grp; + +err: + vlan_group_free(grp); + return NULL; +} + static void vlan_rcu_free(struct rcu_head *rcu) { vlan_group_free(container_of(rcu, struct vlan_group, rcu)); @@ -278,50 +293,66 @@ static int unregister_vlan_dev(struct net_device *real_dev, return ret; } -static int unregister_vlan_device(const char *vlan_IF_name) +int unregister_vlan_device(struct net_device *dev) { - struct net_device *dev = NULL; int ret; + ret = unregister_vlan_dev(VLAN_DEV_INFO(dev)->real_dev, + VLAN_DEV_INFO(dev)->vlan_id); + unregister_netdevice(dev); - dev = dev_get_by_name(vlan_IF_name); - ret = -EINVAL; - if (dev) { - if (dev->priv_flags & IFF_802_1Q_VLAN) { - rtnl_lock(); + if (ret == 1) + ret = 0; + return ret; +} - ret = unregister_vlan_dev(VLAN_DEV_INFO(dev)->real_dev, - VLAN_DEV_INFO(dev)->vlan_id); +/* + * vlan network devices have devices nesting below it, and are a special + * "super class" of normal network devices; split their locks off into a + * separate class since they always nest. + */ +static struct lock_class_key vlan_netdev_xmit_lock_key; - dev_put(dev); - unregister_netdevice(dev); +static int vlan_dev_init(struct net_device *dev) +{ + struct net_device *real_dev = VLAN_DEV_INFO(dev)->real_dev; - rtnl_unlock(); + /* IFF_BROADCAST|IFF_MULTICAST; ??? */ + dev->flags = real_dev->flags & ~IFF_UP; + dev->iflink = real_dev->ifindex; + dev->state = (real_dev->state & ((1<<__LINK_STATE_NOCARRIER) | + (1<<__LINK_STATE_DORMANT))) | + (1<<__LINK_STATE_PRESENT); - if (ret == 1) - ret = 0; - } else { - printk(VLAN_ERR - "%s: ERROR: Tried to remove a non-vlan device " - "with VLAN code, name: %s priv_flags: %hX\n", - __FUNCTION__, dev->name, dev->priv_flags); - dev_put(dev); - ret = -EPERM; - } + if (is_zero_ether_addr(dev->dev_addr)) + memcpy(dev->dev_addr, real_dev->dev_addr, dev->addr_len); + if (is_zero_ether_addr(dev->broadcast)) + memcpy(dev->broadcast, real_dev->broadcast, dev->addr_len); + + if (real_dev->features & NETIF_F_HW_VLAN_TX) { + dev->hard_header = real_dev->hard_header; + dev->hard_header_len = real_dev->hard_header_len; + dev->hard_start_xmit = vlan_dev_hwaccel_hard_start_xmit; + dev->rebuild_header = real_dev->rebuild_header; } else { -#ifdef VLAN_DEBUG - printk(VLAN_DBG "%s: WARNING: Could not find dev.\n", __FUNCTION__); -#endif - ret = -EINVAL; + dev->hard_header = vlan_dev_hard_header; + dev->hard_header_len = real_dev->hard_header_len + VLAN_HLEN; + dev->hard_start_xmit = vlan_dev_hard_start_xmit; + dev->rebuild_header = vlan_dev_rebuild_header; } + dev->hard_header_parse = real_dev->hard_header_parse; + dev->hard_header_cache = NULL; - return ret; + lockdep_set_class(&dev->_xmit_lock, &vlan_netdev_xmit_lock_key); + return 0; } -static void vlan_setup(struct net_device *new_dev) +void vlan_setup(struct net_device *new_dev) { SET_MODULE_OWNER(new_dev); + ether_setup(new_dev); + /* new_dev->ifindex = 0; it will be set when added to * the global list. * iflink is set as well. @@ -338,12 +369,15 @@ static void vlan_setup(struct net_device *new_dev) /* set up method calls */ new_dev->change_mtu = vlan_dev_change_mtu; + new_dev->init = vlan_dev_init; new_dev->open = vlan_dev_open; new_dev->stop = vlan_dev_stop; - new_dev->set_mac_address = vlan_dev_set_mac_address; new_dev->set_multicast_list = vlan_dev_set_multicast_list; + new_dev->change_rx_flags = vlan_change_rx_flags; new_dev->destructor = free_netdev; new_dev->do_ioctl = vlan_dev_ioctl; + + memset(new_dev->broadcast, 0, ETH_ALEN); } static void vlan_transfer_operstate(const struct net_device *dev, struct net_device *vlandev) @@ -366,77 +400,110 @@ static void vlan_transfer_operstate(const struct net_device *dev, struct net_dev } } -/* - * vlan network devices have devices nesting below it, and are a special - * "super class" of normal network devices; split their locks off into a - * separate class since they always nest. - */ -static struct lock_class_key vlan_netdev_xmit_lock_key; - - -/* Attach a VLAN device to a mac address (ie Ethernet Card). - * Returns the device that was created, or NULL if there was - * an error of some kind. - */ -static struct net_device *register_vlan_device(const char *eth_IF_name, - unsigned short VLAN_ID) +int vlan_check_real_dev(struct net_device *real_dev, unsigned short vlan_id) { - struct vlan_group *grp; - struct net_device *new_dev; - struct net_device *real_dev; /* the ethernet device */ - char name[IFNAMSIZ]; - int i; - -#ifdef VLAN_DEBUG - printk(VLAN_DBG "%s: if_name -:%s:- vid: %i\n", - __FUNCTION__, eth_IF_name, VLAN_ID); -#endif - - if (VLAN_ID >= VLAN_VID_MASK) - goto out_ret_null; - - /* find the device relating to eth_IF_name. */ - real_dev = dev_get_by_name(eth_IF_name); - if (!real_dev) - goto out_ret_null; - if (real_dev->features & NETIF_F_VLAN_CHALLENGED) { printk(VLAN_DBG "%s: VLANs not supported on %s.\n", __FUNCTION__, real_dev->name); - goto out_put_dev; + return -EOPNOTSUPP; } if ((real_dev->features & NETIF_F_HW_VLAN_RX) && !real_dev->vlan_rx_register) { printk(VLAN_DBG "%s: Device %s has buggy VLAN hw accel.\n", __FUNCTION__, real_dev->name); - goto out_put_dev; + return -EOPNOTSUPP; } if ((real_dev->features & NETIF_F_HW_VLAN_FILTER) && (!real_dev->vlan_rx_add_vid || !real_dev->vlan_rx_kill_vid)) { printk(VLAN_DBG "%s: Device %s has buggy VLAN hw accel.\n", __FUNCTION__, real_dev->name); - goto out_put_dev; + return -EOPNOTSUPP; } - /* From this point on, all the data structures must remain - * consistent. - */ - rtnl_lock(); - /* The real device must be up and operating in order to * assosciate a VLAN device with it. */ if (!(real_dev->flags & IFF_UP)) - goto out_unlock; + return -ENETDOWN; - if (__find_vlan_dev(real_dev, VLAN_ID) != NULL) { + if (__find_vlan_dev(real_dev, vlan_id) != NULL) { /* was already registered. */ printk(VLAN_DBG "%s: ALREADY had VLAN registered\n", __FUNCTION__); - goto out_unlock; + return -EEXIST; + } + + return 0; +} + +int register_vlan_dev(struct net_device *dev) +{ + struct vlan_dev_info *vlan = VLAN_DEV_INFO(dev); + struct net_device *real_dev = vlan->real_dev; + unsigned short vlan_id = vlan->vlan_id; + struct vlan_group *grp, *ngrp = NULL; + int err; + + grp = __vlan_find_group(real_dev->ifindex); + if (!grp) { + ngrp = grp = vlan_group_alloc(real_dev->ifindex); + if (!grp) + return -ENOBUFS; } + err = register_netdevice(dev); + if (err < 0) + goto out_free_group; + + /* Account for reference in struct vlan_dev_info */ + dev_hold(real_dev); + + vlan_transfer_operstate(real_dev, dev); + linkwatch_fire_event(dev); /* _MUST_ call rfc2863_policy() */ + + /* So, got the sucker initialized, now lets place + * it into our local structure. + */ + vlan_group_set_device(grp, vlan_id, dev); + if (ngrp && real_dev->features & NETIF_F_HW_VLAN_RX) + real_dev->vlan_rx_register(real_dev, ngrp); + if (real_dev->features & NETIF_F_HW_VLAN_FILTER) + real_dev->vlan_rx_add_vid(real_dev, vlan_id); + + if (vlan_proc_add_dev(dev) < 0) + printk(KERN_WARNING "VLAN: failed to add proc entry for %s\n", + dev->name); + return 0; + +out_free_group: + if (ngrp) + vlan_group_free(ngrp); + return err; +} + +/* Attach a VLAN device to a mac address (ie Ethernet Card). + * Returns 0 if the device was created or a negative error code otherwise. + */ +static int register_vlan_device(struct net_device *real_dev, + unsigned short VLAN_ID) +{ + struct net_device *new_dev; + char name[IFNAMSIZ]; + int err; + +#ifdef VLAN_DEBUG + printk(VLAN_DBG "%s: if_name -:%s:- vid: %i\n", + __FUNCTION__, eth_IF_name, VLAN_ID); +#endif + + if (VLAN_ID >= VLAN_VID_MASK) + return -ERANGE; + + err = vlan_check_real_dev(real_dev, VLAN_ID); + if (err < 0) + return err; + /* Gotta set up the fields for the device. */ #ifdef VLAN_DEBUG printk(VLAN_DBG "About to allocate name, vlan_name_type: %i\n", @@ -471,138 +538,62 @@ static struct net_device *register_vlan_device(const char *eth_IF_name, vlan_setup); if (new_dev == NULL) - goto out_unlock; - -#ifdef VLAN_DEBUG - printk(VLAN_DBG "Allocated new name -:%s:-\n", new_dev->name); -#endif - /* IFF_BROADCAST|IFF_MULTICAST; ??? */ - new_dev->flags = real_dev->flags; - new_dev->flags &= ~IFF_UP; - - new_dev->state = (real_dev->state & ((1<<__LINK_STATE_NOCARRIER) | - (1<<__LINK_STATE_DORMANT))) | - (1<<__LINK_STATE_PRESENT); + return -ENOBUFS; /* need 4 bytes for extra VLAN header info, * hope the underlying device can handle it. */ new_dev->mtu = real_dev->mtu; - /* TODO: maybe just assign it to be ETHERNET? */ - new_dev->type = real_dev->type; - - new_dev->hard_header_len = real_dev->hard_header_len; - if (!(real_dev->features & NETIF_F_HW_VLAN_TX)) { - /* Regular ethernet + 4 bytes (18 total). */ - new_dev->hard_header_len += VLAN_HLEN; - } - +#ifdef VLAN_DEBUG + printk(VLAN_DBG "Allocated new name -:%s:-\n", new_dev->name); VLAN_MEM_DBG("new_dev->priv malloc, addr: %p size: %i\n", new_dev->priv, sizeof(struct vlan_dev_info)); - - memcpy(new_dev->broadcast, real_dev->broadcast, real_dev->addr_len); - memcpy(new_dev->dev_addr, real_dev->dev_addr, real_dev->addr_len); - new_dev->addr_len = real_dev->addr_len; - - if (real_dev->features & NETIF_F_HW_VLAN_TX) { - new_dev->hard_header = real_dev->hard_header; - new_dev->hard_start_xmit = vlan_dev_hwaccel_hard_start_xmit; - new_dev->rebuild_header = real_dev->rebuild_header; - } else { - new_dev->hard_header = vlan_dev_hard_header; - new_dev->hard_start_xmit = vlan_dev_hard_start_xmit; - new_dev->rebuild_header = vlan_dev_rebuild_header; - } - new_dev->hard_header_parse = real_dev->hard_header_parse; +#endif VLAN_DEV_INFO(new_dev)->vlan_id = VLAN_ID; /* 1 through VLAN_VID_MASK */ VLAN_DEV_INFO(new_dev)->real_dev = real_dev; VLAN_DEV_INFO(new_dev)->dent = NULL; - VLAN_DEV_INFO(new_dev)->flags = 1; + VLAN_DEV_INFO(new_dev)->flags = VLAN_FLAG_REORDER_HDR; -#ifdef VLAN_DEBUG - printk(VLAN_DBG "About to go find the group for idx: %i\n", - real_dev->ifindex); -#endif - - if (register_netdevice(new_dev)) + new_dev->rtnl_link_ops = &vlan_link_ops; + err = register_vlan_dev(new_dev); + if (err < 0) goto out_free_newdev; - lockdep_set_class(&new_dev->_xmit_lock, &vlan_netdev_xmit_lock_key); - - new_dev->iflink = real_dev->ifindex; - vlan_transfer_operstate(real_dev, new_dev); - linkwatch_fire_event(new_dev); /* _MUST_ call rfc2863_policy() */ - - /* So, got the sucker initialized, now lets place - * it into our local structure. - */ - grp = __vlan_find_group(real_dev->ifindex); - - /* Note, we are running under the RTNL semaphore - * so it cannot "appear" on us. - */ - if (!grp) { /* need to add a new group */ - grp = kzalloc(sizeof(struct vlan_group), GFP_KERNEL); - if (!grp) - goto out_free_unregister; - - for (i=0; i < VLAN_GROUP_ARRAY_SPLIT_PARTS; i++) { - grp->vlan_devices_arrays[i] = kzalloc( - sizeof(struct net_device *)*VLAN_GROUP_ARRAY_PART_LEN, - GFP_KERNEL); - - if (!grp->vlan_devices_arrays[i]) - goto out_free_arrays; - } - - /* printk(KERN_ALERT "VLAN REGISTER: Allocated new group.\n"); */ - grp->real_dev_ifindex = real_dev->ifindex; - - hlist_add_head_rcu(&grp->hlist, - &vlan_group_hash[vlan_grp_hashfn(real_dev->ifindex)]); - - if (real_dev->features & NETIF_F_HW_VLAN_RX) - real_dev->vlan_rx_register(real_dev, grp); - } - - vlan_group_set_device(grp, VLAN_ID, new_dev); - - if (vlan_proc_add_dev(new_dev)<0)/* create it's proc entry */ - printk(KERN_WARNING "VLAN: failed to add proc entry for %s\n", - new_dev->name); - - if (real_dev->features & NETIF_F_HW_VLAN_FILTER) - real_dev->vlan_rx_add_vid(real_dev, VLAN_ID); - - rtnl_unlock(); - - #ifdef VLAN_DEBUG printk(VLAN_DBG "Allocated new device successfully, returning.\n"); #endif - return new_dev; - -out_free_arrays: - vlan_group_free(grp); - -out_free_unregister: - unregister_netdev(new_dev); - goto out_unlock; + return 0; out_free_newdev: free_netdev(new_dev); + return err; +} -out_unlock: - rtnl_unlock(); +static void vlan_sync_address(struct net_device *dev, + struct net_device *vlandev) +{ + struct vlan_dev_info *vlan = VLAN_DEV_INFO(vlandev); -out_put_dev: - dev_put(real_dev); + /* May be called without an actual change */ + if (!compare_ether_addr(vlan->real_dev_addr, dev->dev_addr)) + return; -out_ret_null: - return NULL; + /* vlan address was different from the old address and is equal to + * the new address */ + if (compare_ether_addr(vlandev->dev_addr, vlan->real_dev_addr) && + !compare_ether_addr(vlandev->dev_addr, dev->dev_addr)) + dev_unicast_delete(dev, vlandev->dev_addr, ETH_ALEN); + + /* vlan address was equal to the old address and is different from + * the new address */ + if (!compare_ether_addr(vlandev->dev_addr, vlan->real_dev_addr) && + compare_ether_addr(vlandev->dev_addr, dev->dev_addr)) + dev_unicast_add(dev, vlandev->dev_addr, ETH_ALEN); + + memcpy(vlan->real_dev_addr, dev->dev_addr, ETH_ALEN); } static int vlan_device_event(struct notifier_block *unused, unsigned long event, void *ptr) @@ -631,6 +622,17 @@ static int vlan_device_event(struct notifier_block *unused, unsigned long event, } break; + case NETDEV_CHANGEADDR: + /* Adjust unicast filters on underlying device */ + for (i = 0; i < VLAN_GROUP_ARRAY_LEN; i++) { + vlandev = vlan_group_get_device(grp, i); + if (!vlandev) + continue; + + vlan_sync_address(dev, vlandev); + } + break; + case NETDEV_DOWN: /* Put all VLANs for this dev in the down state too. */ for (i = 0; i < VLAN_GROUP_ARRAY_LEN; i++) { @@ -693,9 +695,10 @@ out: */ static int vlan_ioctl_handler(void __user *arg) { - int err = 0; + int err; unsigned short vid = 0; struct vlan_ioctl_args args; + struct net_device *dev = NULL; if (copy_from_user(&args, arg, sizeof(struct vlan_ioctl_args))) return -EFAULT; @@ -708,35 +711,61 @@ static int vlan_ioctl_handler(void __user *arg) printk(VLAN_DBG "%s: args.cmd: %x\n", __FUNCTION__, args.cmd); #endif + rtnl_lock(); + + switch (args.cmd) { + case SET_VLAN_INGRESS_PRIORITY_CMD: + case SET_VLAN_EGRESS_PRIORITY_CMD: + case SET_VLAN_FLAG_CMD: + case ADD_VLAN_CMD: + case DEL_VLAN_CMD: + case GET_VLAN_REALDEV_NAME_CMD: + case GET_VLAN_VID_CMD: + err = -ENODEV; + dev = __dev_get_by_name(args.device1); + if (!dev) + goto out; + + err = -EINVAL; + if (args.cmd != ADD_VLAN_CMD && + !(dev->priv_flags & IFF_802_1Q_VLAN)) + goto out; + } + switch (args.cmd) { case SET_VLAN_INGRESS_PRIORITY_CMD: + err = -EPERM; if (!capable(CAP_NET_ADMIN)) - return -EPERM; - err = vlan_dev_set_ingress_priority(args.device1, - args.u.skb_priority, - args.vlan_qos); + break; + vlan_dev_set_ingress_priority(dev, + args.u.skb_priority, + args.vlan_qos); break; case SET_VLAN_EGRESS_PRIORITY_CMD: + err = -EPERM; if (!capable(CAP_NET_ADMIN)) - return -EPERM; - err = vlan_dev_set_egress_priority(args.device1, + break; + err = vlan_dev_set_egress_priority(dev, args.u.skb_priority, args.vlan_qos); break; case SET_VLAN_FLAG_CMD: + err = -EPERM; if (!capable(CAP_NET_ADMIN)) - return -EPERM; - err = vlan_dev_set_vlan_flag(args.device1, + break; + err = vlan_dev_set_vlan_flag(dev, args.u.flag, args.vlan_qos); break; case SET_VLAN_NAME_TYPE_CMD: + err = -EPERM; if (!capable(CAP_NET_ADMIN)) return -EPERM; - if (args.u.name_type < VLAN_NAME_TYPE_HIGHEST) { + if ((args.u.name_type >= 0) && + (args.u.name_type < VLAN_NAME_TYPE_HIGHEST)) { vlan_name_type = args.u.name_type; err = 0; } else { @@ -745,26 +774,17 @@ static int vlan_ioctl_handler(void __user *arg) break; case ADD_VLAN_CMD: + err = -EPERM; if (!capable(CAP_NET_ADMIN)) - return -EPERM; - /* we have been given the name of the Ethernet Device we want to - * talk to: args.dev1 We also have the - * VLAN ID: args.u.VID - */ - if (register_vlan_device(args.device1, args.u.VID)) { - err = 0; - } else { - err = -EINVAL; - } + break; + err = register_vlan_device(dev, args.u.VID); break; case DEL_VLAN_CMD: + err = -EPERM; if (!capable(CAP_NET_ADMIN)) - return -EPERM; - /* Here, the args.dev1 is the actual VLAN we want - * to get rid of. - */ - err = unregister_vlan_device(args.device1); + break; + err = unregister_vlan_device(dev); break; case GET_VLAN_INGRESS_PRIORITY_CMD: @@ -788,9 +808,8 @@ static int vlan_ioctl_handler(void __user *arg) err = -EINVAL; break; case GET_VLAN_REALDEV_NAME_CMD: - err = vlan_dev_get_realdev_name(args.device1, args.u.device2); - if (err) - goto out; + err = 0; + vlan_dev_get_realdev_name(dev, args.u.device2); if (copy_to_user(arg, &args, sizeof(struct vlan_ioctl_args))) { err = -EFAULT; @@ -798,9 +817,8 @@ static int vlan_ioctl_handler(void __user *arg) break; case GET_VLAN_VID_CMD: - err = vlan_dev_get_vid(args.device1, &vid); - if (err) - goto out; + err = 0; + vlan_dev_get_vid(dev, &vid); args.u.VID = vid; if (copy_to_user(arg, &args, sizeof(struct vlan_ioctl_args))) { @@ -812,9 +830,11 @@ static int vlan_ioctl_handler(void __user *arg) /* pass on to underlying device instead?? */ printk(VLAN_DBG "%s: Unknown VLAN CMD: %x \n", __FUNCTION__, args.cmd); - return -EINVAL; + err = -EINVAL; + break; } out: + rtnl_unlock(); return err; } diff --git a/net/8021q/vlan.h b/net/8021q/vlan.h index 1976cdba8f72..7df5b2935579 100644 --- a/net/8021q/vlan.h +++ b/net/8021q/vlan.h @@ -58,15 +58,28 @@ int vlan_dev_hard_header(struct sk_buff *skb, struct net_device *dev, int vlan_dev_hard_start_xmit(struct sk_buff *skb, struct net_device *dev); int vlan_dev_hwaccel_hard_start_xmit(struct sk_buff *skb, struct net_device *dev); int vlan_dev_change_mtu(struct net_device *dev, int new_mtu); -int vlan_dev_set_mac_address(struct net_device *dev, void* addr); int vlan_dev_open(struct net_device* dev); int vlan_dev_stop(struct net_device* dev); int vlan_dev_ioctl(struct net_device* dev, struct ifreq *ifr, int cmd); -int vlan_dev_set_ingress_priority(char* dev_name, __u32 skb_prio, short vlan_prio); -int vlan_dev_set_egress_priority(char* dev_name, __u32 skb_prio, short vlan_prio); -int vlan_dev_set_vlan_flag(char* dev_name, __u32 flag, short flag_val); -int vlan_dev_get_realdev_name(const char* dev_name, char* result); -int vlan_dev_get_vid(const char* dev_name, unsigned short* result); +void vlan_dev_set_ingress_priority(const struct net_device *dev, + u32 skb_prio, short vlan_prio); +int vlan_dev_set_egress_priority(const struct net_device *dev, + u32 skb_prio, short vlan_prio); +int vlan_dev_set_vlan_flag(const struct net_device *dev, + u32 flag, short flag_val); +void vlan_dev_get_realdev_name(const struct net_device *dev, char *result); +void vlan_dev_get_vid(const struct net_device *dev, unsigned short *result); +void vlan_change_rx_flags(struct net_device *dev, int change); void vlan_dev_set_multicast_list(struct net_device *vlan_dev); +int vlan_check_real_dev(struct net_device *real_dev, unsigned short vlan_id); +void vlan_setup(struct net_device *dev); +int register_vlan_dev(struct net_device *dev); +int unregister_vlan_device(struct net_device *dev); + +int vlan_netlink_init(void); +void vlan_netlink_fini(void); + +extern struct rtnl_link_ops vlan_link_ops; + #endif /* !(__BEN_VLAN_802_1Q_INC__) */ diff --git a/net/8021q/vlan_dev.c b/net/8021q/vlan_dev.c index ec46084f44b4..328759c32d61 100644 --- a/net/8021q/vlan_dev.c +++ b/net/8021q/vlan_dev.c @@ -73,7 +73,7 @@ int vlan_dev_rebuild_header(struct sk_buff *skb) static inline struct sk_buff *vlan_check_reorder_header(struct sk_buff *skb) { - if (VLAN_DEV_INFO(skb->dev)->flags & 1) { + if (VLAN_DEV_INFO(skb->dev)->flags & VLAN_FLAG_REORDER_HDR) { if (skb_shared(skb) || skb_cloned(skb)) { struct sk_buff *nskb = skb_copy(skb, GFP_ATOMIC); kfree_skb(skb); @@ -116,12 +116,22 @@ int vlan_skb_recv(struct sk_buff *skb, struct net_device *dev, struct packet_type* ptype, struct net_device *orig_dev) { unsigned char *rawp = NULL; - struct vlan_hdr *vhdr = (struct vlan_hdr *)(skb->data); + struct vlan_hdr *vhdr; unsigned short vid; struct net_device_stats *stats; unsigned short vlan_TCI; __be16 proto; + if ((skb = skb_share_check(skb, GFP_ATOMIC)) == NULL) + return -1; + + if (unlikely(!pskb_may_pull(skb, VLAN_HLEN))) { + kfree_skb(skb); + return -1; + } + + vhdr = (struct vlan_hdr *)(skb->data); + /* vlan_TCI = ntohs(get_unaligned(&vhdr->h_vlan_TCI)); */ vlan_TCI = ntohs(vhdr->h_vlan_TCI); @@ -350,7 +360,8 @@ int vlan_dev_hard_header(struct sk_buff *skb, struct net_device *dev, * header shuffling in the hard_start_xmit. Users can turn off this * REORDER behaviour with the vconfig tool. */ - build_vlan_header = ((VLAN_DEV_INFO(dev)->flags & 1) == 0); + if (!(VLAN_DEV_INFO(dev)->flags & VLAN_FLAG_REORDER_HDR)) + build_vlan_header = 1; if (build_vlan_header) { vhdr = (struct vlan_hdr *) skb_push(skb, VLAN_HLEN); @@ -534,272 +545,123 @@ int vlan_dev_change_mtu(struct net_device *dev, int new_mtu) return 0; } -int vlan_dev_set_ingress_priority(char *dev_name, __u32 skb_prio, short vlan_prio) +void vlan_dev_set_ingress_priority(const struct net_device *dev, + u32 skb_prio, short vlan_prio) { - struct net_device *dev = dev_get_by_name(dev_name); + struct vlan_dev_info *vlan = VLAN_DEV_INFO(dev); - if (dev) { - if (dev->priv_flags & IFF_802_1Q_VLAN) { - /* see if a priority mapping exists.. */ - VLAN_DEV_INFO(dev)->ingress_priority_map[vlan_prio & 0x7] = skb_prio; - dev_put(dev); - return 0; - } + if (vlan->ingress_priority_map[vlan_prio & 0x7] && !skb_prio) + vlan->nr_ingress_mappings--; + else if (!vlan->ingress_priority_map[vlan_prio & 0x7] && skb_prio) + vlan->nr_ingress_mappings++; - dev_put(dev); - } - return -EINVAL; + vlan->ingress_priority_map[vlan_prio & 0x7] = skb_prio; } -int vlan_dev_set_egress_priority(char *dev_name, __u32 skb_prio, short vlan_prio) +int vlan_dev_set_egress_priority(const struct net_device *dev, + u32 skb_prio, short vlan_prio) { - struct net_device *dev = dev_get_by_name(dev_name); + struct vlan_dev_info *vlan = VLAN_DEV_INFO(dev); struct vlan_priority_tci_mapping *mp = NULL; struct vlan_priority_tci_mapping *np; + u32 vlan_qos = (vlan_prio << 13) & 0xE000; - if (dev) { - if (dev->priv_flags & IFF_802_1Q_VLAN) { - /* See if a priority mapping exists.. */ - mp = VLAN_DEV_INFO(dev)->egress_priority_map[skb_prio & 0xF]; - while (mp) { - if (mp->priority == skb_prio) { - mp->vlan_qos = ((vlan_prio << 13) & 0xE000); - dev_put(dev); - return 0; - } - mp = mp->next; - } - - /* Create a new mapping then. */ - mp = VLAN_DEV_INFO(dev)->egress_priority_map[skb_prio & 0xF]; - np = kmalloc(sizeof(struct vlan_priority_tci_mapping), GFP_KERNEL); - if (np) { - np->next = mp; - np->priority = skb_prio; - np->vlan_qos = ((vlan_prio << 13) & 0xE000); - VLAN_DEV_INFO(dev)->egress_priority_map[skb_prio & 0xF] = np; - dev_put(dev); - return 0; - } else { - dev_put(dev); - return -ENOBUFS; - } - } - dev_put(dev); - } - return -EINVAL; -} - -/* Flags are defined in the vlan_dev_info class in include/linux/if_vlan.h file. */ -int vlan_dev_set_vlan_flag(char *dev_name, __u32 flag, short flag_val) -{ - struct net_device *dev = dev_get_by_name(dev_name); - - if (dev) { - if (dev->priv_flags & IFF_802_1Q_VLAN) { - /* verify flag is supported */ - if (flag == 1) { - if (flag_val) { - VLAN_DEV_INFO(dev)->flags |= 1; - } else { - VLAN_DEV_INFO(dev)->flags &= ~1; - } - dev_put(dev); - return 0; - } else { - printk(KERN_ERR "%s: flag %i is not valid.\n", - __FUNCTION__, (int)(flag)); - dev_put(dev); - return -EINVAL; - } - } else { - printk(KERN_ERR - "%s: %s is not a vlan device, priv_flags: %hX.\n", - __FUNCTION__, dev->name, dev->priv_flags); - dev_put(dev); + /* See if a priority mapping exists.. */ + mp = vlan->egress_priority_map[skb_prio & 0xF]; + while (mp) { + if (mp->priority == skb_prio) { + if (mp->vlan_qos && !vlan_qos) + vlan->nr_egress_mappings--; + else if (!mp->vlan_qos && vlan_qos) + vlan->nr_egress_mappings++; + mp->vlan_qos = vlan_qos; + return 0; } - } else { - printk(KERN_ERR "%s: Could not find device: %s\n", - __FUNCTION__, dev_name); + mp = mp->next; } - return -EINVAL; + /* Create a new mapping then. */ + mp = vlan->egress_priority_map[skb_prio & 0xF]; + np = kmalloc(sizeof(struct vlan_priority_tci_mapping), GFP_KERNEL); + if (!np) + return -ENOBUFS; + + np->next = mp; + np->priority = skb_prio; + np->vlan_qos = vlan_qos; + vlan->egress_priority_map[skb_prio & 0xF] = np; + if (vlan_qos) + vlan->nr_egress_mappings++; + return 0; } - -int vlan_dev_get_realdev_name(const char *dev_name, char* result) +/* Flags are defined in the vlan_flags enum in include/linux/if_vlan.h file. */ +int vlan_dev_set_vlan_flag(const struct net_device *dev, + u32 flag, short flag_val) { - struct net_device *dev = dev_get_by_name(dev_name); - int rv = 0; - if (dev) { - if (dev->priv_flags & IFF_802_1Q_VLAN) { - strncpy(result, VLAN_DEV_INFO(dev)->real_dev->name, 23); - rv = 0; + /* verify flag is supported */ + if (flag == VLAN_FLAG_REORDER_HDR) { + if (flag_val) { + VLAN_DEV_INFO(dev)->flags |= VLAN_FLAG_REORDER_HDR; } else { - rv = -EINVAL; + VLAN_DEV_INFO(dev)->flags &= ~VLAN_FLAG_REORDER_HDR; } - dev_put(dev); - } else { - rv = -ENODEV; + return 0; } - return rv; + printk(KERN_ERR "%s: flag %i is not valid.\n", __FUNCTION__, flag); + return -EINVAL; } -int vlan_dev_get_vid(const char *dev_name, unsigned short* result) +void vlan_dev_get_realdev_name(const struct net_device *dev, char *result) { - struct net_device *dev = dev_get_by_name(dev_name); - int rv = 0; - if (dev) { - if (dev->priv_flags & IFF_802_1Q_VLAN) { - *result = VLAN_DEV_INFO(dev)->vlan_id; - rv = 0; - } else { - rv = -EINVAL; - } - dev_put(dev); - } else { - rv = -ENODEV; - } - return rv; + strncpy(result, VLAN_DEV_INFO(dev)->real_dev->name, 23); } - -int vlan_dev_set_mac_address(struct net_device *dev, void *addr_struct_p) +void vlan_dev_get_vid(const struct net_device *dev, unsigned short *result) { - struct sockaddr *addr = (struct sockaddr *)(addr_struct_p); - int i; - - if (netif_running(dev)) - return -EBUSY; - - memcpy(dev->dev_addr, addr->sa_data, dev->addr_len); - - printk("%s: Setting MAC address to ", dev->name); - for (i = 0; i < 6; i++) - printk(" %2.2x", dev->dev_addr[i]); - printk(".\n"); - - if (memcmp(VLAN_DEV_INFO(dev)->real_dev->dev_addr, - dev->dev_addr, - dev->addr_len) != 0) { - if (!(VLAN_DEV_INFO(dev)->real_dev->flags & IFF_PROMISC)) { - int flgs = VLAN_DEV_INFO(dev)->real_dev->flags; - - /* Increment our in-use promiscuity counter */ - dev_set_promiscuity(VLAN_DEV_INFO(dev)->real_dev, 1); - - /* Make PROMISC visible to the user. */ - flgs |= IFF_PROMISC; - printk("VLAN (%s): Setting underlying device (%s) to promiscious mode.\n", - dev->name, VLAN_DEV_INFO(dev)->real_dev->name); - dev_change_flags(VLAN_DEV_INFO(dev)->real_dev, flgs); - } - } else { - printk("VLAN (%s): Underlying device (%s) has same MAC, not checking promiscious mode.\n", - dev->name, VLAN_DEV_INFO(dev)->real_dev->name); - } - - return 0; + *result = VLAN_DEV_INFO(dev)->vlan_id; } -static inline int vlan_dmi_equals(struct dev_mc_list *dmi1, - struct dev_mc_list *dmi2) -{ - return ((dmi1->dmi_addrlen == dmi2->dmi_addrlen) && - (memcmp(dmi1->dmi_addr, dmi2->dmi_addr, dmi1->dmi_addrlen) == 0)); -} - -/** dmi is a single entry into a dev_mc_list, a single node. mc_list is - * an entire list, and we'll iterate through it. - */ -static int vlan_should_add_mc(struct dev_mc_list *dmi, struct dev_mc_list *mc_list) +int vlan_dev_open(struct net_device *dev) { - struct dev_mc_list *idmi; - - for (idmi = mc_list; idmi != NULL; ) { - if (vlan_dmi_equals(dmi, idmi)) { - if (dmi->dmi_users > idmi->dmi_users) - return 1; - else - return 0; - } else { - idmi = idmi->next; - } - } - - return 1; -} + struct vlan_dev_info *vlan = VLAN_DEV_INFO(dev); + struct net_device *real_dev = vlan->real_dev; + int err; -static inline void vlan_destroy_mc_list(struct dev_mc_list *mc_list) -{ - struct dev_mc_list *dmi = mc_list; - struct dev_mc_list *next; + if (!(real_dev->flags & IFF_UP)) + return -ENETDOWN; - while(dmi) { - next = dmi->next; - kfree(dmi); - dmi = next; + if (compare_ether_addr(dev->dev_addr, real_dev->dev_addr)) { + err = dev_unicast_add(real_dev, dev->dev_addr, ETH_ALEN); + if (err < 0) + return err; } -} + memcpy(vlan->real_dev_addr, real_dev->dev_addr, ETH_ALEN); -static void vlan_copy_mc_list(struct dev_mc_list *mc_list, struct vlan_dev_info *vlan_info) -{ - struct dev_mc_list *dmi, *new_dmi; - - vlan_destroy_mc_list(vlan_info->old_mc_list); - vlan_info->old_mc_list = NULL; - - for (dmi = mc_list; dmi != NULL; dmi = dmi->next) { - new_dmi = kmalloc(sizeof(*new_dmi), GFP_ATOMIC); - if (new_dmi == NULL) { - printk(KERN_ERR "vlan: cannot allocate memory. " - "Multicast may not work properly from now.\n"); - return; - } + if (dev->flags & IFF_ALLMULTI) + dev_set_allmulti(real_dev, 1); + if (dev->flags & IFF_PROMISC) + dev_set_promiscuity(real_dev, 1); - /* Copy whole structure, then make new 'next' pointer */ - *new_dmi = *dmi; - new_dmi->next = vlan_info->old_mc_list; - vlan_info->old_mc_list = new_dmi; - } + return 0; } -static void vlan_flush_mc_list(struct net_device *dev) +int vlan_dev_stop(struct net_device *dev) { - struct dev_mc_list *dmi = dev->mc_list; - - while (dmi) { - printk(KERN_DEBUG "%s: del %.2x:%.2x:%.2x:%.2x:%.2x:%.2x mcast address from vlan interface\n", - dev->name, - dmi->dmi_addr[0], - dmi->dmi_addr[1], - dmi->dmi_addr[2], - dmi->dmi_addr[3], - dmi->dmi_addr[4], - dmi->dmi_addr[5]); - dev_mc_delete(dev, dmi->dmi_addr, dmi->dmi_addrlen, 0); - dmi = dev->mc_list; - } + struct net_device *real_dev = VLAN_DEV_INFO(dev)->real_dev; - /* dev->mc_list is NULL by the time we get here. */ - vlan_destroy_mc_list(VLAN_DEV_INFO(dev)->old_mc_list); - VLAN_DEV_INFO(dev)->old_mc_list = NULL; -} + dev_mc_unsync(real_dev, dev); + if (dev->flags & IFF_ALLMULTI) + dev_set_allmulti(real_dev, -1); + if (dev->flags & IFF_PROMISC) + dev_set_promiscuity(real_dev, -1); -int vlan_dev_open(struct net_device *dev) -{ - if (!(VLAN_DEV_INFO(dev)->real_dev->flags & IFF_UP)) - return -ENETDOWN; + if (compare_ether_addr(dev->dev_addr, real_dev->dev_addr)) + dev_unicast_delete(real_dev, dev->dev_addr, dev->addr_len); return 0; } -int vlan_dev_stop(struct net_device *dev) -{ - vlan_flush_mc_list(dev); - return 0; -} - int vlan_dev_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd) { struct net_device *real_dev = VLAN_DEV_INFO(dev)->real_dev; @@ -816,9 +678,6 @@ int vlan_dev_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd) if (real_dev->do_ioctl && netif_device_present(real_dev)) err = real_dev->do_ioctl(real_dev, &ifrr, cmd); break; - - case SIOCETHTOOL: - err = dev_ethtool(&ifrr); } if (!err) @@ -827,68 +686,18 @@ int vlan_dev_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd) return err; } -/** Taken from Gleb + Lennert's VLAN code, and modified... */ -void vlan_dev_set_multicast_list(struct net_device *vlan_dev) +void vlan_change_rx_flags(struct net_device *dev, int change) { - struct dev_mc_list *dmi; - struct net_device *real_dev; - int inc; - - if (vlan_dev && (vlan_dev->priv_flags & IFF_802_1Q_VLAN)) { - /* Then it's a real vlan device, as far as we can tell.. */ - real_dev = VLAN_DEV_INFO(vlan_dev)->real_dev; - - /* compare the current promiscuity to the last promisc we had.. */ - inc = vlan_dev->promiscuity - VLAN_DEV_INFO(vlan_dev)->old_promiscuity; - if (inc) { - printk(KERN_INFO "%s: dev_set_promiscuity(master, %d)\n", - vlan_dev->name, inc); - dev_set_promiscuity(real_dev, inc); /* found in dev.c */ - VLAN_DEV_INFO(vlan_dev)->old_promiscuity = vlan_dev->promiscuity; - } - - inc = vlan_dev->allmulti - VLAN_DEV_INFO(vlan_dev)->old_allmulti; - if (inc) { - printk(KERN_INFO "%s: dev_set_allmulti(master, %d)\n", - vlan_dev->name, inc); - dev_set_allmulti(real_dev, inc); /* dev.c */ - VLAN_DEV_INFO(vlan_dev)->old_allmulti = vlan_dev->allmulti; - } - - /* looking for addresses to add to master's list */ - for (dmi = vlan_dev->mc_list; dmi != NULL; dmi = dmi->next) { - if (vlan_should_add_mc(dmi, VLAN_DEV_INFO(vlan_dev)->old_mc_list)) { - dev_mc_add(real_dev, dmi->dmi_addr, dmi->dmi_addrlen, 0); - printk(KERN_DEBUG "%s: add %.2x:%.2x:%.2x:%.2x:%.2x:%.2x mcast address to master interface\n", - vlan_dev->name, - dmi->dmi_addr[0], - dmi->dmi_addr[1], - dmi->dmi_addr[2], - dmi->dmi_addr[3], - dmi->dmi_addr[4], - dmi->dmi_addr[5]); - } - } + struct net_device *real_dev = VLAN_DEV_INFO(dev)->real_dev; - /* looking for addresses to delete from master's list */ - for (dmi = VLAN_DEV_INFO(vlan_dev)->old_mc_list; dmi != NULL; dmi = dmi->next) { - if (vlan_should_add_mc(dmi, vlan_dev->mc_list)) { - /* if we think we should add it to the new list, then we should really - * delete it from the real list on the underlying device. - */ - dev_mc_delete(real_dev, dmi->dmi_addr, dmi->dmi_addrlen, 0); - printk(KERN_DEBUG "%s: del %.2x:%.2x:%.2x:%.2x:%.2x:%.2x mcast address from master interface\n", - vlan_dev->name, - dmi->dmi_addr[0], - dmi->dmi_addr[1], - dmi->dmi_addr[2], - dmi->dmi_addr[3], - dmi->dmi_addr[4], - dmi->dmi_addr[5]); - } - } + if (change & IFF_ALLMULTI) + dev_set_allmulti(real_dev, dev->flags & IFF_ALLMULTI ? 1 : -1); + if (change & IFF_PROMISC) + dev_set_promiscuity(real_dev, dev->flags & IFF_PROMISC ? 1 : -1); +} - /* save multicast list */ - vlan_copy_mc_list(vlan_dev->mc_list, VLAN_DEV_INFO(vlan_dev)); - } +/** Taken from Gleb + Lennert's VLAN code, and modified... */ +void vlan_dev_set_multicast_list(struct net_device *vlan_dev) +{ + dev_mc_sync(VLAN_DEV_INFO(vlan_dev)->real_dev, vlan_dev); } diff --git a/net/8021q/vlanproc.c b/net/8021q/vlanproc.c index d216a64421cd..bd08aa090763 100644 --- a/net/8021q/vlanproc.c +++ b/net/8021q/vlanproc.c @@ -69,7 +69,7 @@ static const char name_conf[] = "config"; * Generic /proc/net/vlan/<file> file and inode operations */ -static struct seq_operations vlan_seq_ops = { +static const struct seq_operations vlan_seq_ops = { .start = vlan_seq_start, .next = vlan_seq_next, .stop = vlan_seq_stop, @@ -319,7 +319,7 @@ static int vlandev_seq_show(struct seq_file *seq, void *offset) static const char fmt[] = "%30s %12lu\n"; int i; - if ((vlandev == NULL) || (!(vlandev->priv_flags & IFF_802_1Q_VLAN))) + if (!(vlandev->priv_flags & IFF_802_1Q_VLAN)) return 0; seq_printf(seq, "%s VID: %d REORDER_HDR: %i dev->priv_flags: %hx\n", @@ -342,7 +342,7 @@ static int vlandev_seq_show(struct seq_file *seq, void *offset) seq_printf(seq, "Device: %s", dev_info->real_dev->name); /* now show all PRIORITY mappings relating to this VLAN */ seq_printf(seq, - "\nINGRESS priority mappings: 0:%lu 1:%lu 2:%lu 3:%lu 4:%lu 5:%lu 6:%lu 7:%lu\n", + "\nINGRESS priority mappings: 0:%u 1:%u 2:%u 3:%u 4:%u 5:%u 6:%u 7:%u\n", dev_info->ingress_priority_map[0], dev_info->ingress_priority_map[1], dev_info->ingress_priority_map[2], @@ -357,7 +357,7 @@ static int vlandev_seq_show(struct seq_file *seq, void *offset) const struct vlan_priority_tci_mapping *mp = dev_info->egress_priority_map[i]; while (mp) { - seq_printf(seq, "%lu:%hu ", + seq_printf(seq, "%u:%hu ", mp->priority, ((mp->vlan_qos >> 13) & 0x7)); mp = mp->next; } diff --git a/net/Kconfig b/net/Kconfig index f3de72978ab6..cdba08ca2efe 100644 --- a/net/Kconfig +++ b/net/Kconfig @@ -227,6 +227,7 @@ source "net/ieee80211/Kconfig" endmenu source "net/rfkill/Kconfig" +source "net/9p/Kconfig" endif # if NET endmenu # Networking diff --git a/net/Makefile b/net/Makefile index 34e5b2d7f877..bbe7d2a41486 100644 --- a/net/Makefile +++ b/net/Makefile @@ -37,7 +37,6 @@ obj-$(CONFIG_AX25) += ax25/ obj-$(CONFIG_IRDA) += irda/ obj-$(CONFIG_BT) += bluetooth/ obj-$(CONFIG_SUNRPC) += sunrpc/ -obj-$(CONFIG_RXRPC) += rxrpc/ obj-$(CONFIG_AF_RXRPC) += rxrpc/ obj-$(CONFIG_ATM) += atm/ obj-$(CONFIG_DECNET) += decnet/ @@ -52,6 +51,7 @@ obj-$(CONFIG_TIPC) += tipc/ obj-$(CONFIG_NETLABEL) += netlabel/ obj-$(CONFIG_IUCV) += iucv/ obj-$(CONFIG_RFKILL) += rfkill/ +obj-$(CONFIG_NET_9P) += 9p/ ifeq ($(CONFIG_NET),y) obj-$(CONFIG_SYSCTL) += sysctl_net.o diff --git a/net/appletalk/aarp.c b/net/appletalk/aarp.c index 5ef6a238bdbc..3d1655f98388 100644 --- a/net/appletalk/aarp.c +++ b/net/appletalk/aarp.c @@ -1024,7 +1024,7 @@ static int aarp_seq_show(struct seq_file *seq, void *v) return 0; } -static struct seq_operations aarp_seq_ops = { +static const struct seq_operations aarp_seq_ops = { .start = aarp_seq_start, .next = aarp_seq_next, .stop = aarp_seq_stop, diff --git a/net/appletalk/atalk_proc.c b/net/appletalk/atalk_proc.c index 57ff8122b5c5..87a582cc8111 100644 --- a/net/appletalk/atalk_proc.c +++ b/net/appletalk/atalk_proc.c @@ -204,21 +204,21 @@ out: return 0; } -static struct seq_operations atalk_seq_interface_ops = { +static const struct seq_operations atalk_seq_interface_ops = { .start = atalk_seq_interface_start, .next = atalk_seq_interface_next, .stop = atalk_seq_interface_stop, .show = atalk_seq_interface_show, }; -static struct seq_operations atalk_seq_route_ops = { +static const struct seq_operations atalk_seq_route_ops = { .start = atalk_seq_route_start, .next = atalk_seq_route_next, .stop = atalk_seq_route_stop, .show = atalk_seq_route_show, }; -static struct seq_operations atalk_seq_socket_ops = { +static const struct seq_operations atalk_seq_socket_ops = { .start = atalk_seq_socket_start, .next = atalk_seq_socket_next, .stop = atalk_seq_socket_stop, diff --git a/net/atm/br2684.c b/net/atm/br2684.c index 0e9f00c5c899..c0f6861eefe3 100644 --- a/net/atm/br2684.c +++ b/net/atm/br2684.c @@ -460,11 +460,7 @@ static void br2684_push(struct atm_vcc *atmvcc, struct sk_buff *skb) skb_pull(skb, plen); skb_set_mac_header(skb, -ETH_HLEN); skb->pkt_type = PACKET_HOST; -#ifdef CONFIG_BR2684_FAST_TRANS - skb->protocol = ((u16 *) skb->data)[-1]; -#else /* some protocols might require this: */ skb->protocol = br_type_trans(skb, net_dev); -#endif /* CONFIG_BR2684_FAST_TRANS */ #else skb_pull(skb, plen - ETH_HLEN); skb->protocol = eth_type_trans(skb, net_dev); @@ -699,28 +695,13 @@ static struct atm_ioctl br2684_ioctl_ops = { #ifdef CONFIG_PROC_FS static void *br2684_seq_start(struct seq_file *seq, loff_t *pos) { - loff_t offs = 0; - struct br2684_dev *brd; - read_lock(&devs_lock); - - list_for_each_entry(brd, &br2684_devs, br2684_devs) { - if (offs == *pos) - return brd; - ++offs; - } - return NULL; + return seq_list_start(&br2684_devs, *pos); } static void *br2684_seq_next(struct seq_file *seq, void *v, loff_t *pos) { - struct br2684_dev *brd = v; - - ++*pos; - - brd = list_entry(brd->br2684_devs.next, - struct br2684_dev, br2684_devs); - return (&brd->br2684_devs != &br2684_devs) ? brd : NULL; + return seq_list_next(v, &br2684_devs, pos); } static void br2684_seq_stop(struct seq_file *seq, void *v) @@ -730,7 +711,8 @@ static void br2684_seq_stop(struct seq_file *seq, void *v) static int br2684_seq_show(struct seq_file *seq, void *v) { - const struct br2684_dev *brdev = v; + const struct br2684_dev *brdev = list_entry(v, struct br2684_dev, + br2684_devs); const struct net_device *net_dev = brdev->net_dev; const struct br2684_vcc *brvcc; @@ -772,7 +754,7 @@ static int br2684_seq_show(struct seq_file *seq, void *v) return 0; } -static struct seq_operations br2684_seq_ops = { +static const struct seq_operations br2684_seq_ops = { .start = br2684_seq_start, .next = br2684_seq_next, .stop = br2684_seq_stop, diff --git a/net/atm/clip.c b/net/atm/clip.c index 876b77f14745..ecf0f79b94ae 100644 --- a/net/atm/clip.c +++ b/net/atm/clip.c @@ -928,7 +928,7 @@ static int clip_seq_show(struct seq_file *seq, void *v) return 0; } -static struct seq_operations arp_seq_ops = { +static const struct seq_operations arp_seq_ops = { .start = clip_seq_start, .next = neigh_seq_next, .stop = neigh_seq_stop, diff --git a/net/atm/lec.c b/net/atm/lec.c index 4dc5f2b8c43c..59d5aa3366f2 100644 --- a/net/atm/lec.c +++ b/net/atm/lec.c @@ -21,7 +21,6 @@ #include <net/dst.h> #include <linux/proc_fs.h> #include <linux/spinlock.h> -#include <linux/proc_fs.h> #include <linux/seq_file.h> /* TokenRing if needed */ @@ -1174,7 +1173,7 @@ static int lec_seq_show(struct seq_file *seq, void *v) return 0; } -static struct seq_operations lec_seq_ops = { +static const struct seq_operations lec_seq_ops = { .start = lec_seq_start, .next = lec_seq_next, .stop = lec_seq_stop, diff --git a/net/atm/mpoa_proc.c b/net/atm/mpoa_proc.c index 4b05cbec7a58..91f3ffc90dbd 100644 --- a/net/atm/mpoa_proc.c +++ b/net/atm/mpoa_proc.c @@ -177,7 +177,7 @@ static int mpc_show(struct seq_file *m, void *v) return 0; } -static struct seq_operations mpc_op = { +static const struct seq_operations mpc_op = { .start = mpc_start, .next = mpc_next, .stop = mpc_stop, diff --git a/net/atm/proc.c b/net/atm/proc.c index 9e61e512f667..99fc1fe950ee 100644 --- a/net/atm/proc.c +++ b/net/atm/proc.c @@ -110,7 +110,7 @@ static inline void *vcc_walk(struct vcc_state *state, loff_t l) } static int __vcc_seq_open(struct inode *inode, struct file *file, - int family, struct seq_operations *ops) + int family, const struct seq_operations *ops) { struct vcc_state *state; struct seq_file *seq; @@ -260,7 +260,7 @@ static int atm_dev_seq_show(struct seq_file *seq, void *v) return 0; } -static struct seq_operations atm_dev_seq_ops = { +static const struct seq_operations atm_dev_seq_ops = { .start = atm_dev_seq_start, .next = atm_dev_seq_next, .stop = atm_dev_seq_stop, @@ -295,7 +295,7 @@ static int pvc_seq_show(struct seq_file *seq, void *v) return 0; } -static struct seq_operations pvc_seq_ops = { +static const struct seq_operations pvc_seq_ops = { .start = vcc_seq_start, .next = vcc_seq_next, .stop = vcc_seq_stop, @@ -329,7 +329,7 @@ static int vcc_seq_show(struct seq_file *seq, void *v) return 0; } -static struct seq_operations vcc_seq_ops = { +static const struct seq_operations vcc_seq_ops = { .start = vcc_seq_start, .next = vcc_seq_next, .stop = vcc_seq_stop, @@ -364,7 +364,7 @@ static int svc_seq_show(struct seq_file *seq, void *v) return 0; } -static struct seq_operations svc_seq_ops = { +static const struct seq_operations svc_seq_ops = { .start = vcc_seq_start, .next = vcc_seq_next, .stop = vcc_seq_stop, diff --git a/net/ax25/af_ax25.c b/net/ax25/af_ax25.c index 429e13a6c6ad..dae2a42d3d86 100644 --- a/net/ax25/af_ax25.c +++ b/net/ax25/af_ax25.c @@ -1262,7 +1262,7 @@ static int __must_check ax25_connect(struct socket *sock, for (;;) { prepare_to_wait(sk->sk_sleep, &wait, - TASK_INTERRUPTIBLE); + TASK_INTERRUPTIBLE); if (sk->sk_state != TCP_SYN_SENT) break; if (!signal_pending(current)) { @@ -1924,7 +1924,7 @@ static int ax25_info_show(struct seq_file *seq, void *v) return 0; } -static struct seq_operations ax25_info_seqops = { +static const struct seq_operations ax25_info_seqops = { .start = ax25_info_start, .next = ax25_info_next, .stop = ax25_info_stop, diff --git a/net/ax25/ax25_iface.c b/net/ax25/ax25_iface.c index 16be0c14780a..8443af57a374 100644 --- a/net/ax25/ax25_iface.c +++ b/net/ax25/ax25_iface.c @@ -69,7 +69,6 @@ void ax25_protocol_release(unsigned int pid) if (protocol->pid == pid) { protocol_list = protocol->next; write_unlock_bh(&protocol_list_lock); - kfree(protocol); return; } @@ -78,7 +77,6 @@ void ax25_protocol_release(unsigned int pid) s = protocol->next; protocol->next = protocol->next->next; write_unlock_bh(&protocol_list_lock); - kfree(s); return; } diff --git a/net/ax25/ax25_route.c b/net/ax25/ax25_route.c index d65b8e22868d..9ecf6f1df863 100644 --- a/net/ax25/ax25_route.c +++ b/net/ax25/ax25_route.c @@ -320,7 +320,7 @@ static int ax25_rt_seq_show(struct seq_file *seq, void *v) return 0; } -static struct seq_operations ax25_rt_seqops = { +static const struct seq_operations ax25_rt_seqops = { .start = ax25_rt_seq_start, .next = ax25_rt_seq_next, .stop = ax25_rt_seq_stop, diff --git a/net/ax25/ax25_uid.c b/net/ax25/ax25_uid.c index 75c76647b2cb..ce0b13d44385 100644 --- a/net/ax25/ax25_uid.c +++ b/net/ax25/ax25_uid.c @@ -185,7 +185,7 @@ static int ax25_uid_seq_show(struct seq_file *seq, void *v) return 0; } -static struct seq_operations ax25_uid_seqops = { +static const struct seq_operations ax25_uid_seqops = { .start = ax25_uid_seq_start, .next = ax25_uid_seq_next, .stop = ax25_uid_seq_stop, diff --git a/net/bluetooth/bnep/core.c b/net/bluetooth/bnep/core.c index 1c8f4a0c5f43..1f78c3e336d8 100644 --- a/net/bluetooth/bnep/core.c +++ b/net/bluetooth/bnep/core.c @@ -36,6 +36,7 @@ #include <linux/signal.h> #include <linux/init.h> #include <linux/wait.h> +#include <linux/freezer.h> #include <linux/errno.h> #include <linux/net.h> #include <net/sock.h> @@ -474,7 +475,6 @@ static int bnep_session(void *arg) daemonize("kbnepd %s", dev->name); set_user_nice(current, -15); - current->flags |= PF_NOFREEZE; init_waitqueue_entry(&wait, current); add_wait_queue(sk->sk_sleep, &wait); diff --git a/net/bluetooth/cmtp/core.c b/net/bluetooth/cmtp/core.c index 66bef1ccee2a..ca60a4517fd3 100644 --- a/net/bluetooth/cmtp/core.c +++ b/net/bluetooth/cmtp/core.c @@ -29,6 +29,7 @@ #include <linux/slab.h> #include <linux/poll.h> #include <linux/fcntl.h> +#include <linux/freezer.h> #include <linux/skbuff.h> #include <linux/socket.h> #include <linux/ioctl.h> @@ -287,7 +288,6 @@ static int cmtp_session(void *arg) daemonize("kcmtpd_ctr_%d", session->num); set_user_nice(current, -15); - current->flags |= PF_NOFREEZE; init_waitqueue_entry(&wait, current); add_wait_queue(sk->sk_sleep, &wait); diff --git a/net/bluetooth/hci_conn.c b/net/bluetooth/hci_conn.c index 63980bd6b5f2..5fdfc9a67d39 100644 --- a/net/bluetooth/hci_conn.c +++ b/net/bluetooth/hci_conn.c @@ -123,8 +123,8 @@ void hci_add_sco(struct hci_conn *conn, __u16 handle) conn->state = BT_CONNECT; conn->out = 1; - cp.pkt_type = cpu_to_le16(hdev->pkt_type & SCO_PTYPE_MASK); cp.handle = cpu_to_le16(handle); + cp.pkt_type = cpu_to_le16(hdev->pkt_type & SCO_PTYPE_MASK); hci_send_cmd(hdev, OGF_LINK_CTL, OCF_ADD_SCO, sizeof(cp), &cp); } @@ -220,19 +220,19 @@ int hci_conn_del(struct hci_conn *conn) del_timer(&conn->disc_timer); - if (conn->type == SCO_LINK) { - struct hci_conn *acl = conn->link; - if (acl) { - acl->link = NULL; - hci_conn_put(acl); - } - } else { + if (conn->type == ACL_LINK) { struct hci_conn *sco = conn->link; if (sco) sco->link = NULL; /* Unacked frames */ hdev->acl_cnt += conn->sent; + } else { + struct hci_conn *acl = conn->link; + if (acl) { + acl->link = NULL; + hci_conn_put(acl); + } } tasklet_disable(&hdev->tx_task); @@ -297,9 +297,10 @@ EXPORT_SYMBOL(hci_get_route); /* Create SCO or ACL connection. * Device _must_ be locked */ -struct hci_conn * hci_connect(struct hci_dev *hdev, int type, bdaddr_t *dst) +struct hci_conn *hci_connect(struct hci_dev *hdev, int type, bdaddr_t *dst) { struct hci_conn *acl; + struct hci_conn *sco; BT_DBG("%s dst %s", hdev->name, batostr(dst)); @@ -313,28 +314,26 @@ struct hci_conn * hci_connect(struct hci_dev *hdev, int type, bdaddr_t *dst) if (acl->state == BT_OPEN || acl->state == BT_CLOSED) hci_acl_connect(acl); - if (type == SCO_LINK) { - struct hci_conn *sco; + if (type == ACL_LINK) + return acl; - if (!(sco = hci_conn_hash_lookup_ba(hdev, SCO_LINK, dst))) { - if (!(sco = hci_conn_add(hdev, SCO_LINK, dst))) { - hci_conn_put(acl); - return NULL; - } + if (!(sco = hci_conn_hash_lookup_ba(hdev, type, dst))) { + if (!(sco = hci_conn_add(hdev, type, dst))) { + hci_conn_put(acl); + return NULL; } - acl->link = sco; - sco->link = acl; + } - hci_conn_hold(sco); + acl->link = sco; + sco->link = acl; - if (acl->state == BT_CONNECTED && - (sco->state == BT_OPEN || sco->state == BT_CLOSED)) - hci_add_sco(sco, acl->handle); + hci_conn_hold(sco); - return sco; - } else { - return acl; - } + if (acl->state == BT_CONNECTED && + (sco->state == BT_OPEN || sco->state == BT_CLOSED)) + hci_add_sco(sco, acl->handle); + + return sco; } EXPORT_SYMBOL(hci_connect); diff --git a/net/bluetooth/hci_core.c b/net/bluetooth/hci_core.c index aa4b56a8c3ea..18e3afc964df 100644 --- a/net/bluetooth/hci_core.c +++ b/net/bluetooth/hci_core.c @@ -183,6 +183,7 @@ static void hci_init_req(struct hci_dev *hdev, unsigned long opt) { struct sk_buff *skb; __le16 param; + __u8 flt_type; BT_DBG("%s %ld", hdev->name, opt); @@ -233,11 +234,8 @@ static void hci_init_req(struct hci_dev *hdev, unsigned long opt) /* Optional initialization */ /* Clear Event Filters */ - { - struct hci_cp_set_event_flt cp; - cp.flt_type = HCI_FLT_CLEAR_ALL; - hci_send_cmd(hdev, OGF_HOST_CTL, OCF_SET_EVENT_FLT, sizeof(cp), &cp); - } + flt_type = HCI_FLT_CLEAR_ALL; + hci_send_cmd(hdev, OGF_HOST_CTL, OCF_SET_EVENT_FLT, 1, &flt_type); /* Page timeout ~20 secs */ param = cpu_to_le16(0x8000); @@ -826,7 +824,7 @@ EXPORT_SYMBOL(hci_free_dev); int hci_register_dev(struct hci_dev *hdev) { struct list_head *head = &hci_dev_list, *p; - int id = 0; + int i, id = 0; BT_DBG("%p name %s type %d owner %p", hdev, hdev->name, hdev->type, hdev->owner); @@ -851,6 +849,7 @@ int hci_register_dev(struct hci_dev *hdev) hdev->flags = 0; hdev->pkt_type = (HCI_DM1 | HCI_DH1 | HCI_HV1); + hdev->esco_type = (ESCO_HV1); hdev->link_mode = (HCI_LM_ACCEPT); hdev->idle_timeout = 0; @@ -865,6 +864,9 @@ int hci_register_dev(struct hci_dev *hdev) skb_queue_head_init(&hdev->cmd_q); skb_queue_head_init(&hdev->raw_q); + for (i = 0; i < 3; i++) + hdev->reassembly[i] = NULL; + init_waitqueue_head(&hdev->req_wait_q); init_MUTEX(&hdev->req_lock); @@ -889,6 +891,8 @@ EXPORT_SYMBOL(hci_register_dev); /* Unregister HCI device */ int hci_unregister_dev(struct hci_dev *hdev) { + int i; + BT_DBG("%p name %s type %d", hdev, hdev->name, hdev->type); hci_unregister_sysfs(hdev); @@ -899,9 +903,13 @@ int hci_unregister_dev(struct hci_dev *hdev) hci_dev_do_close(hdev); + for (i = 0; i < 3; i++) + kfree_skb(hdev->reassembly[i]); + hci_notify(hdev, HCI_DEV_UNREG); __hci_dev_put(hdev); + return 0; } EXPORT_SYMBOL(hci_unregister_dev); @@ -922,6 +930,90 @@ int hci_resume_dev(struct hci_dev *hdev) } EXPORT_SYMBOL(hci_resume_dev); +/* Receive packet type fragment */ +#define __reassembly(hdev, type) ((hdev)->reassembly[(type) - 2]) + +int hci_recv_fragment(struct hci_dev *hdev, int type, void *data, int count) +{ + if (type < HCI_ACLDATA_PKT || type > HCI_EVENT_PKT) + return -EILSEQ; + + while (count) { + struct sk_buff *skb = __reassembly(hdev, type); + struct { int expect; } *scb; + int len = 0; + + if (!skb) { + /* Start of the frame */ + + switch (type) { + case HCI_EVENT_PKT: + if (count >= HCI_EVENT_HDR_SIZE) { + struct hci_event_hdr *h = data; + len = HCI_EVENT_HDR_SIZE + h->plen; + } else + return -EILSEQ; + break; + + case HCI_ACLDATA_PKT: + if (count >= HCI_ACL_HDR_SIZE) { + struct hci_acl_hdr *h = data; + len = HCI_ACL_HDR_SIZE + __le16_to_cpu(h->dlen); + } else + return -EILSEQ; + break; + + case HCI_SCODATA_PKT: + if (count >= HCI_SCO_HDR_SIZE) { + struct hci_sco_hdr *h = data; + len = HCI_SCO_HDR_SIZE + h->dlen; + } else + return -EILSEQ; + break; + } + + skb = bt_skb_alloc(len, GFP_ATOMIC); + if (!skb) { + BT_ERR("%s no memory for packet", hdev->name); + return -ENOMEM; + } + + skb->dev = (void *) hdev; + bt_cb(skb)->pkt_type = type; + + __reassembly(hdev, type) = skb; + + scb = (void *) skb->cb; + scb->expect = len; + } else { + /* Continuation */ + + scb = (void *) skb->cb; + len = scb->expect; + } + + len = min(len, count); + + memcpy(skb_put(skb, len), data, len); + + scb->expect -= len; + + if (scb->expect == 0) { + /* Complete frame */ + + __reassembly(hdev, type) = NULL; + + bt_cb(skb)->pkt_type = type; + hci_recv_frame(skb); + } + + count -= len; data += len; + } + + return 0; +} +EXPORT_SYMBOL(hci_recv_fragment); + /* ---- Interface to upper protocols ---- */ /* Register/Unregister protocols. @@ -1029,7 +1121,7 @@ int hci_send_cmd(struct hci_dev *hdev, __u16 ogf, __u16 ocf, __u32 plen, void *p skb = bt_skb_alloc(len, GFP_ATOMIC); if (!skb) { - BT_ERR("%s Can't allocate memory for HCI command", hdev->name); + BT_ERR("%s no memory for command", hdev->name); return -ENOMEM; } @@ -1161,7 +1253,7 @@ EXPORT_SYMBOL(hci_send_sco); static inline struct hci_conn *hci_low_sent(struct hci_dev *hdev, __u8 type, int *quote) { struct hci_conn_hash *h = &hdev->conn_hash; - struct hci_conn *conn = NULL; + struct hci_conn *conn = NULL; int num = 0, min = ~0; struct list_head *p; diff --git a/net/bluetooth/hci_event.c b/net/bluetooth/hci_event.c index 447ba7131220..4baea1e38652 100644 --- a/net/bluetooth/hci_event.c +++ b/net/bluetooth/hci_event.c @@ -350,11 +350,24 @@ static void hci_cc_info_param(struct hci_dev *hdev, __u16 ocf, struct sk_buff *s if (hdev->features[0] & LMP_5SLOT) hdev->pkt_type |= (HCI_DM5 | HCI_DH5); - if (hdev->features[1] & LMP_HV2) - hdev->pkt_type |= (HCI_HV2); + if (hdev->features[1] & LMP_HV2) { + hdev->pkt_type |= (HCI_HV2); + hdev->esco_type |= (ESCO_HV2); + } + + if (hdev->features[1] & LMP_HV3) { + hdev->pkt_type |= (HCI_HV3); + hdev->esco_type |= (ESCO_HV3); + } - if (hdev->features[1] & LMP_HV3) - hdev->pkt_type |= (HCI_HV3); + if (hdev->features[3] & LMP_ESCO) + hdev->esco_type |= (ESCO_EV3); + + if (hdev->features[4] & LMP_EV4) + hdev->esco_type |= (ESCO_EV4); + + if (hdev->features[4] & LMP_EV5) + hdev->esco_type |= (ESCO_EV5); BT_DBG("%s: features 0x%x 0x%x 0x%x", hdev->name, lf->features[0], lf->features[1], lf->features[2]); @@ -881,12 +894,12 @@ static inline void hci_num_comp_pkts_evt(struct hci_dev *hdev, struct sk_buff *s if (conn) { conn->sent -= count; - if (conn->type == SCO_LINK) { - if ((hdev->sco_cnt += count) > hdev->sco_pkts) - hdev->sco_cnt = hdev->sco_pkts; - } else { + if (conn->type == ACL_LINK) { if ((hdev->acl_cnt += count) > hdev->acl_pkts) hdev->acl_cnt = hdev->acl_pkts; + } else { + if ((hdev->sco_cnt += count) > hdev->sco_pkts) + hdev->sco_cnt = hdev->sco_pkts; } } } diff --git a/net/bluetooth/hci_sock.c b/net/bluetooth/hci_sock.c index 1dae3dfc66a9..5ccea5fbd236 100644 --- a/net/bluetooth/hci_sock.c +++ b/net/bluetooth/hci_sock.c @@ -37,6 +37,7 @@ #include <linux/skbuff.h> #include <linux/workqueue.h> #include <linux/interrupt.h> +#include <linux/compat.h> #include <linux/socket.h> #include <linux/ioctl.h> #include <net/sock.h> @@ -70,15 +71,15 @@ static struct hci_sec_filter hci_sec_filter = { { { 0x0 }, /* OGF_LINK_CTL */ - { 0xbe000006, 0x00000001, 0x000000, 0x00 }, + { 0xbe000006, 0x00000001, 0x00000000, 0x00 }, /* OGF_LINK_POLICY */ - { 0x00005200, 0x00000000, 0x000000, 0x00 }, + { 0x00005200, 0x00000000, 0x00000000, 0x00 }, /* OGF_HOST_CTL */ - { 0xaab00200, 0x2b402aaa, 0x020154, 0x00 }, + { 0xaab00200, 0x2b402aaa, 0x05220154, 0x00 }, /* OGF_INFO_PARAM */ - { 0x000002be, 0x00000000, 0x000000, 0x00 }, + { 0x000002be, 0x00000000, 0x00000000, 0x00 }, /* OGF_STATUS_PARAM */ - { 0x000000ea, 0x00000000, 0x000000, 0x00 } + { 0x000000ea, 0x00000000, 0x00000000, 0x00 } } }; @@ -342,9 +343,24 @@ static inline void hci_sock_cmsg(struct sock *sk, struct msghdr *msg, struct sk_ if (mask & HCI_CMSG_TSTAMP) { struct timeval tv; + void *data; + int len; skb_get_timestamp(skb, &tv); - put_cmsg(msg, SOL_HCI, HCI_CMSG_TSTAMP, sizeof(tv), &tv); + + data = &tv; + len = sizeof(tv); +#ifdef CONFIG_COMPAT + if (msg->msg_flags & MSG_CMSG_COMPAT) { + struct compat_timeval ctv; + ctv.tv_sec = tv.tv_sec; + ctv.tv_usec = tv.tv_usec; + data = &ctv; + len = sizeof(ctv); + } +#endif + + put_cmsg(msg, SOL_HCI, HCI_CMSG_TSTAMP, len, data); } } diff --git a/net/bluetooth/hidp/core.c b/net/bluetooth/hidp/core.c index 450eb0244bbf..64d89ca28847 100644 --- a/net/bluetooth/hidp/core.c +++ b/net/bluetooth/hidp/core.c @@ -28,6 +28,7 @@ #include <linux/sched.h> #include <linux/slab.h> #include <linux/poll.h> +#include <linux/freezer.h> #include <linux/fcntl.h> #include <linux/skbuff.h> #include <linux/socket.h> @@ -547,7 +548,6 @@ static int hidp_session(void *arg) daemonize("khidpd_%04x%04x", vendor, product); set_user_nice(current, -15); - current->flags |= PF_NOFREEZE; init_waitqueue_entry(&ctrl_wait, current); init_waitqueue_entry(&intr_wait, current); diff --git a/net/bluetooth/l2cap.c b/net/bluetooth/l2cap.c index 670ff95ca64b..c4e4ce4ebb2b 100644 --- a/net/bluetooth/l2cap.c +++ b/net/bluetooth/l2cap.c @@ -353,7 +353,7 @@ static inline int l2cap_send_cmd(struct l2cap_conn *conn, u8 ident, u8 code, u16 } /* ---- Socket interface ---- */ -static struct sock *__l2cap_get_sock_by_addr(u16 psm, bdaddr_t *src) +static struct sock *__l2cap_get_sock_by_addr(__le16 psm, bdaddr_t *src) { struct sock *sk; struct hlist_node *node; @@ -368,7 +368,7 @@ found: /* Find socket with psm and source bdaddr. * Returns closest match. */ -static struct sock *__l2cap_get_sock_by_psm(int state, u16 psm, bdaddr_t *src) +static struct sock *__l2cap_get_sock_by_psm(int state, __le16 psm, bdaddr_t *src) { struct sock *sk = NULL, *sk1 = NULL; struct hlist_node *node; @@ -392,7 +392,7 @@ static struct sock *__l2cap_get_sock_by_psm(int state, u16 psm, bdaddr_t *src) /* Find socket with given address (psm, src). * Returns locked socket */ -static inline struct sock *l2cap_get_sock_by_psm(int state, u16 psm, bdaddr_t *src) +static inline struct sock *l2cap_get_sock_by_psm(int state, __le16 psm, bdaddr_t *src) { struct sock *s; read_lock(&l2cap_sk_list.lock); @@ -586,7 +586,7 @@ static int l2cap_sock_bind(struct socket *sock, struct sockaddr *addr, int addr_ goto done; } - if (la->l2_psm > 0 && btohs(la->l2_psm) < 0x1001 && + if (la->l2_psm && btohs(la->l2_psm) < 0x1001 && !capable(CAP_NET_BIND_SERVICE)) { err = -EACCES; goto done; @@ -748,7 +748,7 @@ static int l2cap_sock_listen(struct socket *sock, int backlog) write_lock_bh(&l2cap_sk_list.lock); for (psm = 0x1001; psm < 0x1100; psm += 2) - if (!__l2cap_get_sock_by_addr(psm, src)) { + if (!__l2cap_get_sock_by_addr(htobs(psm), src)) { l2cap_pi(sk)->psm = htobs(psm); l2cap_pi(sk)->sport = htobs(psm); err = 0; @@ -873,7 +873,7 @@ static inline int l2cap_do_send(struct sock *sk, struct msghdr *msg, int len) lh->len = cpu_to_le16(len + (hlen - L2CAP_HDR_SIZE)); if (sk->sk_type == SOCK_DGRAM) - put_unaligned(l2cap_pi(sk)->psm, (u16 *) skb_put(skb, 2)); + put_unaligned(l2cap_pi(sk)->psm, (__le16 *) skb_put(skb, 2)); if (memcpy_fromiovec(skb_put(skb, count), msg->msg_iov, count)) { err = -EFAULT; @@ -1256,11 +1256,11 @@ static inline int l2cap_get_conf_opt(void **ptr, int *type, int *olen, unsigned break; case 2: - *val = __le16_to_cpu(*((u16 *)opt->val)); + *val = __le16_to_cpu(*((__le16 *)opt->val)); break; case 4: - *val = __le32_to_cpu(*((u32 *)opt->val)); + *val = __le32_to_cpu(*((__le32 *)opt->val)); break; default: @@ -1287,11 +1287,11 @@ static void l2cap_add_conf_opt(void **ptr, u8 type, u8 len, unsigned long val) break; case 2: - *((u16 *) opt->val) = cpu_to_le16(val); + *((__le16 *) opt->val) = cpu_to_le16(val); break; case 4: - *((u32 *) opt->val) = cpu_to_le32(val); + *((__le32 *) opt->val) = cpu_to_le32(val); break; default: @@ -1406,7 +1406,7 @@ static inline int l2cap_connect_req(struct l2cap_conn *conn, struct l2cap_cmd_hd int result = 0, status = 0; u16 dcid = 0, scid = __le16_to_cpu(req->scid); - u16 psm = req->psm; + __le16 psm = req->psm; BT_DBG("psm 0x%2.2x scid 0x%4.4x", psm, scid); @@ -1530,7 +1530,7 @@ static inline int l2cap_connect_rsp(struct l2cap_conn *conn, struct l2cap_cmd_hd return 0; } -static inline int l2cap_config_req(struct l2cap_conn *conn, struct l2cap_cmd_hdr *cmd, u8 *data) +static inline int l2cap_config_req(struct l2cap_conn *conn, struct l2cap_cmd_hdr *cmd, u16 cmd_len, u8 *data) { struct l2cap_conf_req *req = (struct l2cap_conf_req *) data; u16 dcid, flags; @@ -1550,7 +1550,7 @@ static inline int l2cap_config_req(struct l2cap_conn *conn, struct l2cap_cmd_hdr goto unlock; /* Reject if config buffer is too small. */ - len = cmd->len - sizeof(*req); + len = cmd_len - sizeof(*req); if (l2cap_pi(sk)->conf_len + len > sizeof(l2cap_pi(sk)->conf_req)) { l2cap_send_cmd(conn, cmd->ident, L2CAP_CONF_RSP, l2cap_build_conf_rsp(sk, rsp, @@ -1748,15 +1748,16 @@ static inline void l2cap_sig_channel(struct l2cap_conn *conn, struct sk_buff *sk l2cap_raw_recv(conn, skb); while (len >= L2CAP_CMD_HDR_SIZE) { + u16 cmd_len; memcpy(&cmd, data, L2CAP_CMD_HDR_SIZE); data += L2CAP_CMD_HDR_SIZE; len -= L2CAP_CMD_HDR_SIZE; - cmd.len = __le16_to_cpu(cmd.len); + cmd_len = le16_to_cpu(cmd.len); - BT_DBG("code 0x%2.2x len %d id 0x%2.2x", cmd.code, cmd.len, cmd.ident); + BT_DBG("code 0x%2.2x len %d id 0x%2.2x", cmd.code, cmd_len, cmd.ident); - if (cmd.len > len || !cmd.ident) { + if (cmd_len > len || !cmd.ident) { BT_DBG("corrupted command"); break; } @@ -1775,7 +1776,7 @@ static inline void l2cap_sig_channel(struct l2cap_conn *conn, struct sk_buff *sk break; case L2CAP_CONF_REQ: - err = l2cap_config_req(conn, &cmd, data); + err = l2cap_config_req(conn, &cmd, cmd_len, data); break; case L2CAP_CONF_RSP: @@ -1791,7 +1792,7 @@ static inline void l2cap_sig_channel(struct l2cap_conn *conn, struct sk_buff *sk break; case L2CAP_ECHO_REQ: - l2cap_send_cmd(conn, cmd.ident, L2CAP_ECHO_RSP, cmd.len, data); + l2cap_send_cmd(conn, cmd.ident, L2CAP_ECHO_RSP, cmd_len, data); break; case L2CAP_ECHO_RSP: @@ -1820,8 +1821,8 @@ static inline void l2cap_sig_channel(struct l2cap_conn *conn, struct sk_buff *sk l2cap_send_cmd(conn, cmd.ident, L2CAP_COMMAND_REJ, sizeof(rej), &rej); } - data += cmd.len; - len -= cmd.len; + data += cmd_len; + len -= cmd_len; } kfree_skb(skb); @@ -1863,7 +1864,7 @@ done: return 0; } -static inline int l2cap_conless_channel(struct l2cap_conn *conn, u16 psm, struct sk_buff *skb) +static inline int l2cap_conless_channel(struct l2cap_conn *conn, __le16 psm, struct sk_buff *skb) { struct sock *sk; @@ -1893,7 +1894,8 @@ done: static void l2cap_recv_frame(struct l2cap_conn *conn, struct sk_buff *skb) { struct l2cap_hdr *lh = (void *) skb->data; - u16 cid, psm, len; + u16 cid, len; + __le16 psm; skb_pull(skb, L2CAP_HDR_SIZE); cid = __le16_to_cpu(lh->cid); @@ -1907,7 +1909,7 @@ static void l2cap_recv_frame(struct l2cap_conn *conn, struct sk_buff *skb) break; case 0x0002: - psm = get_unaligned((u16 *) skb->data); + psm = get_unaligned((__le16 *) skb->data); skb_pull(skb, 2); l2cap_conless_channel(conn, psm, skb); break; diff --git a/net/bluetooth/rfcomm/core.c b/net/bluetooth/rfcomm/core.c index 52e04df323ea..bb7220770f2c 100644 --- a/net/bluetooth/rfcomm/core.c +++ b/net/bluetooth/rfcomm/core.c @@ -33,6 +33,7 @@ #include <linux/sched.h> #include <linux/signal.h> #include <linux/init.h> +#include <linux/freezer.h> #include <linux/wait.h> #include <linux/device.h> #include <linux/net.h> @@ -1940,7 +1941,6 @@ static int rfcomm_run(void *unused) daemonize("krfcommd"); set_user_nice(current, -10); - current->flags |= PF_NOFREEZE; BT_DBG(""); diff --git a/net/bluetooth/rfcomm/tty.c b/net/bluetooth/rfcomm/tty.c index 23ba61a13bdd..22a832098d44 100644 --- a/net/bluetooth/rfcomm/tty.c +++ b/net/bluetooth/rfcomm/tty.c @@ -267,7 +267,7 @@ static int rfcomm_dev_add(struct rfcomm_dev_req *req, struct rfcomm_dlc *dlc) out: write_unlock_bh(&rfcomm_dev_lock); - if (err) { + if (err < 0) { kfree(dev); return err; } @@ -275,9 +275,10 @@ out: dev->tty_dev = tty_register_device(rfcomm_tty_driver, dev->id, NULL); if (IS_ERR(dev->tty_dev)) { + err = PTR_ERR(dev->tty_dev); list_del(&dev->list); kfree(dev); - return PTR_ERR(dev->tty_dev); + return err; } return dev->id; diff --git a/net/bridge/br_device.c b/net/bridge/br_device.c index c326602060a6..99292e8e1d0f 100644 --- a/net/bridge/br_device.c +++ b/net/bridge/br_device.c @@ -41,11 +41,11 @@ int br_dev_xmit(struct sk_buff *skb, struct net_device *dev) skb_pull(skb, ETH_HLEN); if (dest[0] & 1) - br_flood_deliver(br, skb, 0); + br_flood_deliver(br, skb); else if ((dst = __br_fdb_get(br, dest)) != NULL) br_deliver(dst->dst, skb); else - br_flood_deliver(br, skb, 0); + br_flood_deliver(br, skb); return 0; } @@ -179,6 +179,5 @@ void br_dev_setup(struct net_device *dev) dev->priv_flags = IFF_EBRIDGE; dev->features = NETIF_F_SG | NETIF_F_FRAGLIST | NETIF_F_HIGHDMA | - NETIF_F_GSO_SOFTWARE | NETIF_F_NO_CSUM | - NETIF_F_GSO_ROBUST | NETIF_F_LLTX; + NETIF_F_GSO_MASK | NETIF_F_NO_CSUM | NETIF_F_LLTX; } diff --git a/net/bridge/br_fdb.c b/net/bridge/br_fdb.c index 3fc697293819..eb57502bb264 100644 --- a/net/bridge/br_fdb.c +++ b/net/bridge/br_fdb.c @@ -36,7 +36,7 @@ int __init br_fdb_init(void) br_fdb_cache = kmem_cache_create("bridge_fdb_cache", sizeof(struct net_bridge_fdb_entry), 0, - SLAB_HWCACHE_ALIGN, NULL, NULL); + SLAB_HWCACHE_ALIGN, NULL); if (!br_fdb_cache) return -ENOMEM; @@ -384,6 +384,11 @@ void br_fdb_update(struct net_bridge *br, struct net_bridge_port *source, if (hold_time(br) == 0) return; + /* ignore packets unless we are using this port */ + if (!(source->state == BR_STATE_LEARNING || + source->state == BR_STATE_FORWARDING)) + return; + fdb = fdb_find(head, addr); if (likely(fdb)) { /* attempt to update an entry for a local interface */ diff --git a/net/bridge/br_forward.c b/net/bridge/br_forward.c index ada7f495445c..bdd7c35c3c7b 100644 --- a/net/bridge/br_forward.c +++ b/net/bridge/br_forward.c @@ -100,24 +100,13 @@ void br_forward(const struct net_bridge_port *to, struct sk_buff *skb) } /* called under bridge lock */ -static void br_flood(struct net_bridge *br, struct sk_buff *skb, int clone, +static void br_flood(struct net_bridge *br, struct sk_buff *skb, void (*__packet_hook)(const struct net_bridge_port *p, struct sk_buff *skb)) { struct net_bridge_port *p; struct net_bridge_port *prev; - if (clone) { - struct sk_buff *skb2; - - if ((skb2 = skb_clone(skb, GFP_ATOMIC)) == NULL) { - br->statistics.tx_dropped++; - return; - } - - skb = skb2; - } - prev = NULL; list_for_each_entry_rcu(p, &br->port_list, list) { @@ -148,13 +137,13 @@ static void br_flood(struct net_bridge *br, struct sk_buff *skb, int clone, /* called with rcu_read_lock */ -void br_flood_deliver(struct net_bridge *br, struct sk_buff *skb, int clone) +void br_flood_deliver(struct net_bridge *br, struct sk_buff *skb) { - br_flood(br, skb, clone, __br_deliver); + br_flood(br, skb, __br_deliver); } /* called under bridge lock */ -void br_flood_forward(struct net_bridge *br, struct sk_buff *skb, int clone) +void br_flood_forward(struct net_bridge *br, struct sk_buff *skb) { - br_flood(br, skb, clone, __br_forward); + br_flood(br, skb, __br_forward); } diff --git a/net/bridge/br_if.c b/net/bridge/br_if.c index fefd7c12d521..9272f12f664c 100644 --- a/net/bridge/br_if.c +++ b/net/bridge/br_if.c @@ -29,35 +29,24 @@ * Determine initial path cost based on speed. * using recommendations from 802.1d standard * - * Need to simulate user ioctl because not all device's that support - * ethtool, use ethtool_ops. Also, since driver might sleep need to - * not be holding any locks. + * Since driver might sleep need to not be holding any locks. */ static int port_cost(struct net_device *dev) { - struct ethtool_cmd ecmd = { ETHTOOL_GSET }; - struct ifreq ifr; - mm_segment_t old_fs; - int err; - - strncpy(ifr.ifr_name, dev->name, IFNAMSIZ); - ifr.ifr_data = (void __user *) &ecmd; - - old_fs = get_fs(); - set_fs(KERNEL_DS); - err = dev_ethtool(&ifr); - set_fs(old_fs); - - if (!err) { - switch(ecmd.speed) { - case SPEED_100: - return 19; - case SPEED_1000: - return 4; - case SPEED_10000: - return 2; - case SPEED_10: - return 100; + if (dev->ethtool_ops && dev->ethtool_ops->get_settings) { + struct ethtool_cmd ecmd = { .cmd = ETHTOOL_GSET, }; + + if (!dev->ethtool_ops->get_settings(dev, &ecmd)) { + switch(ecmd.speed) { + case SPEED_10000: + return 2; + case SPEED_1000: + return 4; + case SPEED_100: + return 19; + case SPEED_10: + return 100; + } } } diff --git a/net/bridge/br_input.c b/net/bridge/br_input.c index 420bbb9955e9..3a8a015c92e0 100644 --- a/net/bridge/br_input.c +++ b/net/bridge/br_input.c @@ -43,7 +43,7 @@ int br_handle_frame_finish(struct sk_buff *skb) struct net_bridge_port *p = rcu_dereference(skb->dev->br_port); struct net_bridge *br; struct net_bridge_fdb_entry *dst; - int passedup = 0; + struct sk_buff *skb2; if (!p || p->state == BR_STATE_DISABLED) goto drop; @@ -55,39 +55,35 @@ int br_handle_frame_finish(struct sk_buff *skb) if (p->state == BR_STATE_LEARNING) goto drop; - if (br->dev->flags & IFF_PROMISC) { - struct sk_buff *skb2; + /* The packet skb2 goes to the local host (NULL to skip). */ + skb2 = NULL; - skb2 = skb_clone(skb, GFP_ATOMIC); - if (skb2 != NULL) { - passedup = 1; - br_pass_frame_up(br, skb2); - } - } + if (br->dev->flags & IFF_PROMISC) + skb2 = skb; + + dst = NULL; if (is_multicast_ether_addr(dest)) { br->statistics.multicast++; - br_flood_forward(br, skb, !passedup); - if (!passedup) - br_pass_frame_up(br, skb); - goto out; + skb2 = skb; + } else if ((dst = __br_fdb_get(br, dest)) && dst->is_local) { + skb2 = skb; + /* Do not forward the packet since it's local. */ + skb = NULL; } - dst = __br_fdb_get(br, dest); - if (dst != NULL && dst->is_local) { - if (!passedup) - br_pass_frame_up(br, skb); - else - kfree_skb(skb); - goto out; - } + if (skb2 == skb) + skb2 = skb_clone(skb, GFP_ATOMIC); - if (dst != NULL) { - br_forward(dst->dst, skb); - goto out; - } + if (skb2) + br_pass_frame_up(br, skb2); - br_flood_forward(br, skb, 0); + if (skb) { + if (dst) + br_forward(dst->dst, skb); + else + br_flood_forward(br, skb); + } out: return 0; @@ -101,9 +97,8 @@ static int br_handle_local_finish(struct sk_buff *skb) { struct net_bridge_port *p = rcu_dereference(skb->dev->br_port); - if (p && p->state != BR_STATE_DISABLED) + if (p) br_fdb_update(p->br, p, eth_hdr(skb)->h_source); - return 0; /* process further */ } @@ -112,9 +107,9 @@ static int br_handle_local_finish(struct sk_buff *skb) */ static inline int is_link_local(const unsigned char *dest) { - const u16 *a = (const u16 *) dest; - static const u16 *const b = (const u16 *const ) br_group_address; - static const u16 m = __constant_cpu_to_be16(0xfff0); + __be16 *a = (__be16 *)dest; + static const __be16 *b = (const __be16 *)br_group_address; + static const __be16 m = __constant_cpu_to_be16(0xfff0); return ((a[0] ^ b[0]) | (a[1] ^ b[1]) | ((a[2] ^ b[2]) & m)) == 0; } diff --git a/net/bridge/br_netfilter.c b/net/bridge/br_netfilter.c index fa779874b9dd..fc13130035e7 100644 --- a/net/bridge/br_netfilter.c +++ b/net/bridge/br_netfilter.c @@ -183,7 +183,7 @@ int nf_bridge_copy_header(struct sk_buff *skb) int err; int header_size = ETH_HLEN + nf_bridge_encap_header_len(skb); - err = skb_cow(skb, header_size); + err = skb_cow_head(skb, header_size); if (err) return err; @@ -509,8 +509,14 @@ static unsigned int br_nf_pre_routing(unsigned int hook, struct sk_buff **pskb, int (*okfn)(struct sk_buff *)) { struct iphdr *iph; - __u32 len; struct sk_buff *skb = *pskb; + __u32 len = nf_bridge_encap_header_len(skb); + + if ((skb = skb_share_check(skb, GFP_ATOMIC)) == NULL) + return NF_STOLEN; + + if (unlikely(!pskb_may_pull(skb, len))) + goto out; if (skb->protocol == htons(ETH_P_IPV6) || IS_VLAN_IPV6(skb) || IS_PPPOE_IPV6(skb)) { @@ -518,8 +524,6 @@ static unsigned int br_nf_pre_routing(unsigned int hook, struct sk_buff **pskb, if (!brnf_call_ip6tables) return NF_ACCEPT; #endif - if ((skb = skb_share_check(*pskb, GFP_ATOMIC)) == NULL) - goto out; nf_bridge_pull_encap_header_rcsum(skb); return br_nf_pre_routing_ipv6(hook, skb, in, out, okfn); } @@ -532,8 +536,6 @@ static unsigned int br_nf_pre_routing(unsigned int hook, struct sk_buff **pskb, !IS_PPPOE_IP(skb)) return NF_ACCEPT; - if ((skb = skb_share_check(*pskb, GFP_ATOMIC)) == NULL) - goto out; nf_bridge_pull_encap_header_rcsum(skb); if (!pskb_may_pull(skb, sizeof(struct iphdr))) diff --git a/net/bridge/br_private.h b/net/bridge/br_private.h index 21bf3a9a03fd..e6dc6f52990d 100644 --- a/net/bridge/br_private.h +++ b/net/bridge/br_private.h @@ -170,12 +170,8 @@ extern int br_dev_queue_push_xmit(struct sk_buff *skb); extern void br_forward(const struct net_bridge_port *to, struct sk_buff *skb); extern int br_forward_finish(struct sk_buff *skb); -extern void br_flood_deliver(struct net_bridge *br, - struct sk_buff *skb, - int clone); -extern void br_flood_forward(struct net_bridge *br, - struct sk_buff *skb, - int clone); +extern void br_flood_deliver(struct net_bridge *br, struct sk_buff *skb); +extern void br_flood_forward(struct net_bridge *br, struct sk_buff *skb); /* br_if.c */ extern void br_port_carrier_check(struct net_bridge_port *p); diff --git a/net/bridge/br_stp_if.c b/net/bridge/br_stp_if.c index a786e7863200..1a430eccec9b 100644 --- a/net/bridge/br_stp_if.c +++ b/net/bridge/br_stp_if.c @@ -125,14 +125,14 @@ static void br_stp_start(struct net_bridge *br) char *argv[] = { BR_STP_PROG, br->dev->name, "start", NULL }; char *envp[] = { NULL }; - r = call_usermodehelper(BR_STP_PROG, argv, envp, 1); + r = call_usermodehelper(BR_STP_PROG, argv, envp, UMH_WAIT_PROC); if (r == 0) { br->stp_enabled = BR_USER_STP; printk(KERN_INFO "%s: userspace STP started\n", br->dev->name); } else { br->stp_enabled = BR_KERNEL_STP; printk(KERN_INFO "%s: starting userspace STP failed, " - "staring kernel STP\n", br->dev->name); + "starting kernel STP\n", br->dev->name); /* To start timers on any ports left in blocking */ spin_lock_bh(&br->lock); diff --git a/net/bridge/br_sysfs_br.c b/net/bridge/br_sysfs_br.c index 33c6c4a7c689..c65f54e0e27f 100644 --- a/net/bridge/br_sysfs_br.c +++ b/net/bridge/br_sysfs_br.c @@ -147,20 +147,27 @@ static ssize_t show_stp_state(struct device *d, return sprintf(buf, "%d\n", br->stp_enabled); } -static void set_stp_state(struct net_bridge *br, unsigned long val) -{ - rtnl_lock(); - spin_unlock_bh(&br->lock); - br_stp_set_enabled(br, val); - spin_lock_bh(&br->lock); - rtnl_unlock(); -} static ssize_t store_stp_state(struct device *d, struct device_attribute *attr, const char *buf, size_t len) { - return store_bridge_parm(d, buf, len, set_stp_state); + struct net_bridge *br = to_bridge(d); + char *endp; + unsigned long val; + + if (!capable(CAP_NET_ADMIN)) + return -EPERM; + + val = simple_strtoul(buf, &endp, 0); + if (endp == buf) + return -EINVAL; + + rtnl_lock(); + br_stp_set_enabled(br, val); + rtnl_unlock(); + + return len; } static DEVICE_ATTR(stp_state, S_IRUGO | S_IWUSR, show_stp_state, store_stp_state); @@ -360,8 +367,9 @@ static struct attribute_group bridge_group = { * * Returns the number of bytes read. */ -static ssize_t brforward_read(struct kobject *kobj, char *buf, - loff_t off, size_t count) +static ssize_t brforward_read(struct kobject *kobj, + struct bin_attribute *bin_attr, + char *buf, loff_t off, size_t count) { struct device *dev = to_dev(kobj); struct net_bridge *br = to_bridge(dev); @@ -383,8 +391,7 @@ static ssize_t brforward_read(struct kobject *kobj, char *buf, static struct bin_attribute bridge_forward = { .attr = { .name = SYSFS_BRIDGE_FDB, - .mode = S_IRUGO, - .owner = THIS_MODULE, }, + .mode = S_IRUGO, }, .read = brforward_read, }; diff --git a/net/bridge/br_sysfs_if.c b/net/bridge/br_sysfs_if.c index 2da22927d8dd..79db51fcb476 100644 --- a/net/bridge/br_sysfs_if.c +++ b/net/bridge/br_sysfs_if.c @@ -29,8 +29,7 @@ struct brport_attribute { #define BRPORT_ATTR(_name,_mode,_show,_store) \ struct brport_attribute brport_attr_##_name = { \ .attr = {.name = __stringify(_name), \ - .mode = _mode, \ - .owner = THIS_MODULE, }, \ + .mode = _mode }, \ .show = _show, \ .store = _store, \ }; diff --git a/net/bridge/netfilter/ebt_log.c b/net/bridge/netfilter/ebt_log.c index 984e9c64fb80..457815fb5584 100644 --- a/net/bridge/netfilter/ebt_log.c +++ b/net/bridge/netfilter/ebt_log.c @@ -9,7 +9,6 @@ * */ -#include <linux/in.h> #include <linux/netfilter_bridge/ebtables.h> #include <linux/netfilter_bridge/ebt_log.h> #include <linux/netfilter.h> diff --git a/net/bridge/netfilter/ebt_ulog.c b/net/bridge/netfilter/ebt_ulog.c index 6fec35225699..204c968fa86d 100644 --- a/net/bridge/netfilter/ebt_ulog.c +++ b/net/bridge/netfilter/ebt_ulog.c @@ -36,7 +36,6 @@ #include <linux/timer.h> #include <linux/netlink.h> #include <linux/netdevice.h> -#include <linux/module.h> #include <linux/netfilter_bridge/ebtables.h> #include <linux/netfilter_bridge/ebt_ulog.h> #include <net/sock.h> diff --git a/net/bridge/netfilter/ebtables.c b/net/bridge/netfilter/ebtables.c index ac9984f98e59..6018d0e51938 100644 --- a/net/bridge/netfilter/ebtables.c +++ b/net/bridge/netfilter/ebtables.c @@ -1513,6 +1513,7 @@ static struct nf_sockopt_ops ebt_sockopts = .get_optmin = EBT_BASE_CTL, .get_optmax = EBT_SO_GET_MAX + 1, .get = do_ebt_get_ctl, + .owner = THIS_MODULE, }; static int __init ebtables_init(void) @@ -1525,14 +1526,14 @@ static int __init ebtables_init(void) if ((ret = nf_register_sockopt(&ebt_sockopts)) < 0) return ret; - printk(KERN_NOTICE "Ebtables v2.0 registered\n"); + printk(KERN_INFO "Ebtables v2.0 registered\n"); return 0; } static void __exit ebtables_fini(void) { nf_unregister_sockopt(&ebt_sockopts); - printk(KERN_NOTICE "Ebtables v2.0 unregistered\n"); + printk(KERN_INFO "Ebtables v2.0 unregistered\n"); } EXPORT_SYMBOL(ebt_register_table); diff --git a/net/compat.c b/net/compat.c index 9a0f5f2b90c8..d74d82155d78 100644 --- a/net/compat.c +++ b/net/compat.c @@ -276,7 +276,8 @@ void scm_detach_fds_compat(struct msghdr *kmsg, struct scm_cookie *scm) err = security_file_receive(fp[i]); if (err) break; - err = get_unused_fd(); + err = get_unused_fd_flags(MSG_CMSG_CLOEXEC & kmsg->msg_flags + ? O_CLOEXEC : 0); if (err < 0) break; new_fd = err; diff --git a/net/core/datagram.c b/net/core/datagram.c index cb056f476126..029b93e246b4 100644 --- a/net/core/datagram.c +++ b/net/core/datagram.c @@ -450,6 +450,9 @@ int skb_copy_and_csum_datagram_iovec(struct sk_buff *skb, __wsum csum; int chunk = skb->len - hlen; + if (!chunk) + return 0; + /* Skip filled elements. * Pretty silly, look at memcpy_toiovec, though 8) */ diff --git a/net/core/dev.c b/net/core/dev.c index 1561f61e4209..a76021c71207 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -98,6 +98,7 @@ #include <linux/seq_file.h> #include <linux/stat.h> #include <linux/if_bridge.h> +#include <linux/if_macvlan.h> #include <net/dst.h> #include <net/pkt_sched.h> #include <net/checksum.h> @@ -151,9 +152,22 @@ static struct list_head ptype_base[16] __read_mostly; /* 16 way hashed list */ static struct list_head ptype_all __read_mostly; /* Taps */ #ifdef CONFIG_NET_DMA -static struct dma_client *net_dma_client; -static unsigned int net_dma_count; -static spinlock_t net_dma_event_lock; +struct net_dma { + struct dma_client client; + spinlock_t lock; + cpumask_t channel_mask; + struct dma_chan *channels[NR_CPUS]; +}; + +static enum dma_state_client +netdev_dma_event(struct dma_client *client, struct dma_chan *chan, + enum dma_state state); + +static struct net_dma net_dma = { + .client = { + .event_callback = netdev_dma_event, + }, +}; #endif /* @@ -803,7 +817,9 @@ int dev_alloc_name(struct net_device *dev, const char *name) */ int dev_change_name(struct net_device *dev, char *newname) { + char oldname[IFNAMSIZ]; int err = 0; + int ret; ASSERT_RTNL(); @@ -813,6 +829,8 @@ int dev_change_name(struct net_device *dev, char *newname) if (!dev_valid_name(newname)) return -EINVAL; + memcpy(oldname, dev->name, IFNAMSIZ); + if (strchr(newname, '%')) { err = dev_alloc_name(dev, newname); if (err < 0) @@ -824,10 +842,28 @@ int dev_change_name(struct net_device *dev, char *newname) else strlcpy(dev->name, newname, IFNAMSIZ); +rollback: device_rename(&dev->dev, dev->name); + + write_lock_bh(&dev_base_lock); hlist_del(&dev->name_hlist); hlist_add_head(&dev->name_hlist, dev_name_hash(dev->name)); - raw_notifier_call_chain(&netdev_chain, NETDEV_CHANGENAME, dev); + write_unlock_bh(&dev_base_lock); + + ret = raw_notifier_call_chain(&netdev_chain, NETDEV_CHANGENAME, dev); + ret = notifier_to_errno(ret); + + if (ret) { + if (err) { + printk(KERN_ERR + "%s: name change rollback failed: %d.\n", + dev->name, ret); + } else { + err = ret; + memcpy(dev->name, oldname, IFNAMSIZ); + goto rollback; + } + } return err; } @@ -942,7 +978,7 @@ int dev_open(struct net_device *dev) /* * Initialize multicasting status */ - dev_mc_upload(dev); + dev_set_rx_mode(dev); /* * Wakeup transmit queue engine @@ -1040,20 +1076,43 @@ int dev_close(struct net_device *dev) int register_netdevice_notifier(struct notifier_block *nb) { struct net_device *dev; + struct net_device *last; int err; rtnl_lock(); err = raw_notifier_chain_register(&netdev_chain, nb); - if (!err) { - for_each_netdev(dev) { - nb->notifier_call(nb, NETDEV_REGISTER, dev); + if (err) + goto unlock; - if (dev->flags & IFF_UP) - nb->notifier_call(nb, NETDEV_UP, dev); - } + for_each_netdev(dev) { + err = nb->notifier_call(nb, NETDEV_REGISTER, dev); + err = notifier_to_errno(err); + if (err) + goto rollback; + + if (!(dev->flags & IFF_UP)) + continue; + + nb->notifier_call(nb, NETDEV_UP, dev); } + +unlock: rtnl_unlock(); return err; + +rollback: + last = dev; + for_each_netdev(dev) { + if (dev == last) + break; + + if (dev->flags & IFF_UP) { + nb->notifier_call(nb, NETDEV_GOING_DOWN, dev); + nb->notifier_call(nb, NETDEV_DOWN, dev); + } + nb->notifier_call(nb, NETDEV_UNREGISTER, dev); + } + goto unlock; } /** @@ -1429,7 +1488,9 @@ gso: skb->next = nskb; return rc; } - if (unlikely(netif_queue_stopped(dev) && skb->next)) + if (unlikely((netif_queue_stopped(dev) || + netif_subqueue_stopped(dev, skb->queue_mapping)) && + skb->next)) return NETDEV_TX_BUSY; } while (skb->next); @@ -1510,8 +1571,10 @@ int dev_queue_xmit(struct sk_buff *skb) skb_headroom(skb)); if (!(dev->features & NETIF_F_GEN_CSUM) && - (!(dev->features & NETIF_F_IP_CSUM) || - skb->protocol != htons(ETH_P_IP))) + !((dev->features & NETIF_F_IP_CSUM) && + skb->protocol == htons(ETH_P_IP)) && + !((dev->features & NETIF_F_IPV6_CSUM) && + skb->protocol == htons(ETH_P_IPV6))) if (skb_checksum_help(skb)) goto out_kfree_skb; } @@ -1545,6 +1608,8 @@ gso: spin_lock(&dev->queue_lock); q = dev->qdisc; if (q->enqueue) { + /* reset queue_mapping to zero */ + skb->queue_mapping = 0; rc = q->enqueue(skb, q); qdisc_run(dev); spin_unlock(&dev->queue_lock); @@ -1574,7 +1639,8 @@ gso: HARD_TX_LOCK(dev, cpu); - if (!netif_queue_stopped(dev)) { + if (!netif_queue_stopped(dev) && + !netif_subqueue_stopped(dev, skb->queue_mapping)) { rc = 0; if (!dev_hard_start_xmit(skb, dev)) { HARD_TX_UNLOCK(dev); @@ -1793,6 +1859,28 @@ static inline struct sk_buff *handle_bridge(struct sk_buff *skb, #define handle_bridge(skb, pt_prev, ret, orig_dev) (skb) #endif +#if defined(CONFIG_MACVLAN) || defined(CONFIG_MACVLAN_MODULE) +struct sk_buff *(*macvlan_handle_frame_hook)(struct sk_buff *skb) __read_mostly; +EXPORT_SYMBOL_GPL(macvlan_handle_frame_hook); + +static inline struct sk_buff *handle_macvlan(struct sk_buff *skb, + struct packet_type **pt_prev, + int *ret, + struct net_device *orig_dev) +{ + if (skb->dev->macvlan_port == NULL) + return skb; + + if (*pt_prev) { + *ret = deliver_skb(skb, *pt_prev, orig_dev); + *pt_prev = NULL; + } + return macvlan_handle_frame_hook(skb); +} +#else +#define handle_macvlan(skb, pt_prev, ret, orig_dev) (skb) +#endif + #ifdef CONFIG_NET_CLS_ACT /* TODO: Maybe we should just force sch_ingress to be compiled in * when CONFIG_NET_CLS_ACT is? otherwise some useless instructions @@ -1900,6 +1988,9 @@ ncls: skb = handle_bridge(skb, &pt_prev, &ret, orig_dev); if (!skb) goto out; + skb = handle_macvlan(skb, &pt_prev, &ret, orig_dev); + if (!skb) + goto out; type = skb->protocol; list_for_each_entry_rcu(ptype, &ptype_base[ntohs(type)&15], list) { @@ -2015,12 +2106,13 @@ out: * There may not be any more sk_buffs coming right now, so push * any pending DMA copies to hardware */ - if (net_dma_client) { - struct dma_chan *chan; - rcu_read_lock(); - list_for_each_entry_rcu(chan, &net_dma_client->channels, client_node) - dma_async_memcpy_issue_pending(chan); - rcu_read_unlock(); + if (!cpus_empty(net_dma.channel_mask)) { + int chan_idx; + for_each_cpu_mask(chan_idx, net_dma.channel_mask) { + struct dma_chan *chan = net_dma.channels[chan_idx]; + if (chan) + dma_async_memcpy_issue_pending(chan); + } } #endif return; @@ -2496,26 +2588,17 @@ int netdev_set_master(struct net_device *slave, struct net_device *master) return 0; } -/** - * dev_set_promiscuity - update promiscuity count on a device - * @dev: device - * @inc: modifier - * - * Add or remove promiscuity from a device. While the count in the device - * remains above zero the interface remains promiscuous. Once it hits zero - * the device reverts back to normal filtering operation. A negative inc - * value is used to drop promiscuity on the device. - */ -void dev_set_promiscuity(struct net_device *dev, int inc) +static void __dev_set_promiscuity(struct net_device *dev, int inc) { unsigned short old_flags = dev->flags; + ASSERT_RTNL(); + if ((dev->promiscuity += inc) == 0) dev->flags &= ~IFF_PROMISC; else dev->flags |= IFF_PROMISC; if (dev->flags != old_flags) { - dev_mc_upload(dev); printk(KERN_INFO "device %s %s promiscuous mode\n", dev->name, (dev->flags & IFF_PROMISC) ? "entered" : "left"); @@ -2525,10 +2608,32 @@ void dev_set_promiscuity(struct net_device *dev, int inc) dev->name, (dev->flags & IFF_PROMISC), (old_flags & IFF_PROMISC), audit_get_loginuid(current->audit_context)); + + if (dev->change_rx_flags) + dev->change_rx_flags(dev, IFF_PROMISC); } } /** + * dev_set_promiscuity - update promiscuity count on a device + * @dev: device + * @inc: modifier + * + * Add or remove promiscuity from a device. While the count in the device + * remains above zero the interface remains promiscuous. Once it hits zero + * the device reverts back to normal filtering operation. A negative inc + * value is used to drop promiscuity on the device. + */ +void dev_set_promiscuity(struct net_device *dev, int inc) +{ + unsigned short old_flags = dev->flags; + + __dev_set_promiscuity(dev, inc); + if (dev->flags != old_flags) + dev_set_rx_mode(dev); +} + +/** * dev_set_allmulti - update allmulti count on a device * @dev: device * @inc: modifier @@ -2544,11 +2649,194 @@ void dev_set_allmulti(struct net_device *dev, int inc) { unsigned short old_flags = dev->flags; + ASSERT_RTNL(); + dev->flags |= IFF_ALLMULTI; if ((dev->allmulti += inc) == 0) dev->flags &= ~IFF_ALLMULTI; - if (dev->flags ^ old_flags) - dev_mc_upload(dev); + if (dev->flags ^ old_flags) { + if (dev->change_rx_flags) + dev->change_rx_flags(dev, IFF_ALLMULTI); + dev_set_rx_mode(dev); + } +} + +/* + * Upload unicast and multicast address lists to device and + * configure RX filtering. When the device doesn't support unicast + * filtering it is put in promiscous mode while unicast addresses + * are present. + */ +void __dev_set_rx_mode(struct net_device *dev) +{ + /* dev_open will call this function so the list will stay sane. */ + if (!(dev->flags&IFF_UP)) + return; + + if (!netif_device_present(dev)) + return; + + if (dev->set_rx_mode) + dev->set_rx_mode(dev); + else { + /* Unicast addresses changes may only happen under the rtnl, + * therefore calling __dev_set_promiscuity here is safe. + */ + if (dev->uc_count > 0 && !dev->uc_promisc) { + __dev_set_promiscuity(dev, 1); + dev->uc_promisc = 1; + } else if (dev->uc_count == 0 && dev->uc_promisc) { + __dev_set_promiscuity(dev, -1); + dev->uc_promisc = 0; + } + + if (dev->set_multicast_list) + dev->set_multicast_list(dev); + } +} + +void dev_set_rx_mode(struct net_device *dev) +{ + netif_tx_lock_bh(dev); + __dev_set_rx_mode(dev); + netif_tx_unlock_bh(dev); +} + +int __dev_addr_delete(struct dev_addr_list **list, int *count, + void *addr, int alen, int glbl) +{ + struct dev_addr_list *da; + + for (; (da = *list) != NULL; list = &da->next) { + if (memcmp(da->da_addr, addr, da->da_addrlen) == 0 && + alen == da->da_addrlen) { + if (glbl) { + int old_glbl = da->da_gusers; + da->da_gusers = 0; + if (old_glbl == 0) + break; + } + if (--da->da_users) + return 0; + + *list = da->next; + kfree(da); + (*count)--; + return 0; + } + } + return -ENOENT; +} + +int __dev_addr_add(struct dev_addr_list **list, int *count, + void *addr, int alen, int glbl) +{ + struct dev_addr_list *da; + + for (da = *list; da != NULL; da = da->next) { + if (memcmp(da->da_addr, addr, da->da_addrlen) == 0 && + da->da_addrlen == alen) { + if (glbl) { + int old_glbl = da->da_gusers; + da->da_gusers = 1; + if (old_glbl) + return 0; + } + da->da_users++; + return 0; + } + } + + da = kmalloc(sizeof(*da), GFP_ATOMIC); + if (da == NULL) + return -ENOMEM; + memcpy(da->da_addr, addr, alen); + da->da_addrlen = alen; + da->da_users = 1; + da->da_gusers = glbl ? 1 : 0; + da->next = *list; + *list = da; + (*count)++; + return 0; +} + +/** + * dev_unicast_delete - Release secondary unicast address. + * @dev: device + * @addr: address to delete + * @alen: length of @addr + * + * Release reference to a secondary unicast address and remove it + * from the device if the reference count drops to zero. + * + * The caller must hold the rtnl_mutex. + */ +int dev_unicast_delete(struct net_device *dev, void *addr, int alen) +{ + int err; + + ASSERT_RTNL(); + + netif_tx_lock_bh(dev); + err = __dev_addr_delete(&dev->uc_list, &dev->uc_count, addr, alen, 0); + if (!err) + __dev_set_rx_mode(dev); + netif_tx_unlock_bh(dev); + return err; +} +EXPORT_SYMBOL(dev_unicast_delete); + +/** + * dev_unicast_add - add a secondary unicast address + * @dev: device + * @addr: address to delete + * @alen: length of @addr + * + * Add a secondary unicast address to the device or increase + * the reference count if it already exists. + * + * The caller must hold the rtnl_mutex. + */ +int dev_unicast_add(struct net_device *dev, void *addr, int alen) +{ + int err; + + ASSERT_RTNL(); + + netif_tx_lock_bh(dev); + err = __dev_addr_add(&dev->uc_list, &dev->uc_count, addr, alen, 0); + if (!err) + __dev_set_rx_mode(dev); + netif_tx_unlock_bh(dev); + return err; +} +EXPORT_SYMBOL(dev_unicast_add); + +static void __dev_addr_discard(struct dev_addr_list **list) +{ + struct dev_addr_list *tmp; + + while (*list != NULL) { + tmp = *list; + *list = tmp->next; + if (tmp->da_users > tmp->da_gusers) + printk("__dev_addr_discard: address leakage! " + "da_users=%d\n", tmp->da_users); + kfree(tmp); + } +} + +static void dev_addr_discard(struct net_device *dev) +{ + netif_tx_lock_bh(dev); + + __dev_addr_discard(&dev->uc_list); + dev->uc_count = 0; + + __dev_addr_discard(&dev->mc_list); + dev->mc_count = 0; + + netif_tx_unlock_bh(dev); } unsigned dev_get_flags(const struct net_device *dev) @@ -2580,6 +2868,8 @@ int dev_change_flags(struct net_device *dev, unsigned flags) int ret, changes; int old_flags = dev->flags; + ASSERT_RTNL(); + /* * Set the flags on our device. */ @@ -2594,7 +2884,10 @@ int dev_change_flags(struct net_device *dev, unsigned flags) * Load in the correct multicast list now the flags have changed. */ - dev_mc_upload(dev); + if (dev->change_rx_flags && (dev->flags ^ flags) & IFF_MULTICAST) + dev->change_rx_flags(dev, IFF_MULTICAST); + + dev_set_rx_mode(dev); /* * Have we downed the interface. We handle IFF_UP ourselves @@ -2607,7 +2900,7 @@ int dev_change_flags(struct net_device *dev, unsigned flags) ret = ((old_flags & IFF_UP) ? dev_close : dev_open)(dev); if (!ret) - dev_mc_upload(dev); + dev_set_rx_mode(dev); } if (dev->flags & IFF_UP && @@ -3089,7 +3382,7 @@ int register_netdevice(struct net_device *dev) if (!dev_valid_name(dev->name)) { ret = -EINVAL; - goto out; + goto err_uninit; } dev->ifindex = dev_new_index(); @@ -3103,10 +3396,26 @@ int register_netdevice(struct net_device *dev) = hlist_entry(p, struct net_device, name_hlist); if (!strncmp(d->name, dev->name, IFNAMSIZ)) { ret = -EEXIST; - goto out; + goto err_uninit; } } + /* Fix illegal checksum combinations */ + if ((dev->features & NETIF_F_HW_CSUM) && + (dev->features & (NETIF_F_IP_CSUM|NETIF_F_IPV6_CSUM))) { + printk(KERN_NOTICE "%s: mixed HW and IP checksum settings.\n", + dev->name); + dev->features &= ~(NETIF_F_IP_CSUM|NETIF_F_IPV6_CSUM); + } + + if ((dev->features & NETIF_F_NO_CSUM) && + (dev->features & (NETIF_F_HW_CSUM|NETIF_F_IP_CSUM|NETIF_F_IPV6_CSUM))) { + printk(KERN_NOTICE "%s: mixed no checksumming and other settings.\n", + dev->name); + dev->features &= ~(NETIF_F_IP_CSUM|NETIF_F_IPV6_CSUM|NETIF_F_HW_CSUM); + } + + /* Fix illegal SG+CSUM combinations. */ if ((dev->features & NETIF_F_SG) && !(dev->features & NETIF_F_ALL_CSUM)) { @@ -3147,7 +3456,7 @@ int register_netdevice(struct net_device *dev) ret = netdev_register_sysfs(dev); if (ret) - goto out; + goto err_uninit; dev->reg_state = NETREG_REGISTERED; /* @@ -3166,12 +3475,18 @@ int register_netdevice(struct net_device *dev) write_unlock_bh(&dev_base_lock); /* Notify protocols, that a new device appeared. */ - raw_notifier_call_chain(&netdev_chain, NETDEV_REGISTER, dev); - - ret = 0; + ret = raw_notifier_call_chain(&netdev_chain, NETDEV_REGISTER, dev); + ret = notifier_to_errno(ret); + if (ret) + unregister_netdevice(dev); out: return ret; + +err_uninit: + if (dev->uninit) + dev->uninit(dev); + goto out; } /** @@ -3343,16 +3658,18 @@ static struct net_device_stats *internal_stats(struct net_device *dev) } /** - * alloc_netdev - allocate network device + * alloc_netdev_mq - allocate network device * @sizeof_priv: size of private data to allocate space for * @name: device name format string * @setup: callback to initialize device + * @queue_count: the number of subqueues to allocate * * Allocates a struct net_device with private data area for driver use - * and performs basic initialization. + * and performs basic initialization. Also allocates subquue structs + * for each queue on the device at the end of the netdevice. */ -struct net_device *alloc_netdev(int sizeof_priv, const char *name, - void (*setup)(struct net_device *)) +struct net_device *alloc_netdev_mq(int sizeof_priv, const char *name, + void (*setup)(struct net_device *), unsigned int queue_count) { void *p; struct net_device *dev; @@ -3361,7 +3678,9 @@ struct net_device *alloc_netdev(int sizeof_priv, const char *name, BUG_ON(strlen(name) >= sizeof(dev->name)); /* ensure 32-byte alignment of both the device and private area */ - alloc_size = (sizeof(*dev) + NETDEV_ALIGN_CONST) & ~NETDEV_ALIGN_CONST; + alloc_size = (sizeof(*dev) + NETDEV_ALIGN_CONST + + (sizeof(struct net_device_subqueue) * (queue_count - 1))) & + ~NETDEV_ALIGN_CONST; alloc_size += sizeof_priv + NETDEV_ALIGN_CONST; p = kzalloc(alloc_size, GFP_KERNEL); @@ -3374,15 +3693,22 @@ struct net_device *alloc_netdev(int sizeof_priv, const char *name, (((long)p + NETDEV_ALIGN_CONST) & ~NETDEV_ALIGN_CONST); dev->padded = (char *)dev - (char *)p; - if (sizeof_priv) - dev->priv = netdev_priv(dev); + if (sizeof_priv) { + dev->priv = ((char *)dev + + ((sizeof(struct net_device) + + (sizeof(struct net_device_subqueue) * + (queue_count - 1)) + NETDEV_ALIGN_CONST) + & ~NETDEV_ALIGN_CONST)); + } + + dev->egress_subqueue_count = queue_count; dev->get_stats = internal_stats; setup(dev); strcpy(dev->name, name); return dev; } -EXPORT_SYMBOL(alloc_netdev); +EXPORT_SYMBOL(alloc_netdev_mq); /** * free_netdev - free network device @@ -3471,9 +3797,9 @@ void unregister_netdevice(struct net_device *dev) raw_notifier_call_chain(&netdev_chain, NETDEV_UNREGISTER, dev); /* - * Flush the multicast chain + * Flush the unicast and multicast chains */ - dev_mc_discard(dev); + dev_addr_discard(dev); if (dev->uninit) dev->uninit(dev); @@ -3559,16 +3885,19 @@ static int dev_cpu_callback(struct notifier_block *nfb, #ifdef CONFIG_NET_DMA /** - * net_dma_rebalance - - * This is called when the number of channels allocated to the net_dma_client - * changes. The net_dma_client tries to have one DMA channel per CPU. + * net_dma_rebalance - try to maintain one DMA channel per CPU + * @net_dma: DMA client and associated data (lock, channels, channel_mask) + * + * This is called when the number of channels allocated to the net_dma client + * changes. The net_dma client tries to have one DMA channel per CPU. */ -static void net_dma_rebalance(void) + +static void net_dma_rebalance(struct net_dma *net_dma) { - unsigned int cpu, i, n; + unsigned int cpu, i, n, chan_idx; struct dma_chan *chan; - if (net_dma_count == 0) { + if (cpus_empty(net_dma->channel_mask)) { for_each_online_cpu(cpu) rcu_assign_pointer(per_cpu(softnet_data, cpu).net_dma, NULL); return; @@ -3577,10 +3906,12 @@ static void net_dma_rebalance(void) i = 0; cpu = first_cpu(cpu_online_map); - rcu_read_lock(); - list_for_each_entry(chan, &net_dma_client->channels, client_node) { - n = ((num_online_cpus() / net_dma_count) - + (i < (num_online_cpus() % net_dma_count) ? 1 : 0)); + for_each_cpu_mask(chan_idx, net_dma->channel_mask) { + chan = net_dma->channels[chan_idx]; + + n = ((num_online_cpus() / cpus_weight(net_dma->channel_mask)) + + (i < (num_online_cpus() % + cpus_weight(net_dma->channel_mask)) ? 1 : 0)); while(n) { per_cpu(softnet_data, cpu).net_dma = chan; @@ -3589,32 +3920,61 @@ static void net_dma_rebalance(void) } i++; } - rcu_read_unlock(); } /** * netdev_dma_event - event callback for the net_dma_client * @client: should always be net_dma_client * @chan: DMA channel for the event - * @event: event type + * @state: DMA state to be handled */ -static void netdev_dma_event(struct dma_client *client, struct dma_chan *chan, - enum dma_event event) +static enum dma_state_client +netdev_dma_event(struct dma_client *client, struct dma_chan *chan, + enum dma_state state) { - spin_lock(&net_dma_event_lock); - switch (event) { - case DMA_RESOURCE_ADDED: - net_dma_count++; - net_dma_rebalance(); + int i, found = 0, pos = -1; + struct net_dma *net_dma = + container_of(client, struct net_dma, client); + enum dma_state_client ack = DMA_DUP; /* default: take no action */ + + spin_lock(&net_dma->lock); + switch (state) { + case DMA_RESOURCE_AVAILABLE: + for (i = 0; i < NR_CPUS; i++) + if (net_dma->channels[i] == chan) { + found = 1; + break; + } else if (net_dma->channels[i] == NULL && pos < 0) + pos = i; + + if (!found && pos >= 0) { + ack = DMA_ACK; + net_dma->channels[pos] = chan; + cpu_set(pos, net_dma->channel_mask); + net_dma_rebalance(net_dma); + } break; case DMA_RESOURCE_REMOVED: - net_dma_count--; - net_dma_rebalance(); + for (i = 0; i < NR_CPUS; i++) + if (net_dma->channels[i] == chan) { + found = 1; + pos = i; + break; + } + + if (found) { + ack = DMA_ACK; + cpu_clear(pos, net_dma->channel_mask); + net_dma->channels[i] = NULL; + net_dma_rebalance(net_dma); + } break; default: break; } - spin_unlock(&net_dma_event_lock); + spin_unlock(&net_dma->lock); + + return ack; } /** @@ -3622,12 +3982,10 @@ static void netdev_dma_event(struct dma_client *client, struct dma_chan *chan, */ static int __init netdev_dma_register(void) { - spin_lock_init(&net_dma_event_lock); - net_dma_client = dma_async_client_register(netdev_dma_event); - if (net_dma_client == NULL) - return -ENOMEM; - - dma_async_client_chan_request(net_dma_client, num_online_cpus()); + spin_lock_init(&net_dma.lock); + dma_cap_set(DMA_MEMCPY, net_dma.client.cap_mask); + dma_async_client_register(&net_dma.client); + dma_async_client_chan_request(&net_dma.client); return 0; } @@ -3650,9 +4008,10 @@ int netdev_compute_features(unsigned long all, unsigned long one) if (all & NETIF_F_NO_CSUM && !(one & NETIF_F_NO_CSUM)) all ^= NETIF_F_NO_CSUM | NETIF_F_HW_CSUM; - /* if device can't do all checksum, downgrade to ipv4 */ + /* if device can't do all checksum, downgrade to ipv4/ipv6 */ if (all & NETIF_F_HW_CSUM && !(one & NETIF_F_HW_CSUM)) - all ^= NETIF_F_HW_CSUM | NETIF_F_IP_CSUM; + all ^= NETIF_F_HW_CSUM + | NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM; if (one & NETIF_F_GSO) one |= NETIF_F_GSO_SOFTWARE; diff --git a/net/core/dev_mcast.c b/net/core/dev_mcast.c index 5a54053386c8..20330c572610 100644 --- a/net/core/dev_mcast.c +++ b/net/core/dev_mcast.c @@ -64,85 +64,24 @@ */ /* - * Update the multicast list into the physical NIC controller. - */ - -static void __dev_mc_upload(struct net_device *dev) -{ - /* Don't do anything till we up the interface - * [dev_open will call this function so the list will - * stay sane] - */ - - if (!(dev->flags&IFF_UP)) - return; - - /* - * Devices with no set multicast or which have been - * detached don't get set. - */ - - if (dev->set_multicast_list == NULL || - !netif_device_present(dev)) - return; - - dev->set_multicast_list(dev); -} - -void dev_mc_upload(struct net_device *dev) -{ - netif_tx_lock_bh(dev); - __dev_mc_upload(dev); - netif_tx_unlock_bh(dev); -} - -/* * Delete a device level multicast */ int dev_mc_delete(struct net_device *dev, void *addr, int alen, int glbl) { - int err = 0; - struct dev_mc_list *dmi, **dmip; + int err; netif_tx_lock_bh(dev); - - for (dmip = &dev->mc_list; (dmi = *dmip) != NULL; dmip = &dmi->next) { + err = __dev_addr_delete(&dev->mc_list, &dev->mc_count, + addr, alen, glbl); + if (!err) { /* - * Find the entry we want to delete. The device could - * have variable length entries so check these too. + * We have altered the list, so the card + * loaded filter is now wrong. Fix it */ - if (memcmp(dmi->dmi_addr, addr, dmi->dmi_addrlen) == 0 && - alen == dmi->dmi_addrlen) { - if (glbl) { - int old_glbl = dmi->dmi_gusers; - dmi->dmi_gusers = 0; - if (old_glbl == 0) - break; - } - if (--dmi->dmi_users) - goto done; - - /* - * Last user. So delete the entry. - */ - *dmip = dmi->next; - dev->mc_count--; - - kfree(dmi); - - /* - * We have altered the list, so the card - * loaded filter is now wrong. Fix it - */ - __dev_mc_upload(dev); - - netif_tx_unlock_bh(dev); - return 0; - } + + __dev_set_rx_mode(dev); } - err = -ENOENT; -done: netif_tx_unlock_bh(dev); return err; } @@ -153,68 +92,96 @@ done: int dev_mc_add(struct net_device *dev, void *addr, int alen, int glbl) { - int err = 0; - struct dev_mc_list *dmi, *dmi1; - - dmi1 = kmalloc(sizeof(*dmi), GFP_ATOMIC); + int err; netif_tx_lock_bh(dev); - for (dmi = dev->mc_list; dmi != NULL; dmi = dmi->next) { - if (memcmp(dmi->dmi_addr, addr, dmi->dmi_addrlen) == 0 && - dmi->dmi_addrlen == alen) { - if (glbl) { - int old_glbl = dmi->dmi_gusers; - dmi->dmi_gusers = 1; - if (old_glbl) - goto done; - } - dmi->dmi_users++; - goto done; - } - } - - if ((dmi = dmi1) == NULL) { - netif_tx_unlock_bh(dev); - return -ENOMEM; - } - memcpy(dmi->dmi_addr, addr, alen); - dmi->dmi_addrlen = alen; - dmi->next = dev->mc_list; - dmi->dmi_users = 1; - dmi->dmi_gusers = glbl ? 1 : 0; - dev->mc_list = dmi; - dev->mc_count++; + err = __dev_addr_add(&dev->mc_list, &dev->mc_count, addr, alen, glbl); + if (!err) + __dev_set_rx_mode(dev); + netif_tx_unlock_bh(dev); + return err; +} - __dev_mc_upload(dev); +/** + * dev_mc_sync - Synchronize device's multicast list to another device + * @to: destination device + * @from: source device + * + * Add newly added addresses to the destination device and release + * addresses that have no users left. The source device must be + * locked by netif_tx_lock_bh. + * + * This function is intended to be called from the dev->set_multicast_list + * function of layered software devices. + */ +int dev_mc_sync(struct net_device *to, struct net_device *from) +{ + struct dev_addr_list *da, *next; + int err = 0; - netif_tx_unlock_bh(dev); - return 0; + netif_tx_lock_bh(to); + da = from->mc_list; + while (da != NULL) { + next = da->next; + if (!da->da_synced) { + err = __dev_addr_add(&to->mc_list, &to->mc_count, + da->da_addr, da->da_addrlen, 0); + if (err < 0) + break; + da->da_synced = 1; + da->da_users++; + } else if (da->da_users == 1) { + __dev_addr_delete(&to->mc_list, &to->mc_count, + da->da_addr, da->da_addrlen, 0); + __dev_addr_delete(&from->mc_list, &from->mc_count, + da->da_addr, da->da_addrlen, 0); + } + da = next; + } + if (!err) + __dev_set_rx_mode(to); + netif_tx_unlock_bh(to); -done: - netif_tx_unlock_bh(dev); - kfree(dmi1); return err; } +EXPORT_SYMBOL(dev_mc_sync); -/* - * Discard multicast list when a device is downed - */ -void dev_mc_discard(struct net_device *dev) +/** + * dev_mc_unsync - Remove synchronized addresses from the destination + * device + * @to: destination device + * @from: source device + * + * Remove all addresses that were added to the destination device by + * dev_mc_sync(). This function is intended to be called from the + * dev->stop function of layered software devices. + */ +void dev_mc_unsync(struct net_device *to, struct net_device *from) { - netif_tx_lock_bh(dev); - - while (dev->mc_list != NULL) { - struct dev_mc_list *tmp = dev->mc_list; - dev->mc_list = tmp->next; - if (tmp->dmi_users > tmp->dmi_gusers) - printk("dev_mc_discard: multicast leakage! dmi_users=%d\n", tmp->dmi_users); - kfree(tmp); + struct dev_addr_list *da, *next; + + netif_tx_lock_bh(from); + netif_tx_lock_bh(to); + + da = from->mc_list; + while (da != NULL) { + next = da->next; + if (!da->da_synced) + continue; + __dev_addr_delete(&to->mc_list, &to->mc_count, + da->da_addr, da->da_addrlen, 0); + da->da_synced = 0; + __dev_addr_delete(&from->mc_list, &from->mc_count, + da->da_addr, da->da_addrlen, 0); + da = next; } - dev->mc_count = 0; + __dev_set_rx_mode(to); - netif_tx_unlock_bh(dev); + netif_tx_unlock_bh(to); + netif_tx_unlock_bh(from); } +EXPORT_SYMBOL(dev_mc_unsync); #ifdef CONFIG_PROC_FS static void *dev_mc_seq_start(struct seq_file *seq, loff_t *pos) @@ -244,7 +211,7 @@ static void dev_mc_seq_stop(struct seq_file *seq, void *v) static int dev_mc_seq_show(struct seq_file *seq, void *v) { - struct dev_mc_list *m; + struct dev_addr_list *m; struct net_device *dev = v; netif_tx_lock_bh(dev); @@ -292,4 +259,3 @@ void __init dev_mcast_init(void) EXPORT_SYMBOL(dev_mc_add); EXPORT_SYMBOL(dev_mc_delete); -EXPORT_SYMBOL(dev_mc_upload); diff --git a/net/core/ethtool.c b/net/core/ethtool.c index 8d5e5a09b576..c5e059352d43 100644 --- a/net/core/ethtool.c +++ b/net/core/ethtool.c @@ -3,10 +3,12 @@ * Copyright (c) 2003 Matthew Wilcox <matthew@wil.cx> * * This file is where we call all the ethtool_ops commands to get - * the information ethtool needs. We fall back to calling do_ioctl() - * for drivers which haven't been converted to ethtool_ops yet. + * the information ethtool needs. * - * It's GPL, stupid. + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. */ #include <linux/module.h> @@ -52,6 +54,17 @@ int ethtool_op_set_tx_hw_csum(struct net_device *dev, u32 data) return 0; } + +int ethtool_op_set_tx_ipv6_csum(struct net_device *dev, u32 data) +{ + if (data) + dev->features |= NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM; + else + dev->features &= ~(NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM); + + return 0; +} + u32 ethtool_op_get_sg(struct net_device *dev) { return (dev->features & NETIF_F_SG) != 0; @@ -82,18 +95,6 @@ int ethtool_op_set_tso(struct net_device *dev, u32 data) return 0; } -int ethtool_op_get_perm_addr(struct net_device *dev, struct ethtool_perm_addr *addr, u8 *data) -{ - unsigned char len = dev->addr_len; - if ( addr->size < len ) - return -ETOOSMALL; - - addr->size = len; - memcpy(data, dev->perm_addr, len); - return 0; -} - - u32 ethtool_op_get_ufo(struct net_device *dev) { return (dev->features & NETIF_F_UFO) != 0; @@ -766,34 +767,20 @@ static int ethtool_get_stats(struct net_device *dev, void __user *useraddr) static int ethtool_get_perm_addr(struct net_device *dev, void __user *useraddr) { struct ethtool_perm_addr epaddr; - u8 *data; - int ret; - - if (!dev->ethtool_ops->get_perm_addr) - return -EOPNOTSUPP; - if (copy_from_user(&epaddr,useraddr,sizeof(epaddr))) + if (copy_from_user(&epaddr, useraddr, sizeof(epaddr))) return -EFAULT; - data = kmalloc(epaddr.size, GFP_USER); - if (!data) - return -ENOMEM; - - ret = dev->ethtool_ops->get_perm_addr(dev,&epaddr,data); - if (ret) - return ret; + if (epaddr.size < dev->addr_len) + return -ETOOSMALL; + epaddr.size = dev->addr_len; - ret = -EFAULT; if (copy_to_user(useraddr, &epaddr, sizeof(epaddr))) - goto out; + return -EFAULT; useraddr += sizeof(epaddr); - if (copy_to_user(useraddr, data, epaddr.size)) - goto out; - ret = 0; - - out: - kfree(data); - return ret; + if (copy_to_user(useraddr, dev->perm_addr, epaddr.size)) + return -EFAULT; + return 0; } /* The main entry point in this file. Called from net/core/dev.c */ @@ -810,7 +797,7 @@ int dev_ethtool(struct ifreq *ifr) return -ENODEV; if (!dev->ethtool_ops) - goto ioctl; + return -EOPNOTSUPP; if (copy_from_user(ðcmd, useraddr, sizeof (ethcmd))) return -EFAULT; @@ -949,7 +936,7 @@ int dev_ethtool(struct ifreq *ifr) rc = ethtool_set_gso(dev, useraddr); break; default: - rc = -EOPNOTSUPP; + rc = -EOPNOTSUPP; } if (dev->ethtool_ops->complete) @@ -959,20 +946,9 @@ int dev_ethtool(struct ifreq *ifr) netdev_features_change(dev); return rc; - - ioctl: - /* Keep existing behaviour for the moment. */ - if (!capable(CAP_NET_ADMIN)) - return -EPERM; - - if (dev->do_ioctl) - return dev->do_ioctl(dev, ifr, SIOCETHTOOL); - return -EOPNOTSUPP; } -EXPORT_SYMBOL(dev_ethtool); EXPORT_SYMBOL(ethtool_op_get_link); -EXPORT_SYMBOL_GPL(ethtool_op_get_perm_addr); EXPORT_SYMBOL(ethtool_op_get_sg); EXPORT_SYMBOL(ethtool_op_get_tso); EXPORT_SYMBOL(ethtool_op_get_tx_csum); @@ -980,5 +956,6 @@ EXPORT_SYMBOL(ethtool_op_set_sg); EXPORT_SYMBOL(ethtool_op_set_tso); EXPORT_SYMBOL(ethtool_op_set_tx_csum); EXPORT_SYMBOL(ethtool_op_set_tx_hw_csum); +EXPORT_SYMBOL(ethtool_op_set_tx_ipv6_csum); EXPORT_SYMBOL(ethtool_op_set_ufo); EXPORT_SYMBOL(ethtool_op_get_ufo); diff --git a/net/core/flow.c b/net/core/flow.c index 051430545a05..0ab5234b17d8 100644 --- a/net/core/flow.c +++ b/net/core/flow.c @@ -350,7 +350,7 @@ static int __init flow_cache_init(void) flow_cachep = kmem_cache_create("flow_cache", sizeof(struct flow_cache_entry), 0, SLAB_HWCACHE_ALIGN|SLAB_PANIC, - NULL, NULL); + NULL); flow_hash_shift = 10; flow_lwm = 2 * flow_hash_size; flow_hwm = 4 * flow_hash_size; diff --git a/net/core/neighbour.c b/net/core/neighbour.c index 9df26a07f067..f7de8f24d8dd 100644 --- a/net/core/neighbour.c +++ b/net/core/neighbour.c @@ -33,6 +33,7 @@ #include <linux/rtnetlink.h> #include <linux/random.h> #include <linux/string.h> +#include <linux/log2.h> #define NEIGH_DEBUG 1 @@ -311,7 +312,7 @@ static void neigh_hash_grow(struct neigh_table *tbl, unsigned long new_entries) NEIGH_CACHE_STAT_INC(tbl, hash_grows); - BUG_ON(new_entries & (new_entries - 1)); + BUG_ON(!is_power_of_2(new_entries)); new_hash = neigh_hash_alloc(new_entries); if (!new_hash) return; @@ -1347,7 +1348,7 @@ void neigh_table_init_no_netlink(struct neigh_table *tbl) tbl->kmem_cachep = kmem_cache_create(tbl->id, tbl->entry_size, 0, SLAB_HWCACHE_ALIGN|SLAB_PANIC, - NULL, NULL); + NULL); tbl->stats = alloc_percpu(struct neigh_statistics); if (!tbl->stats) panic("cannot create neighbour cache statistics"); diff --git a/net/core/netpoll.c b/net/core/netpoll.c index 5df8cf425b41..de1b26aa5720 100644 --- a/net/core/netpoll.c +++ b/net/core/netpoll.c @@ -66,8 +66,9 @@ static void queue_process(struct work_struct *work) local_irq_save(flags); netif_tx_lock(dev); - if (netif_queue_stopped(dev) || - dev->hard_start_xmit(skb, dev) != NETDEV_TX_OK) { + if ((netif_queue_stopped(dev) || + netif_subqueue_stopped(dev, skb->queue_mapping)) || + dev->hard_start_xmit(skb, dev) != NETDEV_TX_OK) { skb_queue_head(&npinfo->txq, skb); netif_tx_unlock(dev); local_irq_restore(flags); @@ -254,7 +255,8 @@ static void netpoll_send_skb(struct netpoll *np, struct sk_buff *skb) for (tries = jiffies_to_usecs(1)/USEC_PER_POLL; tries > 0; --tries) { if (netif_tx_trylock(dev)) { - if (!netif_queue_stopped(dev)) + if (!netif_queue_stopped(dev) && + !netif_subqueue_stopped(dev, skb->queue_mapping)) status = dev->hard_start_xmit(skb, dev); netif_tx_unlock(dev); diff --git a/net/core/pktgen.c b/net/core/pktgen.c index 9cd3a1cb60ef..803d0c8826af 100644 --- a/net/core/pktgen.c +++ b/net/core/pktgen.c @@ -111,6 +111,9 @@ * * 802.1Q/Q-in-Q support by Francesco Fondelli (FF) <francesco.fondelli@gmail.com> * + * Fixed src_mac command to set source mac of packet to value specified in + * command by Adit Ranadive <adit.262@gmail.com> + * */ #include <linux/sys.h> #include <linux/types.h> @@ -152,6 +155,9 @@ #include <net/checksum.h> #include <net/ipv6.h> #include <net/addrconf.h> +#ifdef CONFIG_XFRM +#include <net/xfrm.h> +#endif #include <asm/byteorder.h> #include <linux/rcupdate.h> #include <asm/bitops.h> @@ -181,6 +187,8 @@ #define F_MPLS_RND (1<<8) /* Random MPLS labels */ #define F_VID_RND (1<<9) /* Random VLAN ID */ #define F_SVID_RND (1<<10) /* Random SVLAN ID */ +#define F_FLOW_SEQ (1<<11) /* Sequential flows */ +#define F_IPSEC_ON (1<<12) /* ipsec on for flows */ /* Thread control flag bits */ #define T_TERMINATE (1<<0) @@ -207,8 +215,15 @@ static struct proc_dir_entry *pg_proc_dir = NULL; struct flow_state { __be32 cur_daddr; int count; +#ifdef CONFIG_XFRM + struct xfrm_state *x; +#endif + __u32 flags; }; +/* flow flag bits */ +#define F_INIT (1<<0) /* flow has been initialized */ + struct pktgen_dev { /* * Try to keep frequent/infrequent used vars. separated. @@ -228,6 +243,7 @@ struct pktgen_dev { int min_pkt_size; /* = ETH_ZLEN; */ int max_pkt_size; /* = ETH_ZLEN; */ + int pkt_overhead; /* overhead for MPLS, VLANs, IPSEC etc */ int nfrags; __u32 delay_us; /* Default delay */ __u32 delay_ns; @@ -341,7 +357,11 @@ struct pktgen_dev { unsigned cflows; /* Concurrent flows (config) */ unsigned lflow; /* Flow length (config) */ unsigned nflows; /* accumulated flows (stats) */ - + unsigned curfl; /* current sequenced flow (state)*/ +#ifdef CONFIG_XFRM + __u8 ipsmode; /* IPSEC mode (config) */ + __u8 ipsproto; /* IPSEC type (config) */ +#endif char result[512]; }; @@ -363,7 +383,6 @@ struct pktgen_thread { /* Field for thread to receive "posted" events terminate, stop ifs etc. */ u32 control; - int pid; int cpu; wait_queue_head_t queue; @@ -550,7 +569,7 @@ static ssize_t pgctrl_write(struct file *file, const char __user * buf, pktgen_run_all_threads(); else - printk("pktgen: Unknown command: %s\n", data); + printk(KERN_WARNING "pktgen: Unknown command: %s\n", data); err = count; @@ -690,6 +709,18 @@ static int pktgen_if_show(struct seq_file *seq, void *v) if (pkt_dev->flags & F_MPLS_RND) seq_printf(seq, "MPLS_RND "); + if (pkt_dev->cflows) { + if (pkt_dev->flags & F_FLOW_SEQ) + seq_printf(seq, "FLOW_SEQ "); /*in sequence flows*/ + else + seq_printf(seq, "FLOW_RND "); + } + +#ifdef CONFIG_XFRM + if (pkt_dev->flags & F_IPSEC_ON) + seq_printf(seq, "IPSEC "); +#endif + if (pkt_dev->flags & F_MACSRC_RND) seq_printf(seq, "MACSRC_RND "); @@ -879,14 +910,14 @@ static ssize_t pktgen_if_write(struct file *file, pg_result = &(pkt_dev->result[0]); if (count < 1) { - printk("pktgen: wrong command format\n"); + printk(KERN_WARNING "pktgen: wrong command format\n"); return -EINVAL; } max = count - i; tmp = count_trail_chars(&user_buffer[i], max); if (tmp < 0) { - printk("pktgen: illegal format\n"); + printk(KERN_WARNING "pktgen: illegal format\n"); return tmp; } i += tmp; @@ -914,7 +945,7 @@ static ssize_t pktgen_if_write(struct file *file, if (copy_from_user(tb, user_buffer, count)) return -EFAULT; tb[count] = 0; - printk("pktgen: %s,%lu buffer -:%s:-\n", name, + printk(KERN_DEBUG "pktgen: %s,%lu buffer -:%s:-\n", name, (unsigned long)count, tb); } @@ -1181,6 +1212,14 @@ static ssize_t pktgen_if_write(struct file *file, else if (strcmp(f, "!SVID_RND") == 0) pkt_dev->flags &= ~F_SVID_RND; + else if (strcmp(f, "FLOW_SEQ") == 0) + pkt_dev->flags |= F_FLOW_SEQ; + +#ifdef CONFIG_XFRM + else if (strcmp(f, "IPSEC") == 0) + pkt_dev->flags |= F_IPSEC_ON; +#endif + else if (strcmp(f, "!IPV6") == 0) pkt_dev->flags &= ~F_IPV6; @@ -1189,7 +1228,7 @@ static ssize_t pktgen_if_write(struct file *file, "Flag -:%s:- unknown\nAvailable flags, (prepend ! to un-set flag):\n%s", f, "IPSRC_RND, IPDST_RND, UDPSRC_RND, UDPDST_RND, " - "MACSRC_RND, MACDST_RND, TXSIZE_RND, IPV6, MPLS_RND, VID_RND, SVID_RND\n"); + "MACSRC_RND, MACDST_RND, TXSIZE_RND, IPV6, MPLS_RND, VID_RND, SVID_RND, FLOW_SEQ, IPSEC\n"); return count; } sprintf(pg_result, "OK: flags=0x%x", pkt_dev->flags); @@ -1211,7 +1250,7 @@ static ssize_t pktgen_if_write(struct file *file, pkt_dev->cur_daddr = pkt_dev->daddr_min; } if (debug) - printk("pktgen: dst_min set to: %s\n", + printk(KERN_DEBUG "pktgen: dst_min set to: %s\n", pkt_dev->dst_min); i += len; sprintf(pg_result, "OK: dst_min=%s", pkt_dev->dst_min); @@ -1234,7 +1273,7 @@ static ssize_t pktgen_if_write(struct file *file, pkt_dev->cur_daddr = pkt_dev->daddr_max; } if (debug) - printk("pktgen: dst_max set to: %s\n", + printk(KERN_DEBUG "pktgen: dst_max set to: %s\n", pkt_dev->dst_max); i += len; sprintf(pg_result, "OK: dst_max=%s", pkt_dev->dst_max); @@ -1257,7 +1296,7 @@ static ssize_t pktgen_if_write(struct file *file, ipv6_addr_copy(&pkt_dev->cur_in6_daddr, &pkt_dev->in6_daddr); if (debug) - printk("pktgen: dst6 set to: %s\n", buf); + printk(KERN_DEBUG "pktgen: dst6 set to: %s\n", buf); i += len; sprintf(pg_result, "OK: dst6=%s", buf); @@ -1280,7 +1319,7 @@ static ssize_t pktgen_if_write(struct file *file, ipv6_addr_copy(&pkt_dev->cur_in6_daddr, &pkt_dev->min_in6_daddr); if (debug) - printk("pktgen: dst6_min set to: %s\n", buf); + printk(KERN_DEBUG "pktgen: dst6_min set to: %s\n", buf); i += len; sprintf(pg_result, "OK: dst6_min=%s", buf); @@ -1301,7 +1340,7 @@ static ssize_t pktgen_if_write(struct file *file, fmt_ip6(buf, pkt_dev->max_in6_daddr.s6_addr); if (debug) - printk("pktgen: dst6_max set to: %s\n", buf); + printk(KERN_DEBUG "pktgen: dst6_max set to: %s\n", buf); i += len; sprintf(pg_result, "OK: dst6_max=%s", buf); @@ -1324,7 +1363,7 @@ static ssize_t pktgen_if_write(struct file *file, ipv6_addr_copy(&pkt_dev->cur_in6_saddr, &pkt_dev->in6_saddr); if (debug) - printk("pktgen: src6 set to: %s\n", buf); + printk(KERN_DEBUG "pktgen: src6 set to: %s\n", buf); i += len; sprintf(pg_result, "OK: src6=%s", buf); @@ -1345,7 +1384,7 @@ static ssize_t pktgen_if_write(struct file *file, pkt_dev->cur_saddr = pkt_dev->saddr_min; } if (debug) - printk("pktgen: src_min set to: %s\n", + printk(KERN_DEBUG "pktgen: src_min set to: %s\n", pkt_dev->src_min); i += len; sprintf(pg_result, "OK: src_min=%s", pkt_dev->src_min); @@ -1366,7 +1405,7 @@ static ssize_t pktgen_if_write(struct file *file, pkt_dev->cur_saddr = pkt_dev->saddr_max; } if (debug) - printk("pktgen: src_max set to: %s\n", + printk(KERN_DEBUG "pktgen: src_max set to: %s\n", pkt_dev->src_max); i += len; sprintf(pg_result, "OK: src_max=%s", pkt_dev->src_max); @@ -1415,8 +1454,11 @@ static ssize_t pktgen_if_write(struct file *file, } if (!strcmp(name, "src_mac")) { char *v = valstr; + unsigned char old_smac[ETH_ALEN]; unsigned char *m = pkt_dev->src_mac; + memcpy(old_smac, pkt_dev->src_mac, ETH_ALEN); + len = strn_len(&user_buffer[i], sizeof(valstr) - 1); if (len < 0) { return len; @@ -1445,6 +1487,10 @@ static ssize_t pktgen_if_write(struct file *file, } } + /* Set up Src MAC */ + if (compare_ether_addr(old_smac, pkt_dev->src_mac)) + memcpy(&(pkt_dev->hh[6]), pkt_dev->src_mac, ETH_ALEN); + sprintf(pg_result, "OK: srcmac"); return count; } @@ -1496,7 +1542,7 @@ static ssize_t pktgen_if_write(struct file *file, pkt_dev->svlan_id = 0xffff; if (debug) - printk("pktgen: VLAN/SVLAN auto turned off\n"); + printk(KERN_DEBUG "pktgen: VLAN/SVLAN auto turned off\n"); } return count; } @@ -1511,10 +1557,10 @@ static ssize_t pktgen_if_write(struct file *file, pkt_dev->vlan_id = value; /* turn on VLAN */ if (debug) - printk("pktgen: VLAN turned on\n"); + printk(KERN_DEBUG "pktgen: VLAN turned on\n"); if (debug && pkt_dev->nr_labels) - printk("pktgen: MPLS auto turned off\n"); + printk(KERN_DEBUG "pktgen: MPLS auto turned off\n"); pkt_dev->nr_labels = 0; /* turn off MPLS */ sprintf(pg_result, "OK: vlan_id=%u", pkt_dev->vlan_id); @@ -1523,7 +1569,7 @@ static ssize_t pktgen_if_write(struct file *file, pkt_dev->svlan_id = 0xffff; if (debug) - printk("pktgen: VLAN/SVLAN turned off\n"); + printk(KERN_DEBUG "pktgen: VLAN/SVLAN turned off\n"); } return count; } @@ -1568,10 +1614,10 @@ static ssize_t pktgen_if_write(struct file *file, pkt_dev->svlan_id = value; /* turn on SVLAN */ if (debug) - printk("pktgen: SVLAN turned on\n"); + printk(KERN_DEBUG "pktgen: SVLAN turned on\n"); if (debug && pkt_dev->nr_labels) - printk("pktgen: MPLS auto turned off\n"); + printk(KERN_DEBUG "pktgen: MPLS auto turned off\n"); pkt_dev->nr_labels = 0; /* turn off MPLS */ sprintf(pg_result, "OK: svlan_id=%u", pkt_dev->svlan_id); @@ -1580,7 +1626,7 @@ static ssize_t pktgen_if_write(struct file *file, pkt_dev->svlan_id = 0xffff; if (debug) - printk("pktgen: VLAN/SVLAN turned off\n"); + printk(KERN_DEBUG "pktgen: VLAN/SVLAN turned off\n"); } return count; } @@ -1740,10 +1786,11 @@ static ssize_t pktgen_thread_write(struct file *file, i += len; if (debug) - printk("pktgen: t=%s, count=%lu\n", name, (unsigned long)count); + printk(KERN_DEBUG "pktgen: t=%s, count=%lu\n", + name, (unsigned long)count); if (!t) { - printk("pktgen: ERROR: No thread\n"); + printk(KERN_ERR "pktgen: ERROR: No thread\n"); ret = -EINVAL; goto out; } @@ -1854,8 +1901,8 @@ static void pktgen_mark_device(const char *ifname) mutex_lock(&pktgen_thread_lock); if (++i >= max_tries) { - printk("pktgen_mark_device: timed out after waiting " - "%d msec for device %s to be removed\n", + printk(KERN_ERR "pktgen_mark_device: timed out after " + "waiting %d msec for device %s to be removed\n", msec_per_try * i, ifname); break; } @@ -1925,15 +1972,15 @@ static int pktgen_setup_dev(struct pktgen_dev *pkt_dev, const char *ifname) odev = dev_get_by_name(ifname); if (!odev) { - printk("pktgen: no such netdevice: \"%s\"\n", ifname); + printk(KERN_ERR "pktgen: no such netdevice: \"%s\"\n", ifname); return -ENODEV; } if (odev->type != ARPHRD_ETHER) { - printk("pktgen: not an ethernet device: \"%s\"\n", ifname); + printk(KERN_ERR "pktgen: not an ethernet device: \"%s\"\n", ifname); err = -EINVAL; } else if (!netif_running(odev)) { - printk("pktgen: device is down: \"%s\"\n", ifname); + printk(KERN_ERR "pktgen: device is down: \"%s\"\n", ifname); err = -ENETDOWN; } else { pkt_dev->odev = odev; @@ -1950,7 +1997,8 @@ static int pktgen_setup_dev(struct pktgen_dev *pkt_dev, const char *ifname) static void pktgen_setup_inject(struct pktgen_dev *pkt_dev) { if (!pkt_dev->odev) { - printk("pktgen: ERROR: pkt_dev->odev == NULL in setup_inject.\n"); + printk(KERN_ERR "pktgen: ERROR: pkt_dev->odev == NULL in " + "setup_inject.\n"); sprintf(pkt_dev->result, "ERROR: pkt_dev->odev == NULL in setup_inject.\n"); return; @@ -2012,7 +2060,8 @@ static void pktgen_setup_inject(struct pktgen_dev *pkt_dev) } rcu_read_unlock(); if (err) - printk("pktgen: ERROR: IPv6 link address not availble.\n"); + printk(KERN_ERR "pktgen: ERROR: IPv6 link " + "address not availble.\n"); } #endif } else { @@ -2075,6 +2124,69 @@ static void spin(struct pktgen_dev *pkt_dev, __u64 spin_until_us) pkt_dev->idle_acc += now - start; } +static inline void set_pkt_overhead(struct pktgen_dev *pkt_dev) +{ + pkt_dev->pkt_overhead = 0; + pkt_dev->pkt_overhead += pkt_dev->nr_labels*sizeof(u32); + pkt_dev->pkt_overhead += VLAN_TAG_SIZE(pkt_dev); + pkt_dev->pkt_overhead += SVLAN_TAG_SIZE(pkt_dev); +} + +static inline int f_seen(struct pktgen_dev *pkt_dev, int flow) +{ + + if (pkt_dev->flows[flow].flags & F_INIT) + return 1; + else + return 0; +} + +static inline int f_pick(struct pktgen_dev *pkt_dev) +{ + int flow = pkt_dev->curfl; + + if (pkt_dev->flags & F_FLOW_SEQ) { + if (pkt_dev->flows[flow].count >= pkt_dev->lflow) { + /* reset time */ + pkt_dev->flows[flow].count = 0; + pkt_dev->curfl += 1; + if (pkt_dev->curfl >= pkt_dev->cflows) + pkt_dev->curfl = 0; /*reset */ + } + } else { + flow = random32() % pkt_dev->cflows; + + if (pkt_dev->flows[flow].count > pkt_dev->lflow) + pkt_dev->flows[flow].count = 0; + } + + return pkt_dev->curfl; +} + + +#ifdef CONFIG_XFRM +/* If there was already an IPSEC SA, we keep it as is, else + * we go look for it ... +*/ +static void get_ipsec_sa(struct pktgen_dev *pkt_dev, int flow) +{ + struct xfrm_state *x = pkt_dev->flows[flow].x; + if (!x) { + /*slow path: we dont already have xfrm_state*/ + x = xfrm_stateonly_find((xfrm_address_t *)&pkt_dev->cur_daddr, + (xfrm_address_t *)&pkt_dev->cur_saddr, + AF_INET, + pkt_dev->ipsmode, + pkt_dev->ipsproto, 0); + if (x) { + pkt_dev->flows[flow].x = x; + set_pkt_overhead(pkt_dev); + pkt_dev->pkt_overhead+=x->props.header_len; + } + + } +} +#endif /* Increment/randomize headers according to flags and current values * for IP src/dest, UDP src/dst port, MAC-Addr src/dst */ @@ -2084,12 +2196,8 @@ static void mod_cur_headers(struct pktgen_dev *pkt_dev) __u32 imx; int flow = 0; - if (pkt_dev->cflows) { - flow = random32() % pkt_dev->cflows; - - if (pkt_dev->flows[flow].count > pkt_dev->lflow) - pkt_dev->flows[flow].count = 0; - } + if (pkt_dev->cflows) + flow = f_pick(pkt_dev); /* Deal with source MAC */ if (pkt_dev->src_mac_count > 1) { @@ -2205,7 +2313,7 @@ static void mod_cur_headers(struct pktgen_dev *pkt_dev) pkt_dev->cur_saddr = htonl(t); } - if (pkt_dev->cflows && pkt_dev->flows[flow].count != 0) { + if (pkt_dev->cflows && f_seen(pkt_dev, flow)) { pkt_dev->cur_daddr = pkt_dev->flows[flow].cur_daddr; } else { imn = ntohl(pkt_dev->daddr_min); @@ -2235,8 +2343,13 @@ static void mod_cur_headers(struct pktgen_dev *pkt_dev) } } if (pkt_dev->cflows) { + pkt_dev->flows[flow].flags |= F_INIT; pkt_dev->flows[flow].cur_daddr = pkt_dev->cur_daddr; +#ifdef CONFIG_XFRM + if (pkt_dev->flags & F_IPSEC_ON) + get_ipsec_sa(pkt_dev, flow); +#endif pkt_dev->nflows++; } } @@ -2277,6 +2390,93 @@ static void mod_cur_headers(struct pktgen_dev *pkt_dev) pkt_dev->flows[flow].count++; } + +#ifdef CONFIG_XFRM +static int pktgen_output_ipsec(struct sk_buff *skb, struct pktgen_dev *pkt_dev) +{ + struct xfrm_state *x = pkt_dev->flows[pkt_dev->curfl].x; + int err = 0; + struct iphdr *iph; + + if (!x) + return 0; + /* XXX: we dont support tunnel mode for now until + * we resolve the dst issue */ + if (x->props.mode != XFRM_MODE_TRANSPORT) + return 0; + + spin_lock(&x->lock); + iph = ip_hdr(skb); + + err = x->mode->output(x, skb); + if (err) + goto error; + err = x->type->output(x, skb); + if (err) + goto error; + + x->curlft.bytes +=skb->len; + x->curlft.packets++; + spin_unlock(&x->lock); + +error: + spin_unlock(&x->lock); + return err; +} + +static inline void free_SAs(struct pktgen_dev *pkt_dev) +{ + if (pkt_dev->cflows) { + /* let go of the SAs if we have them */ + int i = 0; + for (; i < pkt_dev->nflows; i++){ + struct xfrm_state *x = pkt_dev->flows[i].x; + if (x) { + xfrm_state_put(x); + pkt_dev->flows[i].x = NULL; + } + } + } +} + +static inline int process_ipsec(struct pktgen_dev *pkt_dev, + struct sk_buff *skb, __be16 protocol) +{ + if (pkt_dev->flags & F_IPSEC_ON) { + struct xfrm_state *x = pkt_dev->flows[pkt_dev->curfl].x; + int nhead = 0; + if (x) { + int ret; + __u8 *eth; + nhead = x->props.header_len - skb_headroom(skb); + if (nhead >0) { + ret = pskb_expand_head(skb, nhead, 0, GFP_ATOMIC); + if (ret < 0) { + printk(KERN_ERR "Error expanding " + "ipsec packet %d\n",ret); + return 0; + } + } + + /* ipsec is not expecting ll header */ + skb_pull(skb, ETH_HLEN); + ret = pktgen_output_ipsec(skb, pkt_dev); + if (ret) { + printk(KERN_ERR "Error creating ipsec " + "packet %d\n",ret); + kfree_skb(skb); + return 0; + } + /* restore ll */ + eth = (__u8 *) skb_push(skb, ETH_HLEN); + memcpy(eth, pkt_dev->hh, 12); + *(u16 *) & eth[12] = protocol; + } + } + return 1; +} +#endif + static void mpls_push(__be32 *mpls, struct pktgen_dev *pkt_dev) { unsigned i; @@ -2323,9 +2523,7 @@ static struct sk_buff *fill_packet_ipv4(struct net_device *odev, datalen = (odev->hard_header_len + 16) & ~0xf; skb = alloc_skb(pkt_dev->cur_pkt_size + 64 + datalen + - pkt_dev->nr_labels*sizeof(u32) + - VLAN_TAG_SIZE(pkt_dev) + SVLAN_TAG_SIZE(pkt_dev), - GFP_ATOMIC); + pkt_dev->pkt_overhead, GFP_ATOMIC); if (!skb) { sprintf(pkt_dev->result, "No memory"); return NULL; @@ -2368,7 +2566,7 @@ static struct sk_buff *fill_packet_ipv4(struct net_device *odev, /* Eth + IPh + UDPh + mpls */ datalen = pkt_dev->cur_pkt_size - 14 - 20 - 8 - - pkt_dev->nr_labels*sizeof(u32) - VLAN_TAG_SIZE(pkt_dev) - SVLAN_TAG_SIZE(pkt_dev); + pkt_dev->pkt_overhead; if (datalen < sizeof(struct pktgen_hdr)) datalen = sizeof(struct pktgen_hdr); @@ -2391,8 +2589,7 @@ static struct sk_buff *fill_packet_ipv4(struct net_device *odev, iph->check = ip_fast_csum((void *)iph, iph->ihl); skb->protocol = protocol; skb->mac_header = (skb->network_header - ETH_HLEN - - pkt_dev->nr_labels * sizeof(u32) - - VLAN_TAG_SIZE(pkt_dev) - SVLAN_TAG_SIZE(pkt_dev)); + pkt_dev->pkt_overhead); skb->dev = odev; skb->pkt_type = PACKET_HOST; @@ -2463,6 +2660,11 @@ static struct sk_buff *fill_packet_ipv4(struct net_device *odev, pgh->tv_usec = htonl(timestamp.tv_usec); } +#ifdef CONFIG_XFRM + if (!process_ipsec(pkt_dev, skb, protocol)) + return NULL; +#endif + return skb; } @@ -2662,9 +2864,7 @@ static struct sk_buff *fill_packet_ipv6(struct net_device *odev, mod_cur_headers(pkt_dev); skb = alloc_skb(pkt_dev->cur_pkt_size + 64 + 16 + - pkt_dev->nr_labels*sizeof(u32) + - VLAN_TAG_SIZE(pkt_dev) + SVLAN_TAG_SIZE(pkt_dev), - GFP_ATOMIC); + pkt_dev->pkt_overhead, GFP_ATOMIC); if (!skb) { sprintf(pkt_dev->result, "No memory"); return NULL; @@ -2708,7 +2908,7 @@ static struct sk_buff *fill_packet_ipv6(struct net_device *odev, /* Eth + IPh + UDPh + mpls */ datalen = pkt_dev->cur_pkt_size - 14 - sizeof(struct ipv6hdr) - sizeof(struct udphdr) - - pkt_dev->nr_labels*sizeof(u32) - VLAN_TAG_SIZE(pkt_dev) - SVLAN_TAG_SIZE(pkt_dev); + pkt_dev->pkt_overhead; if (datalen < sizeof(struct pktgen_hdr)) { datalen = sizeof(struct pktgen_hdr); @@ -2738,8 +2938,7 @@ static struct sk_buff *fill_packet_ipv6(struct net_device *odev, ipv6_addr_copy(&iph->saddr, &pkt_dev->cur_in6_saddr); skb->mac_header = (skb->network_header - ETH_HLEN - - pkt_dev->nr_labels * sizeof(u32) - - VLAN_TAG_SIZE(pkt_dev) - SVLAN_TAG_SIZE(pkt_dev)); + pkt_dev->pkt_overhead); skb->protocol = protocol; skb->dev = odev; skb->pkt_type = PACKET_HOST; @@ -2857,6 +3056,7 @@ static void pktgen_run(struct pktgen_thread *t) pkt_dev->started_at = getCurUs(); pkt_dev->next_tx_us = getCurUs(); /* Transmit immediately */ pkt_dev->next_tx_ns = 0; + set_pkt_overhead(pkt_dev); strcpy(pkt_dev->result, "Starting"); started++; @@ -2997,8 +3197,8 @@ static int pktgen_stop_device(struct pktgen_dev *pkt_dev) int nr_frags = pkt_dev->skb ? skb_shinfo(pkt_dev->skb)->nr_frags : -1; if (!pkt_dev->running) { - printk("pktgen: interface: %s is already stopped\n", - pkt_dev->odev->name); + printk(KERN_WARNING "pktgen: interface: %s is already " + "stopped\n", pkt_dev->odev->name); return -EINVAL; } @@ -3139,7 +3339,10 @@ static __inline__ void pktgen_xmit(struct pktgen_dev *pkt_dev) } } - if (netif_queue_stopped(odev) || need_resched()) { + if ((netif_queue_stopped(odev) || + (pkt_dev->skb && + netif_subqueue_stopped(odev, pkt_dev->skb->queue_mapping))) || + need_resched()) { idle_start = getCurUs(); if (!netif_running(odev)) { @@ -3154,7 +3357,8 @@ static __inline__ void pktgen_xmit(struct pktgen_dev *pkt_dev) pkt_dev->idle_acc += getCurUs() - idle_start; - if (netif_queue_stopped(odev)) { + if (netif_queue_stopped(odev) || + netif_subqueue_stopped(odev, pkt_dev->skb->queue_mapping)) { pkt_dev->next_tx_us = getCurUs(); /* TODO */ pkt_dev->next_tx_ns = 0; goto out; /* Try the next interface */ @@ -3170,7 +3374,8 @@ static __inline__ void pktgen_xmit(struct pktgen_dev *pkt_dev) pkt_dev->skb = fill_packet(odev, pkt_dev); if (pkt_dev->skb == NULL) { - printk("pktgen: ERROR: couldn't allocate skb in fill_packet.\n"); + printk(KERN_ERR "pktgen: ERROR: couldn't " + "allocate skb in fill_packet.\n"); schedule(); pkt_dev->clone_count--; /* back out increment, OOM */ goto out; @@ -3181,7 +3386,8 @@ static __inline__ void pktgen_xmit(struct pktgen_dev *pkt_dev) } netif_tx_lock_bh(odev); - if (!netif_queue_stopped(odev)) { + if (!netif_queue_stopped(odev) && + !netif_subqueue_stopped(odev, pkt_dev->skb->queue_mapping)) { atomic_inc(&(pkt_dev->skb->users)); retry_now: @@ -3266,14 +3472,14 @@ static int pktgen_thread_worker(void *arg) init_waitqueue_head(&t->queue); - t->pid = current->pid; - pr_debug("pktgen: starting pktgen/%d: pid=%d\n", cpu, current->pid); max_before_softirq = t->max_before_softirq; set_current_state(TASK_INTERRUPTIBLE); + set_freezable(); + while (!kthread_should_stop()) { pkt_dev = next_to_run(t); @@ -3372,7 +3578,8 @@ static int add_dev_to_thread(struct pktgen_thread *t, if_lock(t); if (pkt_dev->pg_thread) { - printk("pktgen: ERROR: already assigned to a thread.\n"); + printk(KERN_ERR "pktgen: ERROR: already assigned " + "to a thread.\n"); rv = -EBUSY; goto out; } @@ -3397,7 +3604,7 @@ static int pktgen_add_device(struct pktgen_thread *t, const char *ifname) pkt_dev = __pktgen_NN_threads(ifname, FIND); if (pkt_dev) { - printk("pktgen: ERROR: interface already used.\n"); + printk(KERN_ERR "pktgen: ERROR: interface already used.\n"); return -EBUSY; } @@ -3439,18 +3646,25 @@ static int pktgen_add_device(struct pktgen_thread *t, const char *ifname) pkt_dev->entry = create_proc_entry(ifname, 0600, pg_proc_dir); if (!pkt_dev->entry) { - printk("pktgen: cannot create %s/%s procfs entry.\n", + printk(KERN_ERR "pktgen: cannot create %s/%s procfs entry.\n", PG_PROC_DIR, ifname); err = -EINVAL; goto out2; } pkt_dev->entry->proc_fops = &pktgen_if_fops; pkt_dev->entry->data = pkt_dev; +#ifdef CONFIG_XFRM + pkt_dev->ipsmode = XFRM_MODE_TRANSPORT; + pkt_dev->ipsproto = IPPROTO_ESP; +#endif return add_dev_to_thread(t, pkt_dev); out2: dev_put(pkt_dev->odev); out1: +#ifdef CONFIG_XFRM + free_SAs(pkt_dev); +#endif if (pkt_dev->flows) vfree(pkt_dev->flows); kfree(pkt_dev); @@ -3465,7 +3679,8 @@ static int __init pktgen_create_thread(int cpu) t = kzalloc(sizeof(struct pktgen_thread), GFP_KERNEL); if (!t) { - printk("pktgen: ERROR: out of memory, can't create new thread.\n"); + printk(KERN_ERR "pktgen: ERROR: out of memory, can't " + "create new thread.\n"); return -ENOMEM; } @@ -3478,7 +3693,8 @@ static int __init pktgen_create_thread(int cpu) p = kthread_create(pktgen_thread_worker, t, "kpktgend_%d", cpu); if (IS_ERR(p)) { - printk("pktgen: kernel_thread() failed for cpu %d\n", t->cpu); + printk(KERN_ERR "pktgen: kernel_thread() failed " + "for cpu %d\n", t->cpu); list_del(&t->th_list); kfree(t); return PTR_ERR(p); @@ -3488,7 +3704,7 @@ static int __init pktgen_create_thread(int cpu) pe = create_proc_entry(t->tsk->comm, 0600, pg_proc_dir); if (!pe) { - printk("pktgen: cannot create %s/%s procfs entry.\n", + printk(KERN_ERR "pktgen: cannot create %s/%s procfs entry.\n", PG_PROC_DIR, t->tsk->comm); kthread_stop(p); list_del(&t->th_list); @@ -3527,7 +3743,8 @@ static int pktgen_remove_device(struct pktgen_thread *t, pr_debug("pktgen: remove_device pkt_dev=%p\n", pkt_dev); if (pkt_dev->running) { - printk("pktgen:WARNING: trying to remove a running interface, stopping it now.\n"); + printk(KERN_WARNING "pktgen: WARNING: trying to remove a " + "running interface, stopping it now.\n"); pktgen_stop_device(pkt_dev); } @@ -3545,6 +3762,9 @@ static int pktgen_remove_device(struct pktgen_thread *t, if (pkt_dev->entry) remove_proc_entry(pkt_dev->entry->name, pg_proc_dir); +#ifdef CONFIG_XFRM + free_SAs(pkt_dev); +#endif if (pkt_dev->flows) vfree(pkt_dev->flows); kfree(pkt_dev); @@ -3556,7 +3776,7 @@ static int __init pg_init(void) int cpu; struct proc_dir_entry *pe; - printk(version); + printk(KERN_INFO "%s", version); pg_proc_dir = proc_mkdir(PG_PROC_DIR, proc_net); if (!pg_proc_dir) @@ -3565,8 +3785,8 @@ static int __init pg_init(void) pe = create_proc_entry(PGCTRL, 0600, pg_proc_dir); if (pe == NULL) { - printk("pktgen: ERROR: cannot create %s procfs entry.\n", - PGCTRL); + printk(KERN_ERR "pktgen: ERROR: cannot create %s " + "procfs entry.\n", PGCTRL); proc_net_remove(PG_PROC_DIR); return -EINVAL; } @@ -3582,12 +3802,13 @@ static int __init pg_init(void) err = pktgen_create_thread(cpu); if (err) - printk("pktgen: WARNING: Cannot create thread for cpu %d (%d)\n", - cpu, err); + printk(KERN_WARNING "pktgen: WARNING: Cannot create " + "thread for cpu %d (%d)\n", cpu, err); } if (list_empty(&pktgen_threads)) { - printk("pktgen: ERROR: Initialization failed for all threads\n"); + printk(KERN_ERR "pktgen: ERROR: Initialization failed for " + "all threads\n"); unregister_netdevice_notifier(&pktgen_notifier_block); remove_proc_entry(PGCTRL, pg_proc_dir); proc_net_remove(PG_PROC_DIR); diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c index 02e8bf084277..4756d5857abf 100644 --- a/net/core/rtnetlink.c +++ b/net/core/rtnetlink.c @@ -97,6 +97,19 @@ int rtattr_parse(struct rtattr *tb[], int maxattr, struct rtattr *rta, int len) return 0; } +int __rtattr_parse_nested_compat(struct rtattr *tb[], int maxattr, + struct rtattr *rta, int len) +{ + if (RTA_PAYLOAD(rta) < len) + return -1; + if (RTA_PAYLOAD(rta) >= RTA_ALIGN(len) + sizeof(struct rtattr)) { + rta = RTA_DATA(rta) + RTA_ALIGN(len); + return rtattr_parse_nested(tb, maxattr, rta); + } + memset(tb, 0, sizeof(struct rtattr *) * maxattr); + return 0; +} + static struct rtnl_link *rtnl_msg_handlers[NPROTO]; static inline int rtm_msgindex(int msgtype) @@ -243,6 +256,150 @@ void rtnl_unregister_all(int protocol) EXPORT_SYMBOL_GPL(rtnl_unregister_all); +static LIST_HEAD(link_ops); + +/** + * __rtnl_link_register - Register rtnl_link_ops with rtnetlink. + * @ops: struct rtnl_link_ops * to register + * + * The caller must hold the rtnl_mutex. This function should be used + * by drivers that create devices during module initialization. It + * must be called before registering the devices. + * + * Returns 0 on success or a negative error code. + */ +int __rtnl_link_register(struct rtnl_link_ops *ops) +{ + if (!ops->dellink) + ops->dellink = unregister_netdevice; + + list_add_tail(&ops->list, &link_ops); + return 0; +} + +EXPORT_SYMBOL_GPL(__rtnl_link_register); + +/** + * rtnl_link_register - Register rtnl_link_ops with rtnetlink. + * @ops: struct rtnl_link_ops * to register + * + * Returns 0 on success or a negative error code. + */ +int rtnl_link_register(struct rtnl_link_ops *ops) +{ + int err; + + rtnl_lock(); + err = __rtnl_link_register(ops); + rtnl_unlock(); + return err; +} + +EXPORT_SYMBOL_GPL(rtnl_link_register); + +/** + * __rtnl_link_unregister - Unregister rtnl_link_ops from rtnetlink. + * @ops: struct rtnl_link_ops * to unregister + * + * The caller must hold the rtnl_mutex. + */ +void __rtnl_link_unregister(struct rtnl_link_ops *ops) +{ + struct net_device *dev, *n; + + for_each_netdev_safe(dev, n) { + if (dev->rtnl_link_ops == ops) + ops->dellink(dev); + } + list_del(&ops->list); +} + +EXPORT_SYMBOL_GPL(__rtnl_link_unregister); + +/** + * rtnl_link_unregister - Unregister rtnl_link_ops from rtnetlink. + * @ops: struct rtnl_link_ops * to unregister + */ +void rtnl_link_unregister(struct rtnl_link_ops *ops) +{ + rtnl_lock(); + __rtnl_link_unregister(ops); + rtnl_unlock(); +} + +EXPORT_SYMBOL_GPL(rtnl_link_unregister); + +static const struct rtnl_link_ops *rtnl_link_ops_get(const char *kind) +{ + const struct rtnl_link_ops *ops; + + list_for_each_entry(ops, &link_ops, list) { + if (!strcmp(ops->kind, kind)) + return ops; + } + return NULL; +} + +static size_t rtnl_link_get_size(const struct net_device *dev) +{ + const struct rtnl_link_ops *ops = dev->rtnl_link_ops; + size_t size; + + if (!ops) + return 0; + + size = nlmsg_total_size(sizeof(struct nlattr)) + /* IFLA_LINKINFO */ + nlmsg_total_size(strlen(ops->kind) + 1); /* IFLA_INFO_KIND */ + + if (ops->get_size) + /* IFLA_INFO_DATA + nested data */ + size += nlmsg_total_size(sizeof(struct nlattr)) + + ops->get_size(dev); + + if (ops->get_xstats_size) + size += ops->get_xstats_size(dev); /* IFLA_INFO_XSTATS */ + + return size; +} + +static int rtnl_link_fill(struct sk_buff *skb, const struct net_device *dev) +{ + const struct rtnl_link_ops *ops = dev->rtnl_link_ops; + struct nlattr *linkinfo, *data; + int err = -EMSGSIZE; + + linkinfo = nla_nest_start(skb, IFLA_LINKINFO); + if (linkinfo == NULL) + goto out; + + if (nla_put_string(skb, IFLA_INFO_KIND, ops->kind) < 0) + goto err_cancel_link; + if (ops->fill_xstats) { + err = ops->fill_xstats(skb, dev); + if (err < 0) + goto err_cancel_link; + } + if (ops->fill_info) { + data = nla_nest_start(skb, IFLA_INFO_DATA); + if (data == NULL) + goto err_cancel_link; + err = ops->fill_info(skb, dev); + if (err < 0) + goto err_cancel_data; + nla_nest_end(skb, data); + } + + nla_nest_end(skb, linkinfo); + return 0; + +err_cancel_data: + nla_nest_cancel(skb, data); +err_cancel_link: + nla_nest_cancel(skb, linkinfo); +out: + return err; +} + static const int rtm_min[RTM_NR_FAMILIES] = { [RTM_FAM(RTM_NEWLINK)] = NLMSG_LENGTH(sizeof(struct ifinfomsg)), @@ -437,7 +594,7 @@ static void copy_rtnl_link_stats(struct rtnl_link_stats *a, a->tx_compressed = b->tx_compressed; }; -static inline size_t if_nlmsg_size(void) +static inline size_t if_nlmsg_size(const struct net_device *dev) { return NLMSG_ALIGN(sizeof(struct ifinfomsg)) + nla_total_size(IFNAMSIZ) /* IFLA_IFNAME */ @@ -452,7 +609,8 @@ static inline size_t if_nlmsg_size(void) + nla_total_size(4) /* IFLA_LINK */ + nla_total_size(4) /* IFLA_MASTER */ + nla_total_size(1) /* IFLA_OPERSTATE */ - + nla_total_size(1); /* IFLA_LINKMODE */ + + nla_total_size(1) /* IFLA_LINKMODE */ + + rtnl_link_get_size(dev); /* IFLA_LINKINFO */ } static int rtnl_fill_ifinfo(struct sk_buff *skb, struct net_device *dev, @@ -522,6 +680,11 @@ static int rtnl_fill_ifinfo(struct sk_buff *skb, struct net_device *dev, } } + if (dev->rtnl_link_ops) { + if (rtnl_link_fill(skb, dev) < 0) + goto nla_put_failure; + } + return nlmsg_end(skb, nlh); nla_put_failure: @@ -553,6 +716,8 @@ cont: static const struct nla_policy ifla_policy[IFLA_MAX+1] = { [IFLA_IFNAME] = { .type = NLA_STRING, .len = IFNAMSIZ-1 }, + [IFLA_ADDRESS] = { .type = NLA_BINARY, .len = MAX_ADDR_LEN }, + [IFLA_BROADCAST] = { .type = NLA_BINARY, .len = MAX_ADDR_LEN }, [IFLA_MAP] = { .len = sizeof(struct rtnl_link_ifmap) }, [IFLA_MTU] = { .type = NLA_U32 }, [IFLA_TXQLEN] = { .type = NLA_U32 }, @@ -561,44 +726,16 @@ static const struct nla_policy ifla_policy[IFLA_MAX+1] = { [IFLA_LINKMODE] = { .type = NLA_U8 }, }; -static int rtnl_setlink(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg) -{ - struct ifinfomsg *ifm; - struct net_device *dev; - int err, send_addr_notify = 0, modified = 0; - struct nlattr *tb[IFLA_MAX+1]; - char ifname[IFNAMSIZ]; - - err = nlmsg_parse(nlh, sizeof(*ifm), tb, IFLA_MAX, ifla_policy); - if (err < 0) - goto errout; - - if (tb[IFLA_IFNAME]) - nla_strlcpy(ifname, tb[IFLA_IFNAME], IFNAMSIZ); - else - ifname[0] = '\0'; - - err = -EINVAL; - ifm = nlmsg_data(nlh); - if (ifm->ifi_index > 0) - dev = dev_get_by_index(ifm->ifi_index); - else if (tb[IFLA_IFNAME]) - dev = dev_get_by_name(ifname); - else - goto errout; - - if (dev == NULL) { - err = -ENODEV; - goto errout; - } - - if (tb[IFLA_ADDRESS] && - nla_len(tb[IFLA_ADDRESS]) < dev->addr_len) - goto errout_dev; +static const struct nla_policy ifla_info_policy[IFLA_INFO_MAX+1] = { + [IFLA_INFO_KIND] = { .type = NLA_STRING }, + [IFLA_INFO_DATA] = { .type = NLA_NESTED }, +}; - if (tb[IFLA_BROADCAST] && - nla_len(tb[IFLA_BROADCAST]) < dev->addr_len) - goto errout_dev; +static int do_setlink(struct net_device *dev, struct ifinfomsg *ifm, + struct nlattr **tb, char *ifname, int modified) +{ + int send_addr_notify = 0; + int err; if (tb[IFLA_MAP]) { struct rtnl_link_ifmap *u_map; @@ -606,12 +743,12 @@ static int rtnl_setlink(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg) if (!dev->set_config) { err = -EOPNOTSUPP; - goto errout_dev; + goto errout; } if (!netif_device_present(dev)) { err = -ENODEV; - goto errout_dev; + goto errout; } u_map = nla_data(tb[IFLA_MAP]); @@ -624,7 +761,7 @@ static int rtnl_setlink(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg) err = dev->set_config(dev, &k_map); if (err < 0) - goto errout_dev; + goto errout; modified = 1; } @@ -635,19 +772,19 @@ static int rtnl_setlink(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg) if (!dev->set_mac_address) { err = -EOPNOTSUPP; - goto errout_dev; + goto errout; } if (!netif_device_present(dev)) { err = -ENODEV; - goto errout_dev; + goto errout; } len = sizeof(sa_family_t) + dev->addr_len; sa = kmalloc(len, GFP_KERNEL); if (!sa) { err = -ENOMEM; - goto errout_dev; + goto errout; } sa->sa_family = dev->type; memcpy(sa->sa_data, nla_data(tb[IFLA_ADDRESS]), @@ -655,7 +792,7 @@ static int rtnl_setlink(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg) err = dev->set_mac_address(dev, sa); kfree(sa); if (err) - goto errout_dev; + goto errout; send_addr_notify = 1; modified = 1; } @@ -663,7 +800,7 @@ static int rtnl_setlink(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg) if (tb[IFLA_MTU]) { err = dev_set_mtu(dev, nla_get_u32(tb[IFLA_MTU])); if (err < 0) - goto errout_dev; + goto errout; modified = 1; } @@ -675,7 +812,7 @@ static int rtnl_setlink(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg) if (ifm->ifi_index > 0 && ifname[0]) { err = dev_change_name(dev, ifname); if (err < 0) - goto errout_dev; + goto errout; modified = 1; } @@ -684,7 +821,6 @@ static int rtnl_setlink(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg) send_addr_notify = 1; } - if (ifm->ifi_flags || ifm->ifi_change) { unsigned int flags = ifm->ifi_flags; @@ -712,7 +848,7 @@ static int rtnl_setlink(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg) err = 0; -errout_dev: +errout: if (err < 0 && modified && net_ratelimit()) printk(KERN_WARNING "A link change request failed with " "some changes comitted already. Interface %s may " @@ -721,12 +857,241 @@ errout_dev: if (send_addr_notify) call_netdevice_notifiers(NETDEV_CHANGEADDR, dev); + return err; +} + +static int rtnl_setlink(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg) +{ + struct ifinfomsg *ifm; + struct net_device *dev; + int err; + struct nlattr *tb[IFLA_MAX+1]; + char ifname[IFNAMSIZ]; + + err = nlmsg_parse(nlh, sizeof(*ifm), tb, IFLA_MAX, ifla_policy); + if (err < 0) + goto errout; + + if (tb[IFLA_IFNAME]) + nla_strlcpy(ifname, tb[IFLA_IFNAME], IFNAMSIZ); + else + ifname[0] = '\0'; + + err = -EINVAL; + ifm = nlmsg_data(nlh); + if (ifm->ifi_index > 0) + dev = dev_get_by_index(ifm->ifi_index); + else if (tb[IFLA_IFNAME]) + dev = dev_get_by_name(ifname); + else + goto errout; + + if (dev == NULL) { + err = -ENODEV; + goto errout; + } + if (tb[IFLA_ADDRESS] && + nla_len(tb[IFLA_ADDRESS]) < dev->addr_len) + goto errout_dev; + + if (tb[IFLA_BROADCAST] && + nla_len(tb[IFLA_BROADCAST]) < dev->addr_len) + goto errout_dev; + + err = do_setlink(dev, ifm, tb, ifname, 0); +errout_dev: dev_put(dev); errout: return err; } +static int rtnl_dellink(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg) +{ + const struct rtnl_link_ops *ops; + struct net_device *dev; + struct ifinfomsg *ifm; + char ifname[IFNAMSIZ]; + struct nlattr *tb[IFLA_MAX+1]; + int err; + + err = nlmsg_parse(nlh, sizeof(*ifm), tb, IFLA_MAX, ifla_policy); + if (err < 0) + return err; + + if (tb[IFLA_IFNAME]) + nla_strlcpy(ifname, tb[IFLA_IFNAME], IFNAMSIZ); + + ifm = nlmsg_data(nlh); + if (ifm->ifi_index > 0) + dev = __dev_get_by_index(ifm->ifi_index); + else if (tb[IFLA_IFNAME]) + dev = __dev_get_by_name(ifname); + else + return -EINVAL; + + if (!dev) + return -ENODEV; + + ops = dev->rtnl_link_ops; + if (!ops) + return -EOPNOTSUPP; + + ops->dellink(dev); + return 0; +} + +static int rtnl_newlink(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg) +{ + const struct rtnl_link_ops *ops; + struct net_device *dev; + struct ifinfomsg *ifm; + char kind[MODULE_NAME_LEN]; + char ifname[IFNAMSIZ]; + struct nlattr *tb[IFLA_MAX+1]; + struct nlattr *linkinfo[IFLA_INFO_MAX+1]; + int err; + +#ifdef CONFIG_KMOD +replay: +#endif + err = nlmsg_parse(nlh, sizeof(*ifm), tb, IFLA_MAX, ifla_policy); + if (err < 0) + return err; + + if (tb[IFLA_IFNAME]) + nla_strlcpy(ifname, tb[IFLA_IFNAME], IFNAMSIZ); + else + ifname[0] = '\0'; + + ifm = nlmsg_data(nlh); + if (ifm->ifi_index > 0) + dev = __dev_get_by_index(ifm->ifi_index); + else if (ifname[0]) + dev = __dev_get_by_name(ifname); + else + dev = NULL; + + if (tb[IFLA_LINKINFO]) { + err = nla_parse_nested(linkinfo, IFLA_INFO_MAX, + tb[IFLA_LINKINFO], ifla_info_policy); + if (err < 0) + return err; + } else + memset(linkinfo, 0, sizeof(linkinfo)); + + if (linkinfo[IFLA_INFO_KIND]) { + nla_strlcpy(kind, linkinfo[IFLA_INFO_KIND], sizeof(kind)); + ops = rtnl_link_ops_get(kind); + } else { + kind[0] = '\0'; + ops = NULL; + } + + if (1) { + struct nlattr *attr[ops ? ops->maxtype + 1 : 0], **data = NULL; + + if (ops) { + if (ops->maxtype && linkinfo[IFLA_INFO_DATA]) { + err = nla_parse_nested(attr, ops->maxtype, + linkinfo[IFLA_INFO_DATA], + ops->policy); + if (err < 0) + return err; + data = attr; + } + if (ops->validate) { + err = ops->validate(tb, data); + if (err < 0) + return err; + } + } + + if (dev) { + int modified = 0; + + if (nlh->nlmsg_flags & NLM_F_EXCL) + return -EEXIST; + if (nlh->nlmsg_flags & NLM_F_REPLACE) + return -EOPNOTSUPP; + + if (linkinfo[IFLA_INFO_DATA]) { + if (!ops || ops != dev->rtnl_link_ops || + !ops->changelink) + return -EOPNOTSUPP; + + err = ops->changelink(dev, tb, data); + if (err < 0) + return err; + modified = 1; + } + + return do_setlink(dev, ifm, tb, ifname, modified); + } + + if (!(nlh->nlmsg_flags & NLM_F_CREATE)) + return -ENODEV; + + if (ifm->ifi_index || ifm->ifi_flags || ifm->ifi_change) + return -EOPNOTSUPP; + if (tb[IFLA_MAP] || tb[IFLA_MASTER] || tb[IFLA_PROTINFO]) + return -EOPNOTSUPP; + + if (!ops) { +#ifdef CONFIG_KMOD + if (kind[0]) { + __rtnl_unlock(); + request_module("rtnl-link-%s", kind); + rtnl_lock(); + ops = rtnl_link_ops_get(kind); + if (ops) + goto replay; + } +#endif + return -EOPNOTSUPP; + } + + if (!ifname[0]) + snprintf(ifname, IFNAMSIZ, "%s%%d", ops->kind); + dev = alloc_netdev(ops->priv_size, ifname, ops->setup); + if (!dev) + return -ENOMEM; + + if (strchr(dev->name, '%')) { + err = dev_alloc_name(dev, dev->name); + if (err < 0) + goto err_free; + } + dev->rtnl_link_ops = ops; + + if (tb[IFLA_MTU]) + dev->mtu = nla_get_u32(tb[IFLA_MTU]); + if (tb[IFLA_ADDRESS]) + memcpy(dev->dev_addr, nla_data(tb[IFLA_ADDRESS]), + nla_len(tb[IFLA_ADDRESS])); + if (tb[IFLA_BROADCAST]) + memcpy(dev->broadcast, nla_data(tb[IFLA_BROADCAST]), + nla_len(tb[IFLA_BROADCAST])); + if (tb[IFLA_TXQLEN]) + dev->tx_queue_len = nla_get_u32(tb[IFLA_TXQLEN]); + if (tb[IFLA_WEIGHT]) + dev->weight = nla_get_u32(tb[IFLA_WEIGHT]); + if (tb[IFLA_OPERSTATE]) + set_operstate(dev, nla_get_u8(tb[IFLA_OPERSTATE])); + if (tb[IFLA_LINKMODE]) + dev->link_mode = nla_get_u8(tb[IFLA_LINKMODE]); + + if (ops->newlink) + err = ops->newlink(dev, tb, data); + else + err = register_netdevice(dev); +err_free: + if (err < 0) + free_netdev(dev); + return err; + } +} + static int rtnl_getlink(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg) { struct ifinfomsg *ifm; @@ -747,7 +1112,7 @@ static int rtnl_getlink(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg) } else return -EINVAL; - nskb = nlmsg_new(if_nlmsg_size(), GFP_KERNEL); + nskb = nlmsg_new(if_nlmsg_size(dev), GFP_KERNEL); if (nskb == NULL) { err = -ENOBUFS; goto errout; @@ -797,7 +1162,7 @@ void rtmsg_ifinfo(int type, struct net_device *dev, unsigned change) struct sk_buff *skb; int err = -ENOBUFS; - skb = nlmsg_new(if_nlmsg_size(), GFP_KERNEL); + skb = nlmsg_new(if_nlmsg_size(dev), GFP_KERNEL); if (skb == NULL) goto errout; @@ -952,6 +1317,8 @@ void __init rtnetlink_init(void) rtnl_register(PF_UNSPEC, RTM_GETLINK, rtnl_getlink, rtnl_dump_ifinfo); rtnl_register(PF_UNSPEC, RTM_SETLINK, rtnl_setlink, NULL); + rtnl_register(PF_UNSPEC, RTM_NEWLINK, rtnl_newlink, NULL); + rtnl_register(PF_UNSPEC, RTM_DELLINK, rtnl_dellink, NULL); rtnl_register(PF_UNSPEC, RTM_GETADDR, NULL, rtnl_dump_all); rtnl_register(PF_UNSPEC, RTM_GETROUTE, NULL, rtnl_dump_all); @@ -960,6 +1327,7 @@ void __init rtnetlink_init(void) EXPORT_SYMBOL(__rta_fill); EXPORT_SYMBOL(rtattr_strlcpy); EXPORT_SYMBOL(rtattr_parse); +EXPORT_SYMBOL(__rtattr_parse_nested_compat); EXPORT_SYMBOL(rtnetlink_put_metrics); EXPORT_SYMBOL(rtnl_lock); EXPORT_SYMBOL(rtnl_trylock); diff --git a/net/core/scm.c b/net/core/scm.c index 292ad8d5ad76..44c4ec2c8769 100644 --- a/net/core/scm.c +++ b/net/core/scm.c @@ -228,7 +228,8 @@ void scm_detach_fds(struct msghdr *msg, struct scm_cookie *scm) err = security_file_receive(fp[i]); if (err) break; - err = get_unused_fd(); + err = get_unused_fd_flags(MSG_CMSG_CLOEXEC & msg->msg_flags + ? O_CLOEXEC : 0); if (err < 0) break; new_fd = err; diff --git a/net/core/skbuff.c b/net/core/skbuff.c index 3943c3ad9145..35021eb3ed07 100644 --- a/net/core/skbuff.c +++ b/net/core/skbuff.c @@ -415,9 +415,11 @@ struct sk_buff *skb_clone(struct sk_buff *skb, gfp_t gfp_mask) C(csum); C(local_df); n->cloned = 1; + n->hdr_len = skb->nohdr ? skb_headroom(skb) : skb->hdr_len; n->nohdr = 0; C(pkt_type); C(ip_summed); + skb_copy_queue_mapping(n, skb); C(priority); #if defined(CONFIG_IP_VS) || defined(CONFIG_IP_VS_MODULE) C(ipvs_property); @@ -426,6 +428,10 @@ struct sk_buff *skb_clone(struct sk_buff *skb, gfp_t gfp_mask) n->destructor = NULL; C(mark); __nf_copy(n, skb); +#if defined(CONFIG_NETFILTER_XT_TARGET_TRACE) || \ + defined(CONFIG_NETFILTER_XT_TARGET_TRACE_MODULE) + C(nf_trace); +#endif #ifdef CONFIG_NET_SCHED C(tc_index); #ifdef CONFIG_NET_CLS_ACT @@ -459,6 +465,7 @@ static void copy_skb_header(struct sk_buff *new, const struct sk_buff *old) #endif new->sk = NULL; new->dev = old->dev; + skb_copy_queue_mapping(new, old); new->priority = old->priority; new->protocol = old->protocol; new->dst = dst_clone(old->dst); @@ -482,6 +489,10 @@ static void copy_skb_header(struct sk_buff *new, const struct sk_buff *old) new->destructor = NULL; new->mark = old->mark; __nf_copy(new, old); +#if defined(CONFIG_NETFILTER_XT_TARGET_TRACE) || \ + defined(CONFIG_NETFILTER_XT_TARGET_TRACE_MODULE) + new->nf_trace = old->nf_trace; +#endif #if defined(CONFIG_IP_VS) || defined(CONFIG_IP_VS_MODULE) new->ipvs_property = old->ipvs_property; #endif @@ -676,6 +687,7 @@ int pskb_expand_head(struct sk_buff *skb, int nhead, int ntail, skb->network_header += off; skb->mac_header += off; skb->cloned = 0; + skb->hdr_len = 0; skb->nohdr = 0; atomic_set(&skb_shinfo(skb)->dataref, 1); return 0; @@ -1930,6 +1942,7 @@ struct sk_buff *skb_segment(struct sk_buff *skb, int features) tail = nskb; nskb->dev = skb->dev; + skb_copy_queue_mapping(nskb, skb); nskb->priority = skb->priority; nskb->protocol = skb->protocol; nskb->dst = dst_clone(skb->dst); @@ -2008,13 +2021,13 @@ void __init skb_init(void) sizeof(struct sk_buff), 0, SLAB_HWCACHE_ALIGN|SLAB_PANIC, - NULL, NULL); + NULL); skbuff_fclone_cache = kmem_cache_create("skbuff_fclone_cache", (2*sizeof(struct sk_buff)) + sizeof(atomic_t), 0, SLAB_HWCACHE_ALIGN|SLAB_PANIC, - NULL, NULL); + NULL); } /** diff --git a/net/core/sock.c b/net/core/sock.c index c14ce0198d25..190de61cd648 100644 --- a/net/core/sock.c +++ b/net/core/sock.c @@ -171,6 +171,20 @@ static const char *af_family_slock_key_strings[AF_MAX+1] = { "slock-AF_TIPC" , "slock-AF_BLUETOOTH", "slock-AF_IUCV" , "slock-AF_RXRPC" , "slock-AF_MAX" }; +static const char *af_family_clock_key_strings[AF_MAX+1] = { + "clock-AF_UNSPEC", "clock-AF_UNIX" , "clock-AF_INET" , + "clock-AF_AX25" , "clock-AF_IPX" , "clock-AF_APPLETALK", + "clock-AF_NETROM", "clock-AF_BRIDGE" , "clock-AF_ATMPVC" , + "clock-AF_X25" , "clock-AF_INET6" , "clock-AF_ROSE" , + "clock-AF_DECnet", "clock-AF_NETBEUI" , "clock-AF_SECURITY" , + "clock-AF_KEY" , "clock-AF_NETLINK" , "clock-AF_PACKET" , + "clock-AF_ASH" , "clock-AF_ECONET" , "clock-AF_ATMSVC" , + "clock-21" , "clock-AF_SNA" , "clock-AF_IRDA" , + "clock-AF_PPPOX" , "clock-AF_WANPIPE" , "clock-AF_LLC" , + "clock-27" , "clock-28" , "clock-29" , + "clock-AF_TIPC" , "clock-AF_BLUETOOTH", "clock-AF_IUCV" , + "clock-AF_RXRPC" , "clock-AF_MAX" +}; #endif /* @@ -210,13 +224,14 @@ static int sock_set_timeout(long *timeo_p, char __user *optval, int optlen) return -EDOM; if (tv.tv_sec < 0) { - static int warned = 0; + static int warned __read_mostly; + *timeo_p = 0; if (warned < 10 && net_ratelimit()) warned++; printk(KERN_INFO "sock_set_timeout: `%s' (pid %d) " "tries to set negative timeout\n", - current->comm, current->pid); + current->comm, current->pid); return 0; } *timeo_p = MAX_SCHEDULE_TIMEOUT; @@ -347,6 +362,61 @@ struct dst_entry *sk_dst_check(struct sock *sk, u32 cookie) } EXPORT_SYMBOL(sk_dst_check); +static int sock_bindtodevice(struct sock *sk, char __user *optval, int optlen) +{ + int ret = -ENOPROTOOPT; +#ifdef CONFIG_NETDEVICES + char devname[IFNAMSIZ]; + int index; + + /* Sorry... */ + ret = -EPERM; + if (!capable(CAP_NET_RAW)) + goto out; + + ret = -EINVAL; + if (optlen < 0) + goto out; + + /* Bind this socket to a particular device like "eth0", + * as specified in the passed interface name. If the + * name is "" or the option length is zero the socket + * is not bound. + */ + if (optlen > IFNAMSIZ - 1) + optlen = IFNAMSIZ - 1; + memset(devname, 0, sizeof(devname)); + + ret = -EFAULT; + if (copy_from_user(devname, optval, optlen)) + goto out; + + if (devname[0] == '\0') { + index = 0; + } else { + struct net_device *dev = dev_get_by_name(devname); + + ret = -ENODEV; + if (!dev) + goto out; + + index = dev->ifindex; + dev_put(dev); + } + + lock_sock(sk); + sk->sk_bound_dev_if = index; + sk_dst_reset(sk); + release_sock(sk); + + ret = 0; + +out: +#endif + + return ret; +} + /* * This is meant for all protocols to use and covers goings on * at the socket level. Everything here is generic. @@ -375,6 +445,9 @@ int sock_setsockopt(struct socket *sock, int level, int optname, } #endif + if (optname == SO_BINDTODEVICE) + return sock_bindtodevice(sk, optval, optlen); + if (optlen < sizeof(int)) return -EINVAL; @@ -563,54 +636,6 @@ set_rcvbuf: ret = sock_set_timeout(&sk->sk_sndtimeo, optval, optlen); break; -#ifdef CONFIG_NETDEVICES - case SO_BINDTODEVICE: - { - char devname[IFNAMSIZ]; - - /* Sorry... */ - if (!capable(CAP_NET_RAW)) { - ret = -EPERM; - break; - } - - /* Bind this socket to a particular device like "eth0", - * as specified in the passed interface name. If the - * name is "" or the option length is zero the socket - * is not bound. - */ - - if (!valbool) { - sk->sk_bound_dev_if = 0; - } else { - if (optlen > IFNAMSIZ - 1) - optlen = IFNAMSIZ - 1; - memset(devname, 0, sizeof(devname)); - if (copy_from_user(devname, optval, optlen)) { - ret = -EFAULT; - break; - } - - /* Remove any cached route for this socket. */ - sk_dst_reset(sk); - - if (devname[0] == '\0') { - sk->sk_bound_dev_if = 0; - } else { - struct net_device *dev = dev_get_by_name(devname); - if (!dev) { - ret = -ENODEV; - break; - } - sk->sk_bound_dev_if = dev->ifindex; - dev_put(dev); - } - } - break; - } -#endif - - case SO_ATTACH_FILTER: ret = -EINVAL; if (optlen == sizeof(struct sock_fprog)) { @@ -940,8 +965,9 @@ struct sock *sk_clone(const struct sock *sk, const gfp_t priority) rwlock_init(&newsk->sk_dst_lock); rwlock_init(&newsk->sk_callback_lock); - lockdep_set_class(&newsk->sk_callback_lock, - af_callback_keys + newsk->sk_family); + lockdep_set_class_and_name(&newsk->sk_callback_lock, + af_callback_keys + newsk->sk_family, + af_family_clock_key_strings[newsk->sk_family]); newsk->sk_dst_cache = NULL; newsk->sk_wmem_queued = 0; @@ -1529,8 +1555,9 @@ void sock_init_data(struct socket *sock, struct sock *sk) rwlock_init(&sk->sk_dst_lock); rwlock_init(&sk->sk_callback_lock); - lockdep_set_class(&sk->sk_callback_lock, - af_callback_keys + sk->sk_family); + lockdep_set_class_and_name(&sk->sk_callback_lock, + af_callback_keys + sk->sk_family, + af_family_clock_key_strings[sk->sk_family]); sk->sk_state_change = sock_def_wakeup; sk->sk_data_ready = sock_def_readable; @@ -1751,7 +1778,7 @@ int proto_register(struct proto *prot, int alloc_slab) if (alloc_slab) { prot->slab = kmem_cache_create(prot->name, prot->obj_size, 0, - SLAB_HWCACHE_ALIGN, NULL, NULL); + SLAB_HWCACHE_ALIGN, NULL); if (prot->slab == NULL) { printk(KERN_CRIT "%s: Can't create sock SLAB cache!\n", @@ -1769,7 +1796,7 @@ int proto_register(struct proto *prot, int alloc_slab) sprintf(request_sock_slab_name, mask, prot->name); prot->rsk_prot->slab = kmem_cache_create(request_sock_slab_name, prot->rsk_prot->obj_size, 0, - SLAB_HWCACHE_ALIGN, NULL, NULL); + SLAB_HWCACHE_ALIGN, NULL); if (prot->rsk_prot->slab == NULL) { printk(KERN_CRIT "%s: Can't create request sock SLAB cache!\n", @@ -1791,7 +1818,7 @@ int proto_register(struct proto *prot, int alloc_slab) kmem_cache_create(timewait_sock_slab_name, prot->twsk_prot->twsk_obj_size, 0, SLAB_HWCACHE_ALIGN, - NULL, NULL); + NULL); if (prot->twsk_prot->twsk_slab == NULL) goto out_free_timewait_sock_slab_name; } @@ -1851,46 +1878,15 @@ void proto_unregister(struct proto *prot) EXPORT_SYMBOL(proto_unregister); #ifdef CONFIG_PROC_FS -static inline struct proto *__proto_head(void) -{ - return list_entry(proto_list.next, struct proto, node); -} - -static inline struct proto *proto_head(void) -{ - return list_empty(&proto_list) ? NULL : __proto_head(); -} - -static inline struct proto *proto_next(struct proto *proto) -{ - return proto->node.next == &proto_list ? NULL : - list_entry(proto->node.next, struct proto, node); -} - -static inline struct proto *proto_get_idx(loff_t pos) -{ - struct proto *proto; - loff_t i = 0; - - list_for_each_entry(proto, &proto_list, node) - if (i++ == pos) - goto out; - - proto = NULL; -out: - return proto; -} - static void *proto_seq_start(struct seq_file *seq, loff_t *pos) { read_lock(&proto_list_lock); - return *pos ? proto_get_idx(*pos - 1) : SEQ_START_TOKEN; + return seq_list_start_head(&proto_list, *pos); } static void *proto_seq_next(struct seq_file *seq, void *v, loff_t *pos) { - ++*pos; - return v == SEQ_START_TOKEN ? proto_head() : proto_next(v); + return seq_list_next(v, &proto_list, pos); } static void proto_seq_stop(struct seq_file *seq, void *v) @@ -1938,7 +1934,7 @@ static void proto_seq_printf(struct seq_file *seq, struct proto *proto) static int proto_seq_show(struct seq_file *seq, void *v) { - if (v == SEQ_START_TOKEN) + if (v == &proto_list) seq_printf(seq, "%-9s %-4s %-8s %-6s %-5s %-7s %-4s %-10s %s", "protocol", "size", @@ -1950,7 +1946,7 @@ static int proto_seq_show(struct seq_file *seq, void *v) "module", "cl co di ac io in de sh ss gs se re sp bi br ha uh gp em\n"); else - proto_seq_printf(seq, v); + proto_seq_printf(seq, list_entry(v, struct proto, node)); return 0; } diff --git a/net/core/utils.c b/net/core/utils.c index 2030bb8c2d30..0bf17da40d52 100644 --- a/net/core/utils.c +++ b/net/core/utils.c @@ -25,6 +25,7 @@ #include <linux/random.h> #include <linux/percpu.h> #include <linux/init.h> +#include <net/sock.h> #include <asm/byteorder.h> #include <asm/system.h> diff --git a/net/dccp/ackvec.c b/net/dccp/ackvec.c index 01030f346177..7ac775f9a64b 100644 --- a/net/dccp/ackvec.c +++ b/net/dccp/ackvec.c @@ -481,14 +481,14 @@ int __init dccp_ackvec_init(void) { dccp_ackvec_slab = kmem_cache_create("dccp_ackvec", sizeof(struct dccp_ackvec), 0, - SLAB_HWCACHE_ALIGN, NULL, NULL); + SLAB_HWCACHE_ALIGN, NULL); if (dccp_ackvec_slab == NULL) goto out_err; dccp_ackvec_record_slab = kmem_cache_create("dccp_ackvec_record", sizeof(struct dccp_ackvec_record), - 0, SLAB_HWCACHE_ALIGN, NULL, NULL); + 0, SLAB_HWCACHE_ALIGN, NULL); if (dccp_ackvec_record_slab == NULL) goto out_destroy_slab; diff --git a/net/dccp/ccid.c b/net/dccp/ccid.c index d8cf92f09e68..c45088b5e6fb 100644 --- a/net/dccp/ccid.c +++ b/net/dccp/ccid.c @@ -40,6 +40,7 @@ static inline void ccids_write_unlock(void) static inline void ccids_read_lock(void) { atomic_inc(&ccids_lockct); + smp_mb__after_atomic_inc(); spin_unlock_wait(&ccids_lock); } @@ -69,7 +70,7 @@ static struct kmem_cache *ccid_kmem_cache_create(int obj_size, const char *fmt,. if (slab_name == NULL) return NULL; slab = kmem_cache_create(slab_name, sizeof(struct ccid) + obj_size, 0, - SLAB_HWCACHE_ALIGN, NULL, NULL); + SLAB_HWCACHE_ALIGN, NULL); if (slab == NULL) kfree(slab_name); return slab; diff --git a/net/dccp/ccids/ccid3.c b/net/dccp/ccids/ccid3.c index ec7fa4d67f08..e91c2b9dc27b 100644 --- a/net/dccp/ccids/ccid3.c +++ b/net/dccp/ccids/ccid3.c @@ -1,8 +1,8 @@ /* * net/dccp/ccids/ccid3.c * - * Copyright (c) 2005 The University of Waikato, Hamilton, New Zealand. - * Copyright (c) 2005-6 Ian McDonald <ian.mcdonald@jandi.co.nz> + * Copyright (c) 2005-7 The University of Waikato, Hamilton, New Zealand. + * Copyright (c) 2005-7 Ian McDonald <ian.mcdonald@jandi.co.nz> * * An implementation of the DCCP protocol * @@ -49,7 +49,6 @@ static int ccid3_debug; static struct dccp_tx_hist *ccid3_tx_hist; static struct dccp_rx_hist *ccid3_rx_hist; -static struct dccp_li_hist *ccid3_li_hist; /* * Transmitter Half-Connection Routines @@ -194,25 +193,20 @@ static inline void ccid3_hc_tx_update_s(struct ccid3_hc_tx_sock *hctx, int len) * The algorithm is not applicable if RTT < 4 microseconds. */ static inline void ccid3_hc_tx_update_win_count(struct ccid3_hc_tx_sock *hctx, - struct timeval *now) + ktime_t now) { - suseconds_t delta; u32 quarter_rtts; if (unlikely(hctx->ccid3hctx_rtt < 4)) /* avoid divide-by-zero */ return; - delta = timeval_delta(now, &hctx->ccid3hctx_t_last_win_count); - DCCP_BUG_ON(delta < 0); - - quarter_rtts = (u32)delta / (hctx->ccid3hctx_rtt / 4); + quarter_rtts = ktime_us_delta(now, hctx->ccid3hctx_t_last_win_count); + quarter_rtts /= hctx->ccid3hctx_rtt / 4; if (quarter_rtts > 0) { - hctx->ccid3hctx_t_last_win_count = *now; + hctx->ccid3hctx_t_last_win_count = now; hctx->ccid3hctx_last_win_count += min_t(u32, quarter_rtts, 5); hctx->ccid3hctx_last_win_count &= 0xF; /* mod 16 */ - - ccid3_pr_debug("now at %#X\n", hctx->ccid3hctx_last_win_count); } } @@ -312,8 +306,8 @@ static int ccid3_hc_tx_send_packet(struct sock *sk, struct sk_buff *skb) { struct dccp_sock *dp = dccp_sk(sk); struct ccid3_hc_tx_sock *hctx = ccid3_hc_tx_sk(sk); - struct timeval now; - suseconds_t delay; + ktime_t now = ktime_get_real(); + s64 delay; BUG_ON(hctx == NULL); @@ -325,8 +319,6 @@ static int ccid3_hc_tx_send_packet(struct sock *sk, struct sk_buff *skb) if (unlikely(skb->len == 0)) return -EBADMSG; - dccp_timestamp(sk, &now); - switch (hctx->ccid3hctx_state) { case TFRC_SSTATE_NO_SENT: sk_reset_timer(sk, &hctx->ccid3hctx_no_feedback_timer, @@ -349,7 +341,7 @@ static int ccid3_hc_tx_send_packet(struct sock *sk, struct sk_buff *skb) ccid3_pr_debug("SYN RTT = %uus\n", dp->dccps_syn_rtt); hctx->ccid3hctx_rtt = dp->dccps_syn_rtt; hctx->ccid3hctx_x = rfc3390_initial_rate(sk); - hctx->ccid3hctx_t_ld = now; + hctx->ccid3hctx_t_ld = ktime_to_timeval(now); } else { /* Sender does not have RTT sample: X = MSS/second */ hctx->ccid3hctx_x = dp->dccps_mss_cache; @@ -361,7 +353,7 @@ static int ccid3_hc_tx_send_packet(struct sock *sk, struct sk_buff *skb) break; case TFRC_SSTATE_NO_FBACK: case TFRC_SSTATE_FBACK: - delay = timeval_delta(&hctx->ccid3hctx_t_nom, &now); + delay = ktime_us_delta(hctx->ccid3hctx_t_nom, now); ccid3_pr_debug("delay=%ld\n", (long)delay); /* * Scheduling of packet transmissions [RFC 3448, 4.6] @@ -371,10 +363,10 @@ static int ccid3_hc_tx_send_packet(struct sock *sk, struct sk_buff *skb) * else * // send the packet in (t_nom - t_now) milliseconds. */ - if (delay - (suseconds_t)hctx->ccid3hctx_delta >= 0) - return delay / 1000L; + if (delay - (s64)hctx->ccid3hctx_delta >= 1000) + return (u32)delay / 1000L; - ccid3_hc_tx_update_win_count(hctx, &now); + ccid3_hc_tx_update_win_count(hctx, now); break; case TFRC_SSTATE_TERM: DCCP_BUG("%s(%p) - Illegal state TERM", dccp_role(sk), sk); @@ -387,8 +379,8 @@ static int ccid3_hc_tx_send_packet(struct sock *sk, struct sk_buff *skb) hctx->ccid3hctx_idle = 0; /* set the nominal send time for the next following packet */ - timeval_add_usecs(&hctx->ccid3hctx_t_nom, hctx->ccid3hctx_t_ipi); - + hctx->ccid3hctx_t_nom = ktime_add_us(hctx->ccid3hctx_t_nom, + hctx->ccid3hctx_t_ipi); return 0; } @@ -819,154 +811,6 @@ static int ccid3_hc_rx_insert_options(struct sock *sk, struct sk_buff *skb) return 0; } -/* calculate first loss interval - * - * returns estimated loss interval in usecs */ - -static u32 ccid3_hc_rx_calc_first_li(struct sock *sk) -{ - struct ccid3_hc_rx_sock *hcrx = ccid3_hc_rx_sk(sk); - struct dccp_rx_hist_entry *entry, *next, *tail = NULL; - u32 x_recv, p; - suseconds_t rtt, delta; - struct timeval tstamp = { 0, }; - int interval = 0; - int win_count = 0; - int step = 0; - u64 fval; - - list_for_each_entry_safe(entry, next, &hcrx->ccid3hcrx_hist, - dccphrx_node) { - if (dccp_rx_hist_entry_data_packet(entry)) { - tail = entry; - - switch (step) { - case 0: - tstamp = entry->dccphrx_tstamp; - win_count = entry->dccphrx_ccval; - step = 1; - break; - case 1: - interval = win_count - entry->dccphrx_ccval; - if (interval < 0) - interval += TFRC_WIN_COUNT_LIMIT; - if (interval > 4) - goto found; - break; - } - } - } - - if (unlikely(step == 0)) { - DCCP_WARN("%s(%p), packet history has no data packets!\n", - dccp_role(sk), sk); - return ~0; - } - - if (unlikely(interval == 0)) { - DCCP_WARN("%s(%p), Could not find a win_count interval > 0." - "Defaulting to 1\n", dccp_role(sk), sk); - interval = 1; - } -found: - if (!tail) { - DCCP_CRIT("tail is null\n"); - return ~0; - } - - delta = timeval_delta(&tstamp, &tail->dccphrx_tstamp); - DCCP_BUG_ON(delta < 0); - - rtt = delta * 4 / interval; - ccid3_pr_debug("%s(%p), approximated RTT to %dus\n", - dccp_role(sk), sk, (int)rtt); - - /* - * Determine the length of the first loss interval via inverse lookup. - * Assume that X_recv can be computed by the throughput equation - * s - * X_recv = -------- - * R * fval - * Find some p such that f(p) = fval; return 1/p [RFC 3448, 6.3.1]. - */ - if (rtt == 0) { /* would result in divide-by-zero */ - DCCP_WARN("RTT==0\n"); - return ~0; - } - - dccp_timestamp(sk, &tstamp); - delta = timeval_delta(&tstamp, &hcrx->ccid3hcrx_tstamp_last_feedback); - DCCP_BUG_ON(delta <= 0); - - x_recv = scaled_div32(hcrx->ccid3hcrx_bytes_recv, delta); - if (x_recv == 0) { /* would also trigger divide-by-zero */ - DCCP_WARN("X_recv==0\n"); - if ((x_recv = hcrx->ccid3hcrx_x_recv) == 0) { - DCCP_BUG("stored value of X_recv is zero"); - return ~0; - } - } - - fval = scaled_div(hcrx->ccid3hcrx_s, rtt); - fval = scaled_div32(fval, x_recv); - p = tfrc_calc_x_reverse_lookup(fval); - - ccid3_pr_debug("%s(%p), receive rate=%u bytes/s, implied " - "loss rate=%u\n", dccp_role(sk), sk, x_recv, p); - - if (p == 0) - return ~0; - else - return 1000000 / p; -} - -static void ccid3_hc_rx_update_li(struct sock *sk, u64 seq_loss, u8 win_loss) -{ - struct ccid3_hc_rx_sock *hcrx = ccid3_hc_rx_sk(sk); - struct dccp_li_hist_entry *head; - u64 seq_temp; - - if (list_empty(&hcrx->ccid3hcrx_li_hist)) { - if (!dccp_li_hist_interval_new(ccid3_li_hist, - &hcrx->ccid3hcrx_li_hist, seq_loss, win_loss)) - return; - - head = list_entry(hcrx->ccid3hcrx_li_hist.next, - struct dccp_li_hist_entry, dccplih_node); - head->dccplih_interval = ccid3_hc_rx_calc_first_li(sk); - } else { - struct dccp_li_hist_entry *entry; - struct list_head *tail; - - head = list_entry(hcrx->ccid3hcrx_li_hist.next, - struct dccp_li_hist_entry, dccplih_node); - /* FIXME win count check removed as was wrong */ - /* should make this check with receive history */ - /* and compare there as per section 10.2 of RFC4342 */ - - /* new loss event detected */ - /* calculate last interval length */ - seq_temp = dccp_delta_seqno(head->dccplih_seqno, seq_loss); - entry = dccp_li_hist_entry_new(ccid3_li_hist, GFP_ATOMIC); - - if (entry == NULL) { - DCCP_BUG("out of memory - can not allocate entry"); - return; - } - - list_add(&entry->dccplih_node, &hcrx->ccid3hcrx_li_hist); - - tail = hcrx->ccid3hcrx_li_hist.prev; - list_del(tail); - kmem_cache_free(ccid3_li_hist->dccplih_slab, tail); - - /* Create the newest interval */ - entry->dccplih_seqno = seq_loss; - entry->dccplih_interval = seq_temp; - entry->dccplih_win_count = win_loss; - } -} - static int ccid3_hc_rx_detect_loss(struct sock *sk, struct dccp_rx_hist_entry *packet) { @@ -992,8 +836,15 @@ static int ccid3_hc_rx_detect_loss(struct sock *sk, while (dccp_delta_seqno(hcrx->ccid3hcrx_seqno_nonloss, seqno) > TFRC_RECV_NUM_LATE_LOSS) { loss = 1; - ccid3_hc_rx_update_li(sk, hcrx->ccid3hcrx_seqno_nonloss, - hcrx->ccid3hcrx_ccval_nonloss); + dccp_li_update_li(sk, + &hcrx->ccid3hcrx_li_hist, + &hcrx->ccid3hcrx_hist, + &hcrx->ccid3hcrx_tstamp_last_feedback, + hcrx->ccid3hcrx_s, + hcrx->ccid3hcrx_bytes_recv, + hcrx->ccid3hcrx_x_recv, + hcrx->ccid3hcrx_seqno_nonloss, + hcrx->ccid3hcrx_ccval_nonloss); tmp_seqno = hcrx->ccid3hcrx_seqno_nonloss; dccp_inc_seqno(&tmp_seqno); hcrx->ccid3hcrx_seqno_nonloss = tmp_seqno; @@ -1152,7 +1003,7 @@ static void ccid3_hc_rx_exit(struct sock *sk) dccp_rx_hist_purge(ccid3_rx_hist, &hcrx->ccid3hcrx_hist); /* Empty loss interval history */ - dccp_li_hist_purge(ccid3_li_hist, &hcrx->ccid3hcrx_li_hist); + dccp_li_hist_purge(&hcrx->ccid3hcrx_li_hist); } static void ccid3_hc_rx_get_info(struct sock *sk, struct tcp_info *info) @@ -1236,19 +1087,12 @@ static __init int ccid3_module_init(void) if (ccid3_tx_hist == NULL) goto out_free_rx; - ccid3_li_hist = dccp_li_hist_new("ccid3"); - if (ccid3_li_hist == NULL) - goto out_free_tx; - rc = ccid_register(&ccid3); if (rc != 0) - goto out_free_loss_interval_history; + goto out_free_tx; out: return rc; -out_free_loss_interval_history: - dccp_li_hist_delete(ccid3_li_hist); - ccid3_li_hist = NULL; out_free_tx: dccp_tx_hist_delete(ccid3_tx_hist); ccid3_tx_hist = NULL; @@ -1271,10 +1115,6 @@ static __exit void ccid3_module_exit(void) dccp_rx_hist_delete(ccid3_rx_hist); ccid3_rx_hist = NULL; } - if (ccid3_li_hist != NULL) { - dccp_li_hist_delete(ccid3_li_hist); - ccid3_li_hist = NULL; - } } module_exit(ccid3_module_exit); diff --git a/net/dccp/ccids/ccid3.h b/net/dccp/ccids/ccid3.h index 8d31b389c19c..51d4b804e334 100644 --- a/net/dccp/ccids/ccid3.h +++ b/net/dccp/ccids/ccid3.h @@ -36,6 +36,7 @@ #ifndef _DCCP_CCID3_H_ #define _DCCP_CCID3_H_ +#include <linux/ktime.h> #include <linux/list.h> #include <linux/time.h> #include <linux/types.h> @@ -108,10 +109,10 @@ struct ccid3_hc_tx_sock { enum ccid3_hc_tx_states ccid3hctx_state:8; u8 ccid3hctx_last_win_count; u8 ccid3hctx_idle; - struct timeval ccid3hctx_t_last_win_count; + ktime_t ccid3hctx_t_last_win_count; struct timer_list ccid3hctx_no_feedback_timer; struct timeval ccid3hctx_t_ld; - struct timeval ccid3hctx_t_nom; + ktime_t ccid3hctx_t_nom; u32 ccid3hctx_delta; struct list_head ccid3hctx_hist; struct ccid3_options_received ccid3hctx_options_received; diff --git a/net/dccp/ccids/lib/loss_interval.c b/net/dccp/ccids/lib/loss_interval.c index 372d7e75cdd8..174d3f13d93f 100644 --- a/net/dccp/ccids/lib/loss_interval.c +++ b/net/dccp/ccids/lib/loss_interval.c @@ -1,8 +1,8 @@ /* * net/dccp/ccids/lib/loss_interval.c * - * Copyright (c) 2005 The University of Waikato, Hamilton, New Zealand. - * Copyright (c) 2005-6 Ian McDonald <ian.mcdonald@jandi.co.nz> + * Copyright (c) 2005-7 The University of Waikato, Hamilton, New Zealand. + * Copyright (c) 2005-7 Ian McDonald <ian.mcdonald@jandi.co.nz> * Copyright (c) 2005 Arnaldo Carvalho de Melo <acme@conectiva.com.br> * * This program is free software; you can redistribute it and/or modify @@ -15,58 +15,38 @@ #include <net/sock.h> #include "../../dccp.h" #include "loss_interval.h" +#include "packet_history.h" +#include "tfrc.h" -struct dccp_li_hist *dccp_li_hist_new(const char *name) -{ - struct dccp_li_hist *hist = kmalloc(sizeof(*hist), GFP_ATOMIC); - static const char dccp_li_hist_mask[] = "li_hist_%s"; - char *slab_name; - - if (hist == NULL) - goto out; - - slab_name = kmalloc(strlen(name) + sizeof(dccp_li_hist_mask) - 1, - GFP_ATOMIC); - if (slab_name == NULL) - goto out_free_hist; - - sprintf(slab_name, dccp_li_hist_mask, name); - hist->dccplih_slab = kmem_cache_create(slab_name, - sizeof(struct dccp_li_hist_entry), - 0, SLAB_HWCACHE_ALIGN, - NULL, NULL); - if (hist->dccplih_slab == NULL) - goto out_free_slab_name; -out: - return hist; -out_free_slab_name: - kfree(slab_name); -out_free_hist: - kfree(hist); - hist = NULL; - goto out; -} +#define DCCP_LI_HIST_IVAL_F_LENGTH 8 -EXPORT_SYMBOL_GPL(dccp_li_hist_new); +struct dccp_li_hist_entry { + struct list_head dccplih_node; + u64 dccplih_seqno:48, + dccplih_win_count:4; + u32 dccplih_interval; +}; -void dccp_li_hist_delete(struct dccp_li_hist *hist) -{ - const char* name = kmem_cache_name(hist->dccplih_slab); +static struct kmem_cache *dccp_li_cachep __read_mostly; - kmem_cache_destroy(hist->dccplih_slab); - kfree(name); - kfree(hist); +static inline struct dccp_li_hist_entry *dccp_li_hist_entry_new(const gfp_t prio) +{ + return kmem_cache_alloc(dccp_li_cachep, prio); } -EXPORT_SYMBOL_GPL(dccp_li_hist_delete); +static inline void dccp_li_hist_entry_delete(struct dccp_li_hist_entry *entry) +{ + if (entry != NULL) + kmem_cache_free(dccp_li_cachep, entry); +} -void dccp_li_hist_purge(struct dccp_li_hist *hist, struct list_head *list) +void dccp_li_hist_purge(struct list_head *list) { struct dccp_li_hist_entry *entry, *next; list_for_each_entry_safe(entry, next, list, dccplih_node) { list_del_init(&entry->dccplih_node); - kmem_cache_free(hist->dccplih_slab, entry); + kmem_cache_free(dccp_li_cachep, entry); } } @@ -118,16 +98,16 @@ u32 dccp_li_hist_calc_i_mean(struct list_head *list) EXPORT_SYMBOL_GPL(dccp_li_hist_calc_i_mean); -int dccp_li_hist_interval_new(struct dccp_li_hist *hist, - struct list_head *list, const u64 seq_loss, const u8 win_loss) +static int dccp_li_hist_interval_new(struct list_head *list, + const u64 seq_loss, const u8 win_loss) { struct dccp_li_hist_entry *entry; int i; for (i = 0; i < DCCP_LI_HIST_IVAL_F_LENGTH; i++) { - entry = dccp_li_hist_entry_new(hist, GFP_ATOMIC); + entry = dccp_li_hist_entry_new(GFP_ATOMIC); if (entry == NULL) { - dccp_li_hist_purge(hist, list); + dccp_li_hist_purge(list); DCCP_BUG("loss interval list entry is NULL"); return 0; } @@ -140,4 +120,176 @@ int dccp_li_hist_interval_new(struct dccp_li_hist *hist, return 1; } -EXPORT_SYMBOL_GPL(dccp_li_hist_interval_new); +/* calculate first loss interval + * + * returns estimated loss interval in usecs */ +static u32 dccp_li_calc_first_li(struct sock *sk, + struct list_head *hist_list, + struct timeval *last_feedback, + u16 s, u32 bytes_recv, + u32 previous_x_recv) +{ + struct dccp_rx_hist_entry *entry, *next, *tail = NULL; + u32 x_recv, p; + suseconds_t rtt, delta; + struct timeval tstamp = { 0, 0 }; + int interval = 0; + int win_count = 0; + int step = 0; + u64 fval; + + list_for_each_entry_safe(entry, next, hist_list, dccphrx_node) { + if (dccp_rx_hist_entry_data_packet(entry)) { + tail = entry; + + switch (step) { + case 0: + tstamp = entry->dccphrx_tstamp; + win_count = entry->dccphrx_ccval; + step = 1; + break; + case 1: + interval = win_count - entry->dccphrx_ccval; + if (interval < 0) + interval += TFRC_WIN_COUNT_LIMIT; + if (interval > 4) + goto found; + break; + } + } + } + + if (unlikely(step == 0)) { + DCCP_WARN("%s(%p), packet history has no data packets!\n", + dccp_role(sk), sk); + return ~0; + } + + if (unlikely(interval == 0)) { + DCCP_WARN("%s(%p), Could not find a win_count interval > 0." + "Defaulting to 1\n", dccp_role(sk), sk); + interval = 1; + } +found: + if (!tail) { + DCCP_CRIT("tail is null\n"); + return ~0; + } + + delta = timeval_delta(&tstamp, &tail->dccphrx_tstamp); + DCCP_BUG_ON(delta < 0); + + rtt = delta * 4 / interval; + dccp_pr_debug("%s(%p), approximated RTT to %dus\n", + dccp_role(sk), sk, (int)rtt); + + /* + * Determine the length of the first loss interval via inverse lookup. + * Assume that X_recv can be computed by the throughput equation + * s + * X_recv = -------- + * R * fval + * Find some p such that f(p) = fval; return 1/p [RFC 3448, 6.3.1]. + */ + if (rtt == 0) { /* would result in divide-by-zero */ + DCCP_WARN("RTT==0\n"); + return ~0; + } + + dccp_timestamp(sk, &tstamp); + delta = timeval_delta(&tstamp, last_feedback); + DCCP_BUG_ON(delta <= 0); + + x_recv = scaled_div32(bytes_recv, delta); + if (x_recv == 0) { /* would also trigger divide-by-zero */ + DCCP_WARN("X_recv==0\n"); + if (previous_x_recv == 0) { + DCCP_BUG("stored value of X_recv is zero"); + return ~0; + } + x_recv = previous_x_recv; + } + + fval = scaled_div(s, rtt); + fval = scaled_div32(fval, x_recv); + p = tfrc_calc_x_reverse_lookup(fval); + + dccp_pr_debug("%s(%p), receive rate=%u bytes/s, implied " + "loss rate=%u\n", dccp_role(sk), sk, x_recv, p); + + if (p == 0) + return ~0; + else + return 1000000 / p; +} + +void dccp_li_update_li(struct sock *sk, + struct list_head *li_hist_list, + struct list_head *hist_list, + struct timeval *last_feedback, u16 s, u32 bytes_recv, + u32 previous_x_recv, u64 seq_loss, u8 win_loss) +{ + struct dccp_li_hist_entry *head; + u64 seq_temp; + + if (list_empty(li_hist_list)) { + if (!dccp_li_hist_interval_new(li_hist_list, seq_loss, + win_loss)) + return; + + head = list_entry(li_hist_list->next, struct dccp_li_hist_entry, + dccplih_node); + head->dccplih_interval = dccp_li_calc_first_li(sk, hist_list, + last_feedback, + s, bytes_recv, + previous_x_recv); + } else { + struct dccp_li_hist_entry *entry; + struct list_head *tail; + + head = list_entry(li_hist_list->next, struct dccp_li_hist_entry, + dccplih_node); + /* FIXME win count check removed as was wrong */ + /* should make this check with receive history */ + /* and compare there as per section 10.2 of RFC4342 */ + + /* new loss event detected */ + /* calculate last interval length */ + seq_temp = dccp_delta_seqno(head->dccplih_seqno, seq_loss); + entry = dccp_li_hist_entry_new(GFP_ATOMIC); + + if (entry == NULL) { + DCCP_BUG("out of memory - can not allocate entry"); + return; + } + + list_add(&entry->dccplih_node, li_hist_list); + + tail = li_hist_list->prev; + list_del(tail); + kmem_cache_free(dccp_li_cachep, tail); + + /* Create the newest interval */ + entry->dccplih_seqno = seq_loss; + entry->dccplih_interval = seq_temp; + entry->dccplih_win_count = win_loss; + } +} + +EXPORT_SYMBOL_GPL(dccp_li_update_li); + +static __init int dccp_li_init(void) +{ + dccp_li_cachep = kmem_cache_create("dccp_li_hist", + sizeof(struct dccp_li_hist_entry), + 0, SLAB_HWCACHE_ALIGN, NULL); + return dccp_li_cachep == NULL ? -ENOBUFS : 0; +} + +static __exit void dccp_li_exit(void) +{ + kmem_cache_destroy(dccp_li_cachep); +} + +module_init(dccp_li_init); +module_exit(dccp_li_exit); diff --git a/net/dccp/ccids/lib/loss_interval.h b/net/dccp/ccids/lib/loss_interval.h index eb257014dd74..906c806d6d9d 100644 --- a/net/dccp/ccids/lib/loss_interval.h +++ b/net/dccp/ccids/lib/loss_interval.h @@ -3,8 +3,8 @@ /* * net/dccp/ccids/lib/loss_interval.h * - * Copyright (c) 2005 The University of Waikato, Hamilton, New Zealand. - * Copyright (c) 2005 Ian McDonald <ian.mcdonald@jandi.co.nz> + * Copyright (c) 2005-7 The University of Waikato, Hamilton, New Zealand. + * Copyright (c) 2005-7 Ian McDonald <ian.mcdonald@jandi.co.nz> * Copyright (c) 2005 Arnaldo Carvalho de Melo <acme@conectiva.com.br> * * This program is free software; you can redistribute it and/or modify it @@ -14,44 +14,16 @@ */ #include <linux/list.h> -#include <linux/slab.h> #include <linux/time.h> -#define DCCP_LI_HIST_IVAL_F_LENGTH 8 - -struct dccp_li_hist { - struct kmem_cache *dccplih_slab; -}; - -extern struct dccp_li_hist *dccp_li_hist_new(const char *name); -extern void dccp_li_hist_delete(struct dccp_li_hist *hist); - -struct dccp_li_hist_entry { - struct list_head dccplih_node; - u64 dccplih_seqno:48, - dccplih_win_count:4; - u32 dccplih_interval; -}; - -static inline struct dccp_li_hist_entry * - dccp_li_hist_entry_new(struct dccp_li_hist *hist, - const gfp_t prio) -{ - return kmem_cache_alloc(hist->dccplih_slab, prio); -} - -static inline void dccp_li_hist_entry_delete(struct dccp_li_hist *hist, - struct dccp_li_hist_entry *entry) -{ - if (entry != NULL) - kmem_cache_free(hist->dccplih_slab, entry); -} - -extern void dccp_li_hist_purge(struct dccp_li_hist *hist, - struct list_head *list); +extern void dccp_li_hist_purge(struct list_head *list); extern u32 dccp_li_hist_calc_i_mean(struct list_head *list); -extern int dccp_li_hist_interval_new(struct dccp_li_hist *hist, - struct list_head *list, const u64 seq_loss, const u8 win_loss); +extern void dccp_li_update_li(struct sock *sk, + struct list_head *li_hist_list, + struct list_head *hist_list, + struct timeval *last_feedback, u16 s, + u32 bytes_recv, u32 previous_x_recv, + u64 seq_loss, u8 win_loss); #endif /* _DCCP_LI_HIST_ */ diff --git a/net/dccp/ccids/lib/packet_history.c b/net/dccp/ccids/lib/packet_history.c index 2e8ef42721e2..34c4f6047724 100644 --- a/net/dccp/ccids/lib/packet_history.c +++ b/net/dccp/ccids/lib/packet_history.c @@ -59,7 +59,7 @@ struct dccp_tx_hist *dccp_tx_hist_new(const char *name) hist->dccptxh_slab = kmem_cache_create(slab_name, sizeof(struct dccp_tx_hist_entry), 0, SLAB_HWCACHE_ALIGN, - NULL, NULL); + NULL); if (hist->dccptxh_slab == NULL) goto out_free_slab_name; out: @@ -148,7 +148,7 @@ struct dccp_rx_hist *dccp_rx_hist_new(const char *name) hist->dccprxh_slab = kmem_cache_create(slab_name, sizeof(struct dccp_rx_hist_entry), 0, SLAB_HWCACHE_ALIGN, - NULL, NULL); + NULL); if (hist->dccprxh_slab == NULL) goto out_free_slab_name; out: diff --git a/net/dccp/dccp.h b/net/dccp/dccp.h index d8ad27bfe01a..e2d74cd7eeeb 100644 --- a/net/dccp/dccp.h +++ b/net/dccp/dccp.h @@ -184,7 +184,7 @@ DECLARE_SNMP_STAT(struct dccp_mib, dccp_statistics); /* * Checksumming routines */ -static inline int dccp_csum_coverage(const struct sk_buff *skb) +static inline unsigned int dccp_csum_coverage(const struct sk_buff *skb) { const struct dccp_hdr* dh = dccp_hdr(skb); @@ -195,7 +195,7 @@ static inline int dccp_csum_coverage(const struct sk_buff *skb) static inline void dccp_csum_outgoing(struct sk_buff *skb) { - int cov = dccp_csum_coverage(skb); + unsigned int cov = dccp_csum_coverage(skb); if (cov >= skb->len) dccp_hdr(skb)->dccph_cscov = 0; diff --git a/net/dccp/feat.c b/net/dccp/feat.c index cd845df5320d..5ebdd86c1b99 100644 --- a/net/dccp/feat.c +++ b/net/dccp/feat.c @@ -327,10 +327,16 @@ static void dccp_feat_empty_confirm(struct dccp_minisock *dmsk, } switch (type) { - case DCCPO_CHANGE_L: opt->dccpop_type = DCCPO_CONFIRM_R; break; - case DCCPO_CHANGE_R: opt->dccpop_type = DCCPO_CONFIRM_L; break; - default: DCCP_WARN("invalid type %d\n", type); return; - + case DCCPO_CHANGE_L: + opt->dccpop_type = DCCPO_CONFIRM_R; + break; + case DCCPO_CHANGE_R: + opt->dccpop_type = DCCPO_CONFIRM_L; + break; + default: + DCCP_WARN("invalid type %d\n", type); + kfree(opt); + return; } opt->dccpop_feat = feature; opt->dccpop_val = NULL; diff --git a/net/dccp/ipv6.c b/net/dccp/ipv6.c index 31737cdf156a..b158c661867b 100644 --- a/net/dccp/ipv6.c +++ b/net/dccp/ipv6.c @@ -253,17 +253,6 @@ static int dccp_v6_send_response(struct sock *sk, struct request_sock *req, if (dst == NULL) { opt = np->opt; - if (opt == NULL && - np->rxopt.bits.osrcrt == 2 && - ireq6->pktopts) { - struct sk_buff *pktopts = ireq6->pktopts; - struct inet6_skb_parm *rxopt = IP6CB(pktopts); - - if (rxopt->srcrt) - opt = ipv6_invert_rthdr(sk, - (struct ipv6_rt_hdr *)(skb_network_header(pktopts) + - rxopt->srcrt)); - } if (opt != NULL && opt->srcrt != NULL) { const struct rt0_hdr *rt0 = (struct rt0_hdr *)opt->srcrt; @@ -570,15 +559,6 @@ static struct sock *dccp_v6_request_recv_sock(struct sock *sk, if (sk_acceptq_is_full(sk)) goto out_overflow; - if (np->rxopt.bits.osrcrt == 2 && opt == NULL && ireq6->pktopts) { - const struct inet6_skb_parm *rxopt = IP6CB(ireq6->pktopts); - - if (rxopt->srcrt) - opt = ipv6_invert_rthdr(sk, - (struct ipv6_rt_hdr *)(skb_network_header(ireq6->pktopts) + - rxopt->srcrt)); - } - if (dst == NULL) { struct in6_addr *final_p = NULL, final; struct flowi fl; diff --git a/net/dccp/probe.c b/net/dccp/probe.c index 43a3adb027e7..bae10b0f2fc3 100644 --- a/net/dccp/probe.c +++ b/net/dccp/probe.c @@ -112,7 +112,7 @@ static struct jprobe dccp_send_probe = { .kp = { .symbol_name = "dccp_sendmsg", }, - .entry = JPROBE_ENTRY(jdccp_sendmsg), + .entry = jdccp_sendmsg, }; static int dccpprobe_open(struct inode *inode, struct file *file) diff --git a/net/dccp/proto.c b/net/dccp/proto.c index 6607b7b14f34..04b59ec4f512 100644 --- a/net/dccp/proto.c +++ b/net/dccp/proto.c @@ -1003,7 +1003,7 @@ static int __init dccp_init(void) dccp_hashinfo.bind_bucket_cachep = kmem_cache_create("dccp_bind_bucket", sizeof(struct inet_bind_bucket), 0, - SLAB_HWCACHE_ALIGN, NULL, NULL); + SLAB_HWCACHE_ALIGN, NULL); if (!dccp_hashinfo.bind_bucket_cachep) goto out; diff --git a/net/decnet/af_decnet.c b/net/decnet/af_decnet.c index bfa910b6ad25..ed76d4aab4a9 100644 --- a/net/decnet/af_decnet.c +++ b/net/decnet/af_decnet.c @@ -2304,7 +2304,7 @@ static int dn_socket_seq_show(struct seq_file *seq, void *v) return 0; } -static struct seq_operations dn_socket_seq_ops = { +static const struct seq_operations dn_socket_seq_ops = { .start = dn_socket_seq_start, .next = dn_socket_seq_next, .stop = dn_socket_seq_stop, diff --git a/net/decnet/dn_dev.c b/net/decnet/dn_dev.c index ab41c1879fd4..8def68209edd 100644 --- a/net/decnet/dn_dev.c +++ b/net/decnet/dn_dev.c @@ -461,7 +461,6 @@ static int dn_dev_insert_ifa(struct dn_dev *dn_db, struct dn_ifaddr *ifa) if (ifa->ifa_local != dn_eth2dn(dev->dev_addr)) { dn_dn2eth(mac_addr, ifa->ifa_local); dev_mc_add(dev, mac_addr, ETH_ALEN, 0); - dev_mc_upload(dev); } } @@ -815,7 +814,7 @@ static int dn_nl_dump_ifaddr(struct sk_buff *skb, struct netlink_callback *cb) for (ifa = dn_db->ifa_list, dn_idx = 0; ifa; ifa = ifa->ifa_next, dn_idx++) { if (dn_idx < skip_naddr) - goto cont; + continue; if (dn_nl_fill_ifaddr(skb, ifa, NETLINK_CB(cb->skb).pid, cb->nlh->nlmsg_seq, RTM_NEWADDR, @@ -1064,8 +1063,6 @@ static int dn_eth_up(struct net_device *dev) else dev_mc_add(dev, dn_rt_all_rt_mcast, ETH_ALEN, 0); - dev_mc_upload(dev); - dn_db->use_long = 1; return 0; @@ -1419,7 +1416,7 @@ static int dn_dev_seq_show(struct seq_file *seq, void *v) return 0; } -static struct seq_operations dn_dev_seq_ops = { +static const struct seq_operations dn_dev_seq_ops = { .start = dn_dev_seq_start, .next = dn_dev_seq_next, .stop = dn_dev_seq_stop, diff --git a/net/decnet/dn_neigh.c b/net/decnet/dn_neigh.c index 4bf066c416e2..174d8a7a6dac 100644 --- a/net/decnet/dn_neigh.c +++ b/net/decnet/dn_neigh.c @@ -569,7 +569,7 @@ static void *dn_neigh_seq_start(struct seq_file *seq, loff_t *pos) NEIGH_SEQ_NEIGH_ONLY); } -static struct seq_operations dn_neigh_seq_ops = { +static const struct seq_operations dn_neigh_seq_ops = { .start = dn_neigh_seq_start, .next = neigh_seq_next, .stop = neigh_seq_stop, diff --git a/net/decnet/dn_route.c b/net/decnet/dn_route.c index a8bf106b7a61..a4a620971ef0 100644 --- a/net/decnet/dn_route.c +++ b/net/decnet/dn_route.c @@ -1726,7 +1726,7 @@ static int dn_rt_cache_seq_show(struct seq_file *seq, void *v) return 0; } -static struct seq_operations dn_rt_cache_seq_ops = { +static const struct seq_operations dn_rt_cache_seq_ops = { .start = dn_rt_cache_seq_start, .next = dn_rt_cache_seq_next, .stop = dn_rt_cache_seq_stop, @@ -1737,8 +1737,9 @@ static int dn_rt_cache_seq_open(struct inode *inode, struct file *file) { struct seq_file *seq; int rc = -ENOMEM; - struct dn_rt_cache_iter_state *s = kmalloc(sizeof(*s), GFP_KERNEL); + struct dn_rt_cache_iter_state *s; + s = kzalloc(sizeof(*s), GFP_KERNEL); if (!s) goto out; rc = seq_open(file, &dn_rt_cache_seq_ops); @@ -1746,7 +1747,6 @@ static int dn_rt_cache_seq_open(struct inode *inode, struct file *file) goto out_kfree; seq = file->private_data; seq->private = s; - memset(s, 0, sizeof(*s)); out: return rc; out_kfree: @@ -1770,7 +1770,7 @@ void __init dn_route_init(void) dn_dst_ops.kmem_cachep = kmem_cache_create("dn_dst_cache", sizeof(struct dn_route), 0, - SLAB_HWCACHE_ALIGN|SLAB_PANIC, NULL, NULL); + SLAB_HWCACHE_ALIGN|SLAB_PANIC, NULL); init_timer(&dn_route_timer); dn_route_timer.function = dn_dst_check_expire; dn_route_timer.expires = jiffies + decnet_dst_gc_interval * HZ; diff --git a/net/decnet/dn_table.c b/net/decnet/dn_table.c index d6615c9361e9..fda0772fa215 100644 --- a/net/decnet/dn_table.c +++ b/net/decnet/dn_table.c @@ -881,7 +881,7 @@ void __init dn_fib_table_init(void) dn_hash_kmem = kmem_cache_create("dn_fib_info_cache", sizeof(struct dn_fib_info), 0, SLAB_HWCACHE_ALIGN, - NULL, NULL); + NULL); } void __exit dn_fib_table_cleanup(void) diff --git a/net/econet/af_econet.c b/net/econet/af_econet.c index b5524f32ac2d..35c96bcc0f32 100644 --- a/net/econet/af_econet.c +++ b/net/econet/af_econet.c @@ -1146,6 +1146,9 @@ static void __exit econet_proto_exit(void) sock_release(udpsock); #endif unregister_netdevice_notifier(&econet_netdev_notifier); +#ifdef CONFIG_ECONET_NATIVE + dev_remove_pack(&econet_packet_type); +#endif sock_unregister(econet_family_ops.family); proto_unregister(&econet_proto); } diff --git a/net/ethernet/eth.c b/net/ethernet/eth.c index 0ac2524f3b68..12c765715acf 100644 --- a/net/ethernet/eth.c +++ b/net/ethernet/eth.c @@ -266,8 +266,11 @@ void eth_header_cache_update(struct hh_cache *hh, struct net_device *dev, static int eth_mac_addr(struct net_device *dev, void *p) { struct sockaddr *addr = p; + if (netif_running(dev)) return -EBUSY; + if (!is_valid_ether_addr(addr->sa_data)) + return -EADDRNOTAVAIL; memcpy(dev->dev_addr, addr->sa_data, dev->addr_len); return 0; } @@ -316,9 +319,10 @@ void ether_setup(struct net_device *dev) EXPORT_SYMBOL(ether_setup); /** - * alloc_etherdev - Allocates and sets up an Ethernet device + * alloc_etherdev_mq - Allocates and sets up an Ethernet device * @sizeof_priv: Size of additional driver-private structure to be allocated * for this Ethernet device + * @queue_count: The number of queues this device has. * * Fill in the fields of the device structure with Ethernet-generic * values. Basically does everything except registering the device. @@ -328,8 +332,8 @@ EXPORT_SYMBOL(ether_setup); * this private data area. */ -struct net_device *alloc_etherdev(int sizeof_priv) +struct net_device *alloc_etherdev_mq(int sizeof_priv, unsigned int queue_count) { - return alloc_netdev(sizeof_priv, "eth%d", ether_setup); + return alloc_netdev_mq(sizeof_priv, "eth%d", ether_setup, queue_count); } -EXPORT_SYMBOL(alloc_etherdev); +EXPORT_SYMBOL(alloc_etherdev_mq); diff --git a/net/ieee80211/ieee80211_rx.c b/net/ieee80211/ieee80211_rx.c index f2de2e48b021..6284c99b456e 100644 --- a/net/ieee80211/ieee80211_rx.c +++ b/net/ieee80211/ieee80211_rx.c @@ -366,6 +366,12 @@ int ieee80211_rx(struct ieee80211_device *ieee, struct sk_buff *skb, frag = WLAN_GET_SEQ_FRAG(sc); hdrlen = ieee80211_get_hdrlen(fc); + if (skb->len < hdrlen) { + printk(KERN_INFO "%s: invalid SKB length %d\n", + dev->name, skb->len); + goto rx_dropped; + } + /* Put this code here so that we avoid duplicating it in all * Rx paths. - Jean II */ #ifdef CONFIG_WIRELESS_EXT diff --git a/net/ieee80211/ieee80211_wx.c b/net/ieee80211/ieee80211_wx.c index 523a137d49dd..465b73d50532 100644 --- a/net/ieee80211/ieee80211_wx.c +++ b/net/ieee80211/ieee80211_wx.c @@ -90,14 +90,11 @@ static char *ieee80211_translate_scan(struct ieee80211_device *ieee, } /* Add channel and frequency */ + /* Note : userspace automatically computes channel using iwrange */ iwe.cmd = SIOCGIWFREQ; - iwe.u.freq.m = network->channel; - iwe.u.freq.e = 0; - iwe.u.freq.i = 0; - start = iwe_stream_add_event(start, stop, &iwe, IW_EV_FREQ_LEN); - iwe.u.freq.m = ieee80211_channel_to_freq(ieee, network->channel); iwe.u.freq.e = 6; + iwe.u.freq.i = 0; start = iwe_stream_add_event(start, stop, &iwe, IW_EV_FREQ_LEN); /* Add encryption capability */ diff --git a/net/ieee80211/softmac/ieee80211softmac_assoc.c b/net/ieee80211/softmac/ieee80211softmac_assoc.c index afb6c6698b27..e475f2e1be13 100644 --- a/net/ieee80211/softmac/ieee80211softmac_assoc.c +++ b/net/ieee80211/softmac/ieee80211softmac_assoc.c @@ -273,8 +273,6 @@ ieee80211softmac_assoc_work(struct work_struct *work) ieee80211softmac_notify(mac->dev, IEEE80211SOFTMAC_EVENT_SCAN_FINISHED, ieee80211softmac_assoc_notify_scan, NULL); if (ieee80211softmac_start_scan(mac)) { dprintk(KERN_INFO PFX "Associate: failed to initiate scan. Is device up?\n"); - mac->associnfo.associating = 0; - mac->associnfo.associated = 0; } goto out; } else { diff --git a/net/ieee80211/softmac/ieee80211softmac_module.c b/net/ieee80211/softmac/ieee80211softmac_module.c index c308756c2f9d..6398e6e67493 100644 --- a/net/ieee80211/softmac/ieee80211softmac_module.c +++ b/net/ieee80211/softmac/ieee80211softmac_module.c @@ -456,18 +456,13 @@ void ieee80211softmac_add_network_locked(struct ieee80211softmac_device *mac, struct ieee80211softmac_network *add_net) { - struct list_head *list_ptr; - struct ieee80211softmac_network *softmac_net = NULL; + struct ieee80211softmac_network *softmac_net; - list_for_each(list_ptr, &mac->network_list) { - softmac_net = list_entry(list_ptr, struct ieee80211softmac_network, list); + list_for_each_entry(softmac_net, &mac->network_list, list) { if(!memcmp(softmac_net->bssid, add_net->bssid, ETH_ALEN)) - break; - else - softmac_net = NULL; + return; } - if(softmac_net == NULL) - list_add(&(add_net->list), &mac->network_list); + list_add(&(add_net->list), &mac->network_list); } /* Add a network to the list, with locking */ @@ -506,16 +501,13 @@ struct ieee80211softmac_network * ieee80211softmac_get_network_by_bssid_locked(struct ieee80211softmac_device *mac, u8 *bssid) { - struct list_head *list_ptr; - struct ieee80211softmac_network *softmac_net = NULL; - list_for_each(list_ptr, &mac->network_list) { - softmac_net = list_entry(list_ptr, struct ieee80211softmac_network, list); + struct ieee80211softmac_network *softmac_net; + + list_for_each_entry(softmac_net, &mac->network_list, list) { if(!memcmp(softmac_net->bssid, bssid, ETH_ALEN)) - break; - else - softmac_net = NULL; + return softmac_net; } - return softmac_net; + return NULL; } /* Get a network from the list by BSSID with locking */ @@ -537,11 +529,9 @@ struct ieee80211softmac_network * ieee80211softmac_get_network_by_essid_locked(struct ieee80211softmac_device *mac, struct ieee80211softmac_essid *essid) { - struct list_head *list_ptr; - struct ieee80211softmac_network *softmac_net = NULL; + struct ieee80211softmac_network *softmac_net; - list_for_each(list_ptr, &mac->network_list) { - softmac_net = list_entry(list_ptr, struct ieee80211softmac_network, list); + list_for_each_entry(softmac_net, &mac->network_list, list) { if (softmac_net->essid.len == essid->len && !memcmp(softmac_net->essid.data, essid->data, essid->len)) return softmac_net; diff --git a/net/ieee80211/softmac/ieee80211softmac_wx.c b/net/ieee80211/softmac/ieee80211softmac_wx.c index d054e9224b3e..5742dc803b79 100644 --- a/net/ieee80211/softmac/ieee80211softmac_wx.c +++ b/net/ieee80211/softmac/ieee80211softmac_wx.c @@ -70,44 +70,30 @@ ieee80211softmac_wx_set_essid(struct net_device *net_dev, char *extra) { struct ieee80211softmac_device *sm = ieee80211_priv(net_dev); - struct ieee80211softmac_network *n; struct ieee80211softmac_auth_queue_item *authptr; int length = 0; check_assoc_again: mutex_lock(&sm->associnfo.mutex); - /* Check if we're already associating to this or another network - * If it's another network, cancel and start over with our new network - * If it's our network, ignore the change, we're already doing it! - */ if((sm->associnfo.associating || sm->associnfo.associated) && (data->essid.flags && data->essid.length)) { - /* Get the associating network */ - n = ieee80211softmac_get_network_by_bssid(sm, sm->associnfo.bssid); - if(n && n->essid.len == data->essid.length && - !memcmp(n->essid.data, extra, n->essid.len)) { - dprintk(KERN_INFO PFX "Already associating or associated to "MAC_FMT"\n", - MAC_ARG(sm->associnfo.bssid)); - goto out; - } else { - dprintk(KERN_INFO PFX "Canceling existing associate request!\n"); - /* Cancel assoc work */ - cancel_delayed_work(&sm->associnfo.work); - /* We don't have to do this, but it's a little cleaner */ - list_for_each_entry(authptr, &sm->auth_queue, list) - cancel_delayed_work(&authptr->work); - sm->associnfo.bssvalid = 0; - sm->associnfo.bssfixed = 0; - sm->associnfo.associating = 0; - sm->associnfo.associated = 0; - /* We must unlock to avoid deadlocks with the assoc workqueue - * on the associnfo.mutex */ - mutex_unlock(&sm->associnfo.mutex); - flush_scheduled_work(); - /* Avoid race! Check assoc status again. Maybe someone started an - * association while we flushed. */ - goto check_assoc_again; - } + dprintk(KERN_INFO PFX "Canceling existing associate request!\n"); + /* Cancel assoc work */ + cancel_delayed_work(&sm->associnfo.work); + /* We don't have to do this, but it's a little cleaner */ + list_for_each_entry(authptr, &sm->auth_queue, list) + cancel_delayed_work(&authptr->work); + sm->associnfo.bssvalid = 0; + sm->associnfo.bssfixed = 0; + sm->associnfo.associating = 0; + sm->associnfo.associated = 0; + /* We must unlock to avoid deadlocks with the assoc workqueue + * on the associnfo.mutex */ + mutex_unlock(&sm->associnfo.mutex); + flush_scheduled_work(); + /* Avoid race! Check assoc status again. Maybe someone started an + * association while we flushed. */ + goto check_assoc_again; } sm->associnfo.static_essid = 0; @@ -128,7 +114,7 @@ check_assoc_again: sm->associnfo.associating = 1; /* queue lower level code to do work (if necessary) */ schedule_delayed_work(&sm->associnfo.work, 0); -out: + mutex_unlock(&sm->associnfo.mutex); return 0; @@ -153,13 +139,13 @@ ieee80211softmac_wx_get_essid(struct net_device *net_dev, data->essid.length = sm->associnfo.req_essid.len; data->essid.flags = 1; /* active */ memcpy(extra, sm->associnfo.req_essid.data, sm->associnfo.req_essid.len); - } - + dprintk(KERN_INFO PFX "Getting essid from req_essid\n"); + } else if (sm->associnfo.associated || sm->associnfo.associating) { /* If we're associating/associated, return that */ - if (sm->associnfo.associated || sm->associnfo.associating) { data->essid.length = sm->associnfo.associate_essid.len; data->essid.flags = 1; /* active */ memcpy(extra, sm->associnfo.associate_essid.data, sm->associnfo.associate_essid.len); + dprintk(KERN_INFO PFX "Getting essid from associate_essid\n"); } mutex_unlock(&sm->associnfo.mutex); diff --git a/net/ipv4/Kconfig b/net/ipv4/Kconfig index 010fbb2d45e9..fb7909774254 100644 --- a/net/ipv4/Kconfig +++ b/net/ipv4/Kconfig @@ -116,48 +116,6 @@ config IP_ROUTE_MULTIPATH equal "cost" and chooses one of them in a non-deterministic fashion if a matching packet arrives. -config IP_ROUTE_MULTIPATH_CACHED - bool "IP: equal cost multipath with caching support (EXPERIMENTAL)" - depends on IP_ROUTE_MULTIPATH - help - Normally, equal cost multipath routing is not supported by the - routing cache. If you say Y here, alternative routes are cached - and on cache lookup a route is chosen in a configurable fashion. - - If unsure, say N. - -config IP_ROUTE_MULTIPATH_RR - tristate "MULTIPATH: round robin algorithm" - depends on IP_ROUTE_MULTIPATH_CACHED - help - Multipath routes are chosen according to Round Robin - -config IP_ROUTE_MULTIPATH_RANDOM - tristate "MULTIPATH: random algorithm" - depends on IP_ROUTE_MULTIPATH_CACHED - help - Multipath routes are chosen in a random fashion. Actually, - there is no weight for a route. The advantage of this policy - is that it is implemented stateless and therefore introduces only - a very small delay. - -config IP_ROUTE_MULTIPATH_WRANDOM - tristate "MULTIPATH: weighted random algorithm" - depends on IP_ROUTE_MULTIPATH_CACHED - help - Multipath routes are chosen in a weighted random fashion. - The per route weights are the weights visible via ip route 2. As the - corresponding state management introduces some overhead routing delay - is increased. - -config IP_ROUTE_MULTIPATH_DRR - tristate "MULTIPATH: interface round robin algorithm" - depends on IP_ROUTE_MULTIPATH_CACHED - help - Connections are distributed in a round robin fashion over the - available interfaces. This policy makes sense if the connections - should be primarily distributed on interfaces and not on routes. - config IP_ROUTE_VERBOSE bool "IP: verbose route monitoring" depends on IP_ADVANCED_ROUTER diff --git a/net/ipv4/Makefile b/net/ipv4/Makefile index 4ff6c151d7f3..fbf1674e0c2a 100644 --- a/net/ipv4/Makefile +++ b/net/ipv4/Makefile @@ -29,14 +29,9 @@ obj-$(CONFIG_INET_TUNNEL) += tunnel4.o obj-$(CONFIG_INET_XFRM_MODE_TRANSPORT) += xfrm4_mode_transport.o obj-$(CONFIG_INET_XFRM_MODE_TUNNEL) += xfrm4_mode_tunnel.o obj-$(CONFIG_IP_PNP) += ipconfig.o -obj-$(CONFIG_IP_ROUTE_MULTIPATH_RR) += multipath_rr.o -obj-$(CONFIG_IP_ROUTE_MULTIPATH_RANDOM) += multipath_random.o -obj-$(CONFIG_IP_ROUTE_MULTIPATH_WRANDOM) += multipath_wrandom.o -obj-$(CONFIG_IP_ROUTE_MULTIPATH_DRR) += multipath_drr.o obj-$(CONFIG_NETFILTER) += netfilter.o netfilter/ obj-$(CONFIG_IP_VS) += ipvs/ obj-$(CONFIG_INET_DIAG) += inet_diag.o -obj-$(CONFIG_IP_ROUTE_MULTIPATH_CACHED) += multipath.o obj-$(CONFIG_INET_TCP_DIAG) += tcp_diag.o obj-$(CONFIG_NET_TCPPROBE) += tcp_probe.o obj-$(CONFIG_TCP_CONG_BIC) += tcp_bic.o diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c index 90b241c5d1fa..e68103475cca 100644 --- a/net/ipv4/af_inet.c +++ b/net/ipv4/af_inet.c @@ -1170,6 +1170,9 @@ static struct sk_buff *inet_gso_segment(struct sk_buff *skb, int features) int ihl; int id; + if (!(features & NETIF_F_V4_CSUM)) + features &= ~NETIF_F_SG; + if (unlikely(skb_shinfo(skb)->gso_type & ~(SKB_GSO_TCPV4 | SKB_GSO_UDP | diff --git a/net/ipv4/ah4.c b/net/ipv4/ah4.c index 6da8ff597ad3..39f6211f1496 100644 --- a/net/ipv4/ah4.c +++ b/net/ipv4/ah4.c @@ -46,7 +46,7 @@ static int ip_clear_mutable_options(struct iphdr *iph, __be32 *daddr) memcpy(daddr, optptr+optlen-4, 4); /* Fall through */ default: - memset(optptr+2, 0, optlen-2); + memset(optptr, 0, optlen); } l -= optlen; optptr += optlen; @@ -339,3 +339,4 @@ static void __exit ah4_fini(void) module_init(ah4_init); module_exit(ah4_fini); MODULE_LICENSE("GPL"); +MODULE_ALIAS_XFRM_TYPE(AF_INET, XFRM_PROTO_AH); diff --git a/net/ipv4/arp.c b/net/ipv4/arp.c index e00767e8ebd9..9ab9d534fbac 100644 --- a/net/ipv4/arp.c +++ b/net/ipv4/arp.c @@ -885,7 +885,7 @@ static int arp_process(struct sk_buff *skb) if (n == NULL && arp->ar_op == htons(ARPOP_REPLY) && inet_addr_type(sip) == RTN_UNICAST) - n = __neigh_lookup(&arp_tbl, &sip, dev, -1); + n = __neigh_lookup(&arp_tbl, &sip, dev, 1); } if (n) { diff --git a/net/ipv4/devinet.c b/net/ipv4/devinet.c index abf6352f990f..5dbe5803b7d5 100644 --- a/net/ipv4/devinet.c +++ b/net/ipv4/devinet.c @@ -1056,10 +1056,9 @@ static int inetdev_event(struct notifier_block *this, unsigned long event, if (!in_dev) { if (event == NETDEV_REGISTER) { in_dev = inetdev_init(dev); + if (!in_dev) + return notifier_from_errno(-ENOMEM); if (dev == &loopback_dev) { - if (!in_dev) - panic("devinet: " - "Failed to create loopback\n"); IN_DEV_CONF_SET(in_dev, NOXFRM, 1); IN_DEV_CONF_SET(in_dev, NOPOLICY, 1); } @@ -1194,7 +1193,7 @@ static int inet_dump_ifaddr(struct sk_buff *skb, struct netlink_callback *cb) for (ifa = in_dev->ifa_list, ip_idx = 0; ifa; ifa = ifa->ifa_next, ip_idx++) { if (ip_idx < s_ip_idx) - goto cont; + continue; if (inet_fill_ifaddr(skb, ifa, NETLINK_CB(cb->skb).pid, cb->nlh->nlmsg_seq, RTM_NEWADDR, NLM_F_MULTI) <= 0) diff --git a/net/ipv4/esp4.c b/net/ipv4/esp4.c index 47c95e8ef045..98767a4f1185 100644 --- a/net/ipv4/esp4.c +++ b/net/ipv4/esp4.c @@ -481,3 +481,4 @@ static void __exit esp4_fini(void) module_init(esp4_init); module_exit(esp4_fini); MODULE_LICENSE("GPL"); +MODULE_ALIAS_XFRM_TYPE(AF_INET, XFRM_PROTO_ESP); diff --git a/net/ipv4/fib_frontend.c b/net/ipv4/fib_frontend.c index 311d633f7f39..eff6bce453ee 100644 --- a/net/ipv4/fib_frontend.c +++ b/net/ipv4/fib_frontend.c @@ -453,7 +453,6 @@ const struct nla_policy rtm_ipv4_policy[RTA_MAX+1] = { [RTA_MULTIPATH] = { .len = sizeof(struct rtnexthop) }, [RTA_PROTOINFO] = { .type = NLA_U32 }, [RTA_FLOW] = { .type = NLA_U32 }, - [RTA_MP_ALGO] = { .type = NLA_U32 }, }; static int rtm_to_fib_config(struct sk_buff *skb, struct nlmsghdr *nlh, @@ -515,9 +514,6 @@ static int rtm_to_fib_config(struct sk_buff *skb, struct nlmsghdr *nlh, case RTA_FLOW: cfg->fc_flow = nla_get_u32(attr); break; - case RTA_MP_ALGO: - cfg->fc_mp_alg = nla_get_u32(attr); - break; case RTA_TABLE: cfg->fc_table = nla_get_u32(attr); break; @@ -821,7 +817,7 @@ static void nl_fib_input(struct sock *sk, int len) static void nl_fib_lookup_init(void) { netlink_kernel_create(NETLINK_FIB_LOOKUP, 0, nl_fib_input, NULL, - THIS_MODULE); + THIS_MODULE); } static void fib_disable_ip(struct net_device *dev, int force) diff --git a/net/ipv4/fib_hash.c b/net/ipv4/fib_hash.c index 07e843a47dde..9ad1d9ff9ce7 100644 --- a/net/ipv4/fib_hash.c +++ b/net/ipv4/fib_hash.c @@ -771,13 +771,13 @@ struct fib_table * __init fib_hash_init(u32 id) fn_hash_kmem = kmem_cache_create("ip_fib_hash", sizeof(struct fib_node), 0, SLAB_HWCACHE_ALIGN, - NULL, NULL); + NULL); if (fn_alias_kmem == NULL) fn_alias_kmem = kmem_cache_create("ip_fib_alias", sizeof(struct fib_alias), 0, SLAB_HWCACHE_ALIGN, - NULL, NULL); + NULL); tb = kmalloc(sizeof(struct fib_table) + sizeof(struct fn_hash), GFP_KERNEL); diff --git a/net/ipv4/fib_semantics.c b/net/ipv4/fib_semantics.c index bb94550d95c3..c434119deb52 100644 --- a/net/ipv4/fib_semantics.c +++ b/net/ipv4/fib_semantics.c @@ -42,7 +42,6 @@ #include <net/tcp.h> #include <net/sock.h> #include <net/ip_fib.h> -#include <net/ip_mp_alg.h> #include <net/netlink.h> #include <net/nexthop.h> @@ -697,13 +696,6 @@ struct fib_info *fib_create_info(struct fib_config *cfg) goto err_inval; } #endif -#ifdef CONFIG_IP_ROUTE_MULTIPATH_CACHED - if (cfg->fc_mp_alg) { - if (cfg->fc_mp_alg < IP_MP_ALG_NONE || - cfg->fc_mp_alg > IP_MP_ALG_MAX) - goto err_inval; - } -#endif err = -ENOBUFS; if (fib_info_cnt >= fib_hash_size) { @@ -791,10 +783,6 @@ struct fib_info *fib_create_info(struct fib_config *cfg) #endif } -#ifdef CONFIG_IP_ROUTE_MULTIPATH_CACHED - fi->fib_mp_alg = cfg->fc_mp_alg; -#endif - if (fib_props[cfg->fc_type].error) { if (cfg->fc_gw || cfg->fc_oif || cfg->fc_mp) goto err_inval; @@ -940,10 +928,6 @@ out_fill_res: res->type = fa->fa_type; res->scope = fa->fa_scope; res->fi = fa->fa_info; -#ifdef CONFIG_IP_ROUTE_MULTIPATH_CACHED - res->netmask = mask; - res->network = zone & inet_make_mask(prefixlen); -#endif atomic_inc(&res->fi->fib_clntref); return 0; } diff --git a/net/ipv4/fib_trie.c b/net/ipv4/fib_trie.c index 30e332ade61b..9ca786a6fd3c 100644 --- a/net/ipv4/fib_trie.c +++ b/net/ipv4/fib_trie.c @@ -1970,7 +1970,7 @@ struct fib_table * __init fib_hash_init(u32 id) fn_alias_kmem = kmem_cache_create("ip_fib_alias", sizeof(struct fib_alias), 0, SLAB_HWCACHE_ALIGN, - NULL, NULL); + NULL); tb = kmalloc(sizeof(struct fib_table) + sizeof(struct trie), GFP_KERNEL); diff --git a/net/ipv4/inet_diag.c b/net/ipv4/inet_diag.c index dbeacd8b0f90..def007ec1d6f 100644 --- a/net/ipv4/inet_diag.c +++ b/net/ipv4/inet_diag.c @@ -836,12 +836,16 @@ static int inet_diag_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh) return inet_diag_get_exact(skb, nlh); } +static DEFINE_MUTEX(inet_diag_mutex); + static void inet_diag_rcv(struct sock *sk, int len) { unsigned int qlen = 0; do { + mutex_lock(&inet_diag_mutex); netlink_run_queue(sk, &qlen, &inet_diag_rcv_msg); + mutex_unlock(&inet_diag_mutex); } while (qlen); } diff --git a/net/ipv4/inet_timewait_sock.c b/net/ipv4/inet_timewait_sock.c index a73cf93cee36..2586df09b9b6 100644 --- a/net/ipv4/inet_timewait_sock.c +++ b/net/ipv4/inet_timewait_sock.c @@ -14,7 +14,8 @@ #include <net/ip.h> /* Must be called with locally disabled BHs. */ -void __inet_twsk_kill(struct inet_timewait_sock *tw, struct inet_hashinfo *hashinfo) +static void __inet_twsk_kill(struct inet_timewait_sock *tw, + struct inet_hashinfo *hashinfo) { struct inet_bind_hashbucket *bhead; struct inet_bind_bucket *tb; @@ -47,8 +48,6 @@ void __inet_twsk_kill(struct inet_timewait_sock *tw, struct inet_hashinfo *hashi inet_twsk_put(tw); } -EXPORT_SYMBOL_GPL(__inet_twsk_kill); - /* * Enter the time wait state. This is called with locally disabled BH. * Essentially we whip up a timewait bucket, copy the relevant info into it diff --git a/net/ipv4/inetpeer.c b/net/ipv4/inetpeer.c index 2f44e6128068..771031dfbd0f 100644 --- a/net/ipv4/inetpeer.c +++ b/net/ipv4/inetpeer.c @@ -123,7 +123,7 @@ void __init inet_initpeers(void) peer_cachep = kmem_cache_create("inet_peer_cache", sizeof(struct inet_peer), 0, SLAB_HWCACHE_ALIGN|SLAB_PANIC, - NULL, NULL); + NULL); /* All the timers, started at system startup tend to synchronize. Perturb it a bit. @@ -158,7 +158,7 @@ static void unlink_from_unused(struct inet_peer *p) #define lookup(_daddr,_stack) \ ({ \ struct inet_peer *u, **v; \ - if (_stack) { \ + if (_stack != NULL) { \ stackptr = _stack; \ *stackptr++ = &peer_root; \ } \ @@ -169,7 +169,7 @@ static void unlink_from_unused(struct inet_peer *p) v = &u->avl_left; \ else \ v = &u->avl_right; \ - if (_stack) \ + if (_stack != NULL) \ *stackptr++ = v; \ u = *v; \ } \ diff --git a/net/ipv4/ip_forward.c b/net/ipv4/ip_forward.c index 9cb04df0054b..8c95cf09f87a 100644 --- a/net/ipv4/ip_forward.c +++ b/net/ipv4/ip_forward.c @@ -86,7 +86,7 @@ int ip_forward(struct sk_buff *skb) goto sr_failed; if (unlikely(skb->len > dst_mtu(&rt->u.dst) && - (ip_hdr(skb)->frag_off & htons(IP_DF))) && !skb->local_df) { + (ip_hdr(skb)->frag_off & htons(IP_DF))) && !skb->local_df) { IP_INC_STATS(IPSTATS_MIB_FRAGFAILS); icmp_send(skb, ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED, htonl(dst_mtu(&rt->u.dst))); diff --git a/net/ipv4/ip_gre.c b/net/ipv4/ip_gre.c index 63282934725e..5c14ed63e56c 100644 --- a/net/ipv4/ip_gre.c +++ b/net/ipv4/ip_gre.c @@ -809,7 +809,8 @@ static int ipgre_tunnel_xmit(struct sk_buff *skb, struct net_device *dev) max_headroom = LL_RESERVED_SPACE(tdev) + gre_hlen; - if (skb_headroom(skb) < max_headroom || skb_cloned(skb) || skb_shared(skb)) { + if (skb_headroom(skb) < max_headroom || skb_shared(skb)|| + (skb_cloned(skb) && !skb_clone_writable(skb, 0))) { struct sk_buff *new_skb = skb_realloc_headroom(skb, max_headroom); if (!new_skb) { ip_rt_put(rt); diff --git a/net/ipv4/ip_options.c b/net/ipv4/ip_options.c index 251346828cb4..2f14745a9e1f 100644 --- a/net/ipv4/ip_options.c +++ b/net/ipv4/ip_options.c @@ -513,11 +513,8 @@ void ip_options_undo(struct ip_options * opt) static struct ip_options *ip_options_get_alloc(const int optlen) { - struct ip_options *opt = kmalloc(sizeof(*opt) + ((optlen + 3) & ~3), - GFP_KERNEL); - if (opt) - memset(opt, 0, sizeof(*opt)); - return opt; + return kzalloc(sizeof(struct ip_options) + ((optlen + 3) & ~3), + GFP_KERNEL); } static int ip_options_get_finish(struct ip_options **optp, diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c index 34ea4547ebbe..0f1d7beacf78 100644 --- a/net/ipv4/ip_output.c +++ b/net/ipv4/ip_output.c @@ -75,7 +75,6 @@ #include <net/icmp.h> #include <net/checksum.h> #include <net/inetpeer.h> -#include <net/checksum.h> #include <linux/igmp.h> #include <linux/netfilter_ipv4.h> #include <linux/netfilter_bridge.h> @@ -399,6 +398,10 @@ static void ip_copy_metadata(struct sk_buff *to, struct sk_buff *from) to->tc_index = from->tc_index; #endif nf_copy(to, from); +#if defined(CONFIG_NETFILTER_XT_TARGET_TRACE) || \ + defined(CONFIG_NETFILTER_XT_TARGET_TRACE_MODULE) + to->nf_trace = from->nf_trace; +#endif #if defined(CONFIG_IP_VS) || defined(CONFIG_IP_VS_MODULE) to->ipvs_property = from->ipvs_property; #endif @@ -837,7 +840,7 @@ int ip_append_data(struct sock *sk, */ if (transhdrlen && length + fragheaderlen <= mtu && - rt->u.dst.dev->features & NETIF_F_ALL_CSUM && + rt->u.dst.dev->features & NETIF_F_V4_CSUM && !exthdrlen) csummode = CHECKSUM_PARTIAL; diff --git a/net/ipv4/ip_sockglue.c b/net/ipv4/ip_sockglue.c index 4d544573f48a..6b420aedcdcf 100644 --- a/net/ipv4/ip_sockglue.c +++ b/net/ipv4/ip_sockglue.c @@ -625,6 +625,10 @@ static int do_ip_setsockopt(struct sock *sk, int level, { struct ip_mreqn mreq; + err = -EPROTO; + if (inet_sk(sk)->is_icsk) + break; + if (optlen < sizeof(struct ip_mreq)) goto e_inval; err = -EFAULT; diff --git a/net/ipv4/ipcomp.c b/net/ipv4/ipcomp.c index ab86137c71d2..e787044a8514 100644 --- a/net/ipv4/ipcomp.c +++ b/net/ipv4/ipcomp.c @@ -485,3 +485,4 @@ MODULE_LICENSE("GPL"); MODULE_DESCRIPTION("IP Payload Compression Protocol (IPComp) - RFC3173"); MODULE_AUTHOR("James Morris <jmorris@intercode.com.au>"); +MODULE_ALIAS_XFRM_TYPE(AF_INET, XFRM_PROTO_COMP); diff --git a/net/ipv4/ipconfig.c b/net/ipv4/ipconfig.c index 342ca8d89458..c5b247077539 100644 --- a/net/ipv4/ipconfig.c +++ b/net/ipv4/ipconfig.c @@ -1281,9 +1281,9 @@ static int __init ip_auto_config(void) */ if (ic_myaddr == NONE || #ifdef CONFIG_ROOT_NFS - (MAJOR(ROOT_DEV) == UNNAMED_MAJOR - && root_server_addr == NONE - && ic_servaddr == NONE) || + (root_server_addr == NONE + && ic_servaddr == NONE + && ROOT_DEV == Root_NFS) || #endif ic_first_dev->next) { #ifdef IPCONFIG_DYNAMIC diff --git a/net/ipv4/ipip.c b/net/ipv4/ipip.c index ebd2f2d532f6..396437242a1b 100644 --- a/net/ipv4/ipip.c +++ b/net/ipv4/ipip.c @@ -595,7 +595,8 @@ static int ipip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev) */ max_headroom = (LL_RESERVED_SPACE(tdev)+sizeof(struct iphdr)); - if (skb_headroom(skb) < max_headroom || skb_cloned(skb) || skb_shared(skb)) { + if (skb_headroom(skb) < max_headroom || skb_shared(skb) || + (skb_cloned(skb) && !skb_clone_writable(skb, 0))) { struct sk_buff *new_skb = skb_realloc_headroom(skb, max_headroom); if (!new_skb) { ip_rt_put(rt); diff --git a/net/ipv4/ipmr.c b/net/ipv4/ipmr.c index d96582acdf69..7003cc1b7fe2 100644 --- a/net/ipv4/ipmr.c +++ b/net/ipv4/ipmr.c @@ -1917,7 +1917,7 @@ void __init ip_mr_init(void) mrt_cachep = kmem_cache_create("ip_mrt_cache", sizeof(struct mfc_cache), 0, SLAB_HWCACHE_ALIGN|SLAB_PANIC, - NULL, NULL); + NULL); init_timer(&ipmr_expire_timer); ipmr_expire_timer.function=ipmr_expire_process; register_netdevice_notifier(&ip_mr_notifier); diff --git a/net/ipv4/ipvs/ip_vs_app.c b/net/ipv4/ipvs/ip_vs_app.c index 15ad5dd2d984..8d6901d4e94f 100644 --- a/net/ipv4/ipvs/ip_vs_app.c +++ b/net/ipv4/ipvs/ip_vs_app.c @@ -549,7 +549,7 @@ static int ip_vs_app_seq_show(struct seq_file *seq, void *v) return 0; } -static struct seq_operations ip_vs_app_seq_ops = { +static const struct seq_operations ip_vs_app_seq_ops = { .start = ip_vs_app_seq_start, .next = ip_vs_app_seq_next, .stop = ip_vs_app_seq_stop, diff --git a/net/ipv4/ipvs/ip_vs_conn.c b/net/ipv4/ipvs/ip_vs_conn.c index 7018f97c75dc..d612a6a5d957 100644 --- a/net/ipv4/ipvs/ip_vs_conn.c +++ b/net/ipv4/ipvs/ip_vs_conn.c @@ -745,7 +745,7 @@ static int ip_vs_conn_seq_show(struct seq_file *seq, void *v) return 0; } -static struct seq_operations ip_vs_conn_seq_ops = { +static const struct seq_operations ip_vs_conn_seq_ops = { .start = ip_vs_conn_seq_start, .next = ip_vs_conn_seq_next, .stop = ip_vs_conn_seq_stop, @@ -901,7 +901,7 @@ int ip_vs_conn_init(void) /* Allocate ip_vs_conn slab cache */ ip_vs_conn_cachep = kmem_cache_create("ip_vs_conn", sizeof(struct ip_vs_conn), 0, - SLAB_HWCACHE_ALIGN, NULL, NULL); + SLAB_HWCACHE_ALIGN, NULL); if (!ip_vs_conn_cachep) { vfree(ip_vs_conn_tab); return -ENOMEM; diff --git a/net/ipv4/ipvs/ip_vs_ctl.c b/net/ipv4/ipvs/ip_vs_ctl.c index 68fe1d4d0210..f656d41d8d41 100644 --- a/net/ipv4/ipvs/ip_vs_ctl.c +++ b/net/ipv4/ipvs/ip_vs_ctl.c @@ -29,7 +29,6 @@ #include <linux/proc_fs.h> #include <linux/workqueue.h> #include <linux/swap.h> -#include <linux/proc_fs.h> #include <linux/seq_file.h> #include <linux/netfilter.h> @@ -909,7 +908,7 @@ ip_vs_edit_dest(struct ip_vs_service *svc, struct ip_vs_dest_user *udest) write_lock_bh(&__ip_vs_svc_lock); /* Wait until all other svc users go away */ - while (atomic_read(&svc->usecnt) > 1) {}; + IP_VS_WAIT_WHILE(atomic_read(&svc->usecnt) > 1); /* call the update_service, because server weight may be changed */ svc->scheduler->update_service(svc); @@ -1783,7 +1782,7 @@ static int ip_vs_info_seq_show(struct seq_file *seq, void *v) return 0; } -static struct seq_operations ip_vs_info_seq_ops = { +static const struct seq_operations ip_vs_info_seq_ops = { .start = ip_vs_info_seq_start, .next = ip_vs_info_seq_next, .stop = ip_vs_info_seq_stop, @@ -2340,6 +2339,7 @@ static struct nf_sockopt_ops ip_vs_sockopts = { .get_optmin = IP_VS_BASE_CTL, .get_optmax = IP_VS_SO_GET_MAX+1, .get = do_ip_vs_get_ctl, + .owner = THIS_MODULE, }; diff --git a/net/ipv4/ipvs/ip_vs_xmit.c b/net/ipv4/ipvs/ip_vs_xmit.c index 900ce29db382..666e080a74a3 100644 --- a/net/ipv4/ipvs/ip_vs_xmit.c +++ b/net/ipv4/ipvs/ip_vs_xmit.c @@ -128,7 +128,7 @@ ip_vs_dst_reset(struct ip_vs_dest *dest) #define IP_VS_XMIT(skb, rt) \ do { \ (skb)->ipvs_property = 1; \ - (skb)->ip_summed = CHECKSUM_NONE; \ + skb_forward_csum(skb); \ NF_HOOK(PF_INET, NF_IP_LOCAL_OUT, (skb), NULL, \ (rt)->u.dst.dev, dst_output); \ } while (0) diff --git a/net/ipv4/multipath.c b/net/ipv4/multipath.c deleted file mode 100644 index 4e9ca7c76407..000000000000 --- a/net/ipv4/multipath.c +++ /dev/null @@ -1,55 +0,0 @@ -/* multipath.c: IPV4 multipath algorithm support. - * - * Copyright (C) 2004, 2005 Einar Lueck <elueck@de.ibm.com> - * Copyright (C) 2005 David S. Miller <davem@davemloft.net> - */ - -#include <linux/module.h> -#include <linux/errno.h> -#include <linux/netdevice.h> -#include <linux/spinlock.h> - -#include <net/ip_mp_alg.h> - -static DEFINE_SPINLOCK(alg_table_lock); -struct ip_mp_alg_ops *ip_mp_alg_table[IP_MP_ALG_MAX + 1]; - -int multipath_alg_register(struct ip_mp_alg_ops *ops, enum ip_mp_alg n) -{ - struct ip_mp_alg_ops **slot; - int err; - - if (n < IP_MP_ALG_NONE || n > IP_MP_ALG_MAX || - !ops->mp_alg_select_route) - return -EINVAL; - - spin_lock(&alg_table_lock); - slot = &ip_mp_alg_table[n]; - if (*slot != NULL) { - err = -EBUSY; - } else { - *slot = ops; - err = 0; - } - spin_unlock(&alg_table_lock); - - return err; -} -EXPORT_SYMBOL(multipath_alg_register); - -void multipath_alg_unregister(struct ip_mp_alg_ops *ops, enum ip_mp_alg n) -{ - struct ip_mp_alg_ops **slot; - - if (n < IP_MP_ALG_NONE || n > IP_MP_ALG_MAX) - return; - - spin_lock(&alg_table_lock); - slot = &ip_mp_alg_table[n]; - if (*slot == ops) - *slot = NULL; - spin_unlock(&alg_table_lock); - - synchronize_net(); -} -EXPORT_SYMBOL(multipath_alg_unregister); diff --git a/net/ipv4/multipath_drr.c b/net/ipv4/multipath_drr.c deleted file mode 100644 index b03c5ca2c823..000000000000 --- a/net/ipv4/multipath_drr.c +++ /dev/null @@ -1,249 +0,0 @@ -/* - * Device round robin policy for multipath. - * - * - * Version: $Id: multipath_drr.c,v 1.1.2.1 2004/09/16 07:42:34 elueck Exp $ - * - * Authors: Einar Lueck <elueck@de.ibm.com><lkml@einar-lueck.de> - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License - * as published by the Free Software Foundation; either version - * 2 of the License, or (at your option) any later version. - */ - -#include <asm/system.h> -#include <asm/uaccess.h> -#include <linux/types.h> -#include <linux/errno.h> -#include <linux/timer.h> -#include <linux/mm.h> -#include <linux/kernel.h> -#include <linux/fcntl.h> -#include <linux/stat.h> -#include <linux/socket.h> -#include <linux/in.h> -#include <linux/inet.h> -#include <linux/netdevice.h> -#include <linux/inetdevice.h> -#include <linux/igmp.h> -#include <linux/proc_fs.h> -#include <linux/seq_file.h> -#include <linux/module.h> -#include <linux/mroute.h> -#include <linux/init.h> -#include <net/ip.h> -#include <net/protocol.h> -#include <linux/skbuff.h> -#include <net/sock.h> -#include <net/icmp.h> -#include <net/udp.h> -#include <net/raw.h> -#include <linux/notifier.h> -#include <linux/if_arp.h> -#include <linux/netfilter_ipv4.h> -#include <net/ipip.h> -#include <net/checksum.h> -#include <net/ip_mp_alg.h> - -struct multipath_device { - int ifi; /* interface index of device */ - atomic_t usecount; - int allocated; -}; - -#define MULTIPATH_MAX_DEVICECANDIDATES 10 - -static struct multipath_device state[MULTIPATH_MAX_DEVICECANDIDATES]; -static DEFINE_SPINLOCK(state_lock); - -static int inline __multipath_findslot(void) -{ - int i; - - for (i = 0; i < MULTIPATH_MAX_DEVICECANDIDATES; i++) { - if (state[i].allocated == 0) - return i; - } - return -1; -} - -static int inline __multipath_finddev(int ifindex) -{ - int i; - - for (i = 0; i < MULTIPATH_MAX_DEVICECANDIDATES; i++) { - if (state[i].allocated != 0 && - state[i].ifi == ifindex) - return i; - } - return -1; -} - -static int drr_dev_event(struct notifier_block *this, - unsigned long event, void *ptr) -{ - struct net_device *dev = ptr; - int devidx; - - switch (event) { - case NETDEV_UNREGISTER: - case NETDEV_DOWN: - spin_lock_bh(&state_lock); - - devidx = __multipath_finddev(dev->ifindex); - if (devidx != -1) { - state[devidx].allocated = 0; - state[devidx].ifi = 0; - atomic_set(&state[devidx].usecount, 0); - } - - spin_unlock_bh(&state_lock); - break; - } - - return NOTIFY_DONE; -} - -static struct notifier_block drr_dev_notifier = { - .notifier_call = drr_dev_event, -}; - - -static void drr_safe_inc(atomic_t *usecount) -{ - int n; - - atomic_inc(usecount); - - n = atomic_read(usecount); - if (n <= 0) { - int i; - - spin_lock_bh(&state_lock); - - for (i = 0; i < MULTIPATH_MAX_DEVICECANDIDATES; i++) - atomic_set(&state[i].usecount, 0); - - spin_unlock_bh(&state_lock); - } -} - -static void drr_select_route(const struct flowi *flp, - struct rtable *first, struct rtable **rp) -{ - struct rtable *nh, *result, *cur_min; - int min_usecount = -1; - int devidx = -1; - int cur_min_devidx = -1; - - /* 1. make sure all alt. nexthops have the same GC related data */ - /* 2. determine the new candidate to be returned */ - result = NULL; - cur_min = NULL; - for (nh = rcu_dereference(first); nh; - nh = rcu_dereference(nh->u.dst.rt_next)) { - if ((nh->u.dst.flags & DST_BALANCED) != 0 && - multipath_comparekeys(&nh->fl, flp)) { - int nh_ifidx = nh->u.dst.dev->ifindex; - - nh->u.dst.lastuse = jiffies; - nh->u.dst.__use++; - if (result != NULL) - continue; - - /* search for the output interface */ - - /* this is not SMP safe, only add/remove are - * SMP safe as wrong usecount updates have no big - * impact - */ - devidx = __multipath_finddev(nh_ifidx); - if (devidx == -1) { - /* add the interface to the array - * SMP safe - */ - spin_lock_bh(&state_lock); - - /* due to SMP: search again */ - devidx = __multipath_finddev(nh_ifidx); - if (devidx == -1) { - /* add entry for device */ - devidx = __multipath_findslot(); - if (devidx == -1) { - /* unlikely but possible */ - continue; - } - - state[devidx].allocated = 1; - state[devidx].ifi = nh_ifidx; - atomic_set(&state[devidx].usecount, 0); - min_usecount = 0; - } - - spin_unlock_bh(&state_lock); - } - - if (min_usecount == 0) { - /* if the device has not been used it is - * the primary target - */ - drr_safe_inc(&state[devidx].usecount); - result = nh; - } else { - int count = - atomic_read(&state[devidx].usecount); - - if (min_usecount == -1 || - count < min_usecount) { - cur_min = nh; - cur_min_devidx = devidx; - min_usecount = count; - } - } - } - } - - if (!result) { - if (cur_min) { - drr_safe_inc(&state[cur_min_devidx].usecount); - result = cur_min; - } else { - result = first; - } - } - - *rp = result; -} - -static struct ip_mp_alg_ops drr_ops = { - .mp_alg_select_route = drr_select_route, -}; - -static int __init drr_init(void) -{ - int err = register_netdevice_notifier(&drr_dev_notifier); - - if (err) - return err; - - err = multipath_alg_register(&drr_ops, IP_MP_ALG_DRR); - if (err) - goto fail; - - return 0; - -fail: - unregister_netdevice_notifier(&drr_dev_notifier); - return err; -} - -static void __exit drr_exit(void) -{ - unregister_netdevice_notifier(&drr_dev_notifier); - multipath_alg_unregister(&drr_ops, IP_MP_ALG_DRR); -} - -module_init(drr_init); -module_exit(drr_exit); -MODULE_LICENSE("GPL"); diff --git a/net/ipv4/multipath_random.c b/net/ipv4/multipath_random.c deleted file mode 100644 index c312785d14d0..000000000000 --- a/net/ipv4/multipath_random.c +++ /dev/null @@ -1,114 +0,0 @@ -/* - * Random policy for multipath. - * - * - * Version: $Id: multipath_random.c,v 1.1.2.3 2004/09/21 08:42:11 elueck Exp $ - * - * Authors: Einar Lueck <elueck@de.ibm.com><lkml@einar-lueck.de> - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License - * as published by the Free Software Foundation; either version - * 2 of the License, or (at your option) any later version. - */ - -#include <asm/system.h> -#include <asm/uaccess.h> -#include <linux/types.h> -#include <linux/errno.h> -#include <linux/timer.h> -#include <linux/mm.h> -#include <linux/kernel.h> -#include <linux/fcntl.h> -#include <linux/stat.h> -#include <linux/socket.h> -#include <linux/in.h> -#include <linux/inet.h> -#include <linux/netdevice.h> -#include <linux/inetdevice.h> -#include <linux/igmp.h> -#include <linux/proc_fs.h> -#include <linux/seq_file.h> -#include <linux/module.h> -#include <linux/mroute.h> -#include <linux/init.h> -#include <linux/random.h> -#include <net/ip.h> -#include <net/protocol.h> -#include <linux/skbuff.h> -#include <net/sock.h> -#include <net/icmp.h> -#include <net/udp.h> -#include <net/raw.h> -#include <linux/notifier.h> -#include <linux/if_arp.h> -#include <linux/netfilter_ipv4.h> -#include <net/ipip.h> -#include <net/checksum.h> -#include <net/ip_mp_alg.h> - -#define MULTIPATH_MAX_CANDIDATES 40 - -static void random_select_route(const struct flowi *flp, - struct rtable *first, - struct rtable **rp) -{ - struct rtable *rt; - struct rtable *decision; - unsigned char candidate_count = 0; - - /* count all candidate */ - for (rt = rcu_dereference(first); rt; - rt = rcu_dereference(rt->u.dst.rt_next)) { - if ((rt->u.dst.flags & DST_BALANCED) != 0 && - multipath_comparekeys(&rt->fl, flp)) - ++candidate_count; - } - - /* choose a random candidate */ - decision = first; - if (candidate_count > 1) { - unsigned char i = 0; - unsigned char candidate_no = (unsigned char) - (random32() % candidate_count); - - /* find chosen candidate and adjust GC data for all candidates - * to ensure they stay in cache - */ - for (rt = first; rt; rt = rt->u.dst.rt_next) { - if ((rt->u.dst.flags & DST_BALANCED) != 0 && - multipath_comparekeys(&rt->fl, flp)) { - rt->u.dst.lastuse = jiffies; - - if (i == candidate_no) - decision = rt; - - if (i >= candidate_count) - break; - - i++; - } - } - } - - decision->u.dst.__use++; - *rp = decision; -} - -static struct ip_mp_alg_ops random_ops = { - .mp_alg_select_route = random_select_route, -}; - -static int __init random_init(void) -{ - return multipath_alg_register(&random_ops, IP_MP_ALG_RANDOM); -} - -static void __exit random_exit(void) -{ - multipath_alg_unregister(&random_ops, IP_MP_ALG_RANDOM); -} - -module_init(random_init); -module_exit(random_exit); -MODULE_LICENSE("GPL"); diff --git a/net/ipv4/multipath_rr.c b/net/ipv4/multipath_rr.c deleted file mode 100644 index 0ad22524f450..000000000000 --- a/net/ipv4/multipath_rr.c +++ /dev/null @@ -1,95 +0,0 @@ -/* - * Round robin policy for multipath. - * - * - * Version: $Id: multipath_rr.c,v 1.1.2.2 2004/09/16 07:42:34 elueck Exp $ - * - * Authors: Einar Lueck <elueck@de.ibm.com><lkml@einar-lueck.de> - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License - * as published by the Free Software Foundation; either version - * 2 of the License, or (at your option) any later version. - */ - -#include <asm/system.h> -#include <asm/uaccess.h> -#include <linux/types.h> -#include <linux/errno.h> -#include <linux/timer.h> -#include <linux/mm.h> -#include <linux/kernel.h> -#include <linux/fcntl.h> -#include <linux/stat.h> -#include <linux/socket.h> -#include <linux/in.h> -#include <linux/inet.h> -#include <linux/netdevice.h> -#include <linux/inetdevice.h> -#include <linux/igmp.h> -#include <linux/proc_fs.h> -#include <linux/seq_file.h> -#include <linux/module.h> -#include <linux/mroute.h> -#include <linux/init.h> -#include <net/ip.h> -#include <net/protocol.h> -#include <linux/skbuff.h> -#include <net/sock.h> -#include <net/icmp.h> -#include <net/udp.h> -#include <net/raw.h> -#include <linux/notifier.h> -#include <linux/if_arp.h> -#include <linux/netfilter_ipv4.h> -#include <net/ipip.h> -#include <net/checksum.h> -#include <net/ip_mp_alg.h> - -static void rr_select_route(const struct flowi *flp, - struct rtable *first, struct rtable **rp) -{ - struct rtable *nh, *result, *min_use_cand = NULL; - int min_use = -1; - - /* 1. make sure all alt. nexthops have the same GC related data - * 2. determine the new candidate to be returned - */ - result = NULL; - for (nh = rcu_dereference(first); nh; - nh = rcu_dereference(nh->u.dst.rt_next)) { - if ((nh->u.dst.flags & DST_BALANCED) != 0 && - multipath_comparekeys(&nh->fl, flp)) { - nh->u.dst.lastuse = jiffies; - - if (min_use == -1 || nh->u.dst.__use < min_use) { - min_use = nh->u.dst.__use; - min_use_cand = nh; - } - } - } - result = min_use_cand; - if (!result) - result = first; - - result->u.dst.__use++; - *rp = result; -} - -static struct ip_mp_alg_ops rr_ops = { - .mp_alg_select_route = rr_select_route, -}; - -static int __init rr_init(void) -{ - return multipath_alg_register(&rr_ops, IP_MP_ALG_RR); -} - -static void __exit rr_exit(void) -{ - multipath_alg_unregister(&rr_ops, IP_MP_ALG_RR); -} - -module_init(rr_init); -module_exit(rr_exit); -MODULE_LICENSE("GPL"); diff --git a/net/ipv4/multipath_wrandom.c b/net/ipv4/multipath_wrandom.c deleted file mode 100644 index 57c503694539..000000000000 --- a/net/ipv4/multipath_wrandom.c +++ /dev/null @@ -1,329 +0,0 @@ -/* - * Weighted random policy for multipath. - * - * - * Version: $Id: multipath_wrandom.c,v 1.1.2.3 2004/09/22 07:51:40 elueck Exp $ - * - * Authors: Einar Lueck <elueck@de.ibm.com><lkml@einar-lueck.de> - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License - * as published by the Free Software Foundation; either version - * 2 of the License, or (at your option) any later version. - */ - -#include <asm/system.h> -#include <asm/uaccess.h> -#include <linux/types.h> -#include <linux/errno.h> -#include <linux/timer.h> -#include <linux/mm.h> -#include <linux/kernel.h> -#include <linux/fcntl.h> -#include <linux/stat.h> -#include <linux/socket.h> -#include <linux/in.h> -#include <linux/inet.h> -#include <linux/netdevice.h> -#include <linux/inetdevice.h> -#include <linux/igmp.h> -#include <linux/proc_fs.h> -#include <linux/seq_file.h> -#include <linux/module.h> -#include <linux/mroute.h> -#include <linux/init.h> -#include <linux/random.h> -#include <net/ip.h> -#include <net/protocol.h> -#include <linux/skbuff.h> -#include <net/sock.h> -#include <net/icmp.h> -#include <net/udp.h> -#include <net/raw.h> -#include <linux/notifier.h> -#include <linux/if_arp.h> -#include <linux/netfilter_ipv4.h> -#include <net/ipip.h> -#include <net/checksum.h> -#include <net/ip_fib.h> -#include <net/ip_mp_alg.h> - -#define MULTIPATH_STATE_SIZE 15 - -struct multipath_candidate { - struct multipath_candidate *next; - int power; - struct rtable *rt; -}; - -struct multipath_dest { - struct list_head list; - - const struct fib_nh *nh_info; - __be32 netmask; - __be32 network; - unsigned char prefixlen; - - struct rcu_head rcu; -}; - -struct multipath_bucket { - struct list_head head; - spinlock_t lock; -}; - -struct multipath_route { - struct list_head list; - - int oif; - __be32 gw; - struct list_head dests; - - struct rcu_head rcu; -}; - -/* state: primarily weight per route information */ -static struct multipath_bucket state[MULTIPATH_STATE_SIZE]; - -static unsigned char __multipath_lookup_weight(const struct flowi *fl, - const struct rtable *rt) -{ - const int state_idx = rt->idev->dev->ifindex % MULTIPATH_STATE_SIZE; - struct multipath_route *r; - struct multipath_route *target_route = NULL; - struct multipath_dest *d; - int weight = 1; - - /* lookup the weight information for a certain route */ - rcu_read_lock(); - - /* find state entry for gateway or add one if necessary */ - list_for_each_entry_rcu(r, &state[state_idx].head, list) { - if (r->gw == rt->rt_gateway && - r->oif == rt->idev->dev->ifindex) { - target_route = r; - break; - } - } - - if (!target_route) { - /* this should not happen... but we are prepared */ - printk( KERN_CRIT"%s: missing state for gateway: %u and " \ - "device %d\n", __FUNCTION__, rt->rt_gateway, - rt->idev->dev->ifindex); - goto out; - } - - /* find state entry for destination */ - list_for_each_entry_rcu(d, &target_route->dests, list) { - __be32 targetnetwork = fl->fl4_dst & - inet_make_mask(d->prefixlen); - - if ((targetnetwork & d->netmask) == d->network) { - weight = d->nh_info->nh_weight; - goto out; - } - } - -out: - rcu_read_unlock(); - return weight; -} - -static void wrandom_init_state(void) -{ - int i; - - for (i = 0; i < MULTIPATH_STATE_SIZE; ++i) { - INIT_LIST_HEAD(&state[i].head); - spin_lock_init(&state[i].lock); - } -} - -static void wrandom_select_route(const struct flowi *flp, - struct rtable *first, - struct rtable **rp) -{ - struct rtable *rt; - struct rtable *decision; - struct multipath_candidate *first_mpc = NULL; - struct multipath_candidate *mpc, *last_mpc = NULL; - int power = 0; - int last_power; - int selector; - const size_t size_mpc = sizeof(struct multipath_candidate); - - /* collect all candidates and identify their weights */ - for (rt = rcu_dereference(first); rt; - rt = rcu_dereference(rt->u.dst.rt_next)) { - if ((rt->u.dst.flags & DST_BALANCED) != 0 && - multipath_comparekeys(&rt->fl, flp)) { - struct multipath_candidate* mpc = - (struct multipath_candidate*) - kmalloc(size_mpc, GFP_ATOMIC); - - if (!mpc) - return; - - power += __multipath_lookup_weight(flp, rt) * 10000; - - mpc->power = power; - mpc->rt = rt; - mpc->next = NULL; - - if (!first_mpc) - first_mpc = mpc; - else - last_mpc->next = mpc; - - last_mpc = mpc; - } - } - - /* choose a weighted random candidate */ - decision = first; - selector = random32() % power; - last_power = 0; - - /* select candidate, adjust GC data and cleanup local state */ - decision = first; - last_mpc = NULL; - for (mpc = first_mpc; mpc; mpc = mpc->next) { - mpc->rt->u.dst.lastuse = jiffies; - if (last_power <= selector && selector < mpc->power) - decision = mpc->rt; - - last_power = mpc->power; - kfree(last_mpc); - last_mpc = mpc; - } - - /* concurrent __multipath_flush may lead to !last_mpc */ - kfree(last_mpc); - - decision->u.dst.__use++; - *rp = decision; -} - -static void wrandom_set_nhinfo(__be32 network, - __be32 netmask, - unsigned char prefixlen, - const struct fib_nh *nh) -{ - const int state_idx = nh->nh_oif % MULTIPATH_STATE_SIZE; - struct multipath_route *r, *target_route = NULL; - struct multipath_dest *d, *target_dest = NULL; - - /* store the weight information for a certain route */ - spin_lock_bh(&state[state_idx].lock); - - /* find state entry for gateway or add one if necessary */ - list_for_each_entry_rcu(r, &state[state_idx].head, list) { - if (r->gw == nh->nh_gw && r->oif == nh->nh_oif) { - target_route = r; - break; - } - } - - if (!target_route) { - const size_t size_rt = sizeof(struct multipath_route); - target_route = (struct multipath_route *) - kmalloc(size_rt, GFP_ATOMIC); - - target_route->gw = nh->nh_gw; - target_route->oif = nh->nh_oif; - memset(&target_route->rcu, 0, sizeof(struct rcu_head)); - INIT_LIST_HEAD(&target_route->dests); - - list_add_rcu(&target_route->list, &state[state_idx].head); - } - - /* find state entry for destination or add one if necessary */ - list_for_each_entry_rcu(d, &target_route->dests, list) { - if (d->nh_info == nh) { - target_dest = d; - break; - } - } - - if (!target_dest) { - const size_t size_dst = sizeof(struct multipath_dest); - target_dest = (struct multipath_dest*) - kmalloc(size_dst, GFP_ATOMIC); - - target_dest->nh_info = nh; - target_dest->network = network; - target_dest->netmask = netmask; - target_dest->prefixlen = prefixlen; - memset(&target_dest->rcu, 0, sizeof(struct rcu_head)); - - list_add_rcu(&target_dest->list, &target_route->dests); - } - /* else: we already stored this info for another destination => - * we are finished - */ - - spin_unlock_bh(&state[state_idx].lock); -} - -static void __multipath_free(struct rcu_head *head) -{ - struct multipath_route *rt = container_of(head, struct multipath_route, - rcu); - kfree(rt); -} - -static void __multipath_free_dst(struct rcu_head *head) -{ - struct multipath_dest *dst = container_of(head, - struct multipath_dest, - rcu); - kfree(dst); -} - -static void wrandom_flush(void) -{ - int i; - - /* defere delete to all entries */ - for (i = 0; i < MULTIPATH_STATE_SIZE; ++i) { - struct multipath_route *r; - - spin_lock_bh(&state[i].lock); - list_for_each_entry_rcu(r, &state[i].head, list) { - struct multipath_dest *d; - list_for_each_entry_rcu(d, &r->dests, list) { - list_del_rcu(&d->list); - call_rcu(&d->rcu, - __multipath_free_dst); - } - list_del_rcu(&r->list); - call_rcu(&r->rcu, - __multipath_free); - } - - spin_unlock_bh(&state[i].lock); - } -} - -static struct ip_mp_alg_ops wrandom_ops = { - .mp_alg_select_route = wrandom_select_route, - .mp_alg_flush = wrandom_flush, - .mp_alg_set_nhinfo = wrandom_set_nhinfo, -}; - -static int __init wrandom_init(void) -{ - wrandom_init_state(); - - return multipath_alg_register(&wrandom_ops, IP_MP_ALG_WRANDOM); -} - -static void __exit wrandom_exit(void) -{ - multipath_alg_unregister(&wrandom_ops, IP_MP_ALG_WRANDOM); -} - -module_init(wrandom_init); -module_exit(wrandom_exit); -MODULE_LICENSE("GPL"); diff --git a/net/ipv4/netfilter/Kconfig b/net/ipv4/netfilter/Kconfig index 46509fae9fd8..fa97947c6ae1 100644 --- a/net/ipv4/netfilter/Kconfig +++ b/net/ipv4/netfilter/Kconfig @@ -230,7 +230,7 @@ config IP_NF_TARGET_NETMAP To compile it as a module, choose M here. If unsure, say N. config IP_NF_TARGET_SAME - tristate "SAME target support" + tristate "SAME target support (OBSOLETE)" depends on NF_NAT help This option adds a `SAME' target, which works like the standard SNAT diff --git a/net/ipv4/netfilter/arp_tables.c b/net/ipv4/netfilter/arp_tables.c index cae41215e3c7..29114a9ccd1d 100644 --- a/net/ipv4/netfilter/arp_tables.c +++ b/net/ipv4/netfilter/arp_tables.c @@ -224,7 +224,7 @@ unsigned int arpt_do_table(struct sk_buff **pskb, static const char nulldevname[IFNAMSIZ]; unsigned int verdict = NF_DROP; struct arphdr *arp; - int hotdrop = 0; + bool hotdrop = false; struct arpt_entry *e, *back; const char *indev, *outdev; void *table_base; @@ -1140,13 +1140,13 @@ void arpt_unregister_table(struct arpt_table *table) } /* The built-in targets: standard (NULL) and error. */ -static struct arpt_target arpt_standard_target = { +static struct arpt_target arpt_standard_target __read_mostly = { .name = ARPT_STANDARD_TARGET, .targetsize = sizeof(int), .family = NF_ARP, }; -static struct arpt_target arpt_error_target = { +static struct arpt_target arpt_error_target __read_mostly = { .name = ARPT_ERROR_TARGET, .target = arpt_error, .targetsize = ARPT_FUNCTION_MAXNAMELEN, @@ -1161,6 +1161,7 @@ static struct nf_sockopt_ops arpt_sockopts = { .get_optmin = ARPT_BASE_CTL, .get_optmax = ARPT_SO_GET_MAX+1, .get = do_arpt_get_ctl, + .owner = THIS_MODULE, }; static int __init arp_tables_init(void) @@ -1184,7 +1185,7 @@ static int __init arp_tables_init(void) if (ret < 0) goto err4; - printk("arp_tables: (C) 2002 David S. Miller\n"); + printk(KERN_INFO "arp_tables: (C) 2002 David S. Miller\n"); return 0; err4: diff --git a/net/ipv4/netfilter/arpt_mangle.c b/net/ipv4/netfilter/arpt_mangle.c index 6298d404e7c7..c4bdab47597f 100644 --- a/net/ipv4/netfilter/arpt_mangle.c +++ b/net/ipv4/netfilter/arpt_mangle.c @@ -65,7 +65,7 @@ target(struct sk_buff **pskb, return mangle->target; } -static int +static bool checkentry(const char *tablename, const void *e, const struct xt_target *target, void *targinfo, unsigned int hook_mask) { @@ -73,15 +73,15 @@ checkentry(const char *tablename, const void *e, const struct xt_target *target, if (mangle->flags & ~ARPT_MANGLE_MASK || !(mangle->flags & ARPT_MANGLE_MASK)) - return 0; + return false; if (mangle->target != NF_DROP && mangle->target != NF_ACCEPT && mangle->target != ARPT_CONTINUE) - return 0; - return 1; + return false; + return true; } -static struct arpt_target arpt_mangle_reg = { +static struct arpt_target arpt_mangle_reg __read_mostly = { .name = "mangle", .target = target, .targetsize = sizeof(struct arpt_mangle), diff --git a/net/ipv4/netfilter/ip_tables.c b/net/ipv4/netfilter/ip_tables.c index 9bacf1a03630..6486894f450c 100644 --- a/net/ipv4/netfilter/ip_tables.c +++ b/net/ipv4/netfilter/ip_tables.c @@ -152,20 +152,20 @@ ip_packet_match(const struct iphdr *ip, return 1; } -static inline int +static inline bool ip_checkentry(const struct ipt_ip *ip) { if (ip->flags & ~IPT_F_MASK) { duprintf("Unknown flag bits set: %08X\n", ip->flags & ~IPT_F_MASK); - return 0; + return false; } if (ip->invflags & ~IPT_INV_MASK) { duprintf("Unknown invflag bits set: %08X\n", ip->invflags & ~IPT_INV_MASK); - return 0; + return false; } - return 1; + return true; } static unsigned int @@ -183,19 +183,19 @@ ipt_error(struct sk_buff **pskb, } static inline -int do_match(struct ipt_entry_match *m, - const struct sk_buff *skb, - const struct net_device *in, - const struct net_device *out, - int offset, - int *hotdrop) +bool do_match(struct ipt_entry_match *m, + const struct sk_buff *skb, + const struct net_device *in, + const struct net_device *out, + int offset, + bool *hotdrop) { /* Stop iteration if it doesn't match */ if (!m->u.kernel.match->match(skb, in, out, m->u.kernel.match, m->data, offset, ip_hdrlen(skb), hotdrop)) - return 1; + return true; else - return 0; + return false; } static inline struct ipt_entry * @@ -204,6 +204,112 @@ get_entry(void *base, unsigned int offset) return (struct ipt_entry *)(base + offset); } +/* All zeroes == unconditional rule. */ +static inline int +unconditional(const struct ipt_ip *ip) +{ + unsigned int i; + + for (i = 0; i < sizeof(*ip)/sizeof(__u32); i++) + if (((__u32 *)ip)[i]) + return 0; + + return 1; +} + +#if defined(CONFIG_NETFILTER_XT_TARGET_TRACE) || \ + defined(CONFIG_NETFILTER_XT_TARGET_TRACE_MODULE) +static const char *hooknames[] = { + [NF_IP_PRE_ROUTING] = "PREROUTING", + [NF_IP_LOCAL_IN] = "INPUT", + [NF_IP_FORWARD] = "FORWARD", + [NF_IP_LOCAL_OUT] = "OUTPUT", + [NF_IP_POST_ROUTING] = "POSTROUTING", +}; + +enum nf_ip_trace_comments { + NF_IP_TRACE_COMMENT_RULE, + NF_IP_TRACE_COMMENT_RETURN, + NF_IP_TRACE_COMMENT_POLICY, +}; + +static const char *comments[] = { + [NF_IP_TRACE_COMMENT_RULE] = "rule", + [NF_IP_TRACE_COMMENT_RETURN] = "return", + [NF_IP_TRACE_COMMENT_POLICY] = "policy", +}; + +static struct nf_loginfo trace_loginfo = { + .type = NF_LOG_TYPE_LOG, + .u = { + .log = { + .level = 4, + .logflags = NF_LOG_MASK, + }, + }, +}; + +static inline int +get_chainname_rulenum(struct ipt_entry *s, struct ipt_entry *e, + char *hookname, char **chainname, + char **comment, unsigned int *rulenum) +{ + struct ipt_standard_target *t = (void *)ipt_get_target(s); + + if (strcmp(t->target.u.kernel.target->name, IPT_ERROR_TARGET) == 0) { + /* Head of user chain: ERROR target with chainname */ + *chainname = t->target.data; + (*rulenum) = 0; + } else if (s == e) { + (*rulenum)++; + + if (s->target_offset == sizeof(struct ipt_entry) + && strcmp(t->target.u.kernel.target->name, + IPT_STANDARD_TARGET) == 0 + && t->verdict < 0 + && unconditional(&s->ip)) { + /* Tail of chains: STANDARD target (return/policy) */ + *comment = *chainname == hookname + ? (char *)comments[NF_IP_TRACE_COMMENT_POLICY] + : (char *)comments[NF_IP_TRACE_COMMENT_RETURN]; + } + return 1; + } else + (*rulenum)++; + + return 0; +} + +static void trace_packet(struct sk_buff *skb, + unsigned int hook, + const struct net_device *in, + const struct net_device *out, + char *tablename, + struct xt_table_info *private, + struct ipt_entry *e) +{ + void *table_base; + struct ipt_entry *root; + char *hookname, *chainname, *comment; + unsigned int rulenum = 0; + + table_base = (void *)private->entries[smp_processor_id()]; + root = get_entry(table_base, private->hook_entry[hook]); + + hookname = chainname = (char *)hooknames[hook]; + comment = (char *)comments[NF_IP_TRACE_COMMENT_RULE]; + + IPT_ENTRY_ITERATE(root, + private->size - private->hook_entry[hook], + get_chainname_rulenum, + e, hookname, &chainname, &comment, &rulenum); + + nf_log_packet(AF_INET, hook, skb, in, out, &trace_loginfo, + "TRACE: %s:%s:%s:%u ", + tablename, chainname, comment, rulenum); +} +#endif + /* Returns one of the generic firewall policies, like NF_ACCEPT. */ unsigned int ipt_do_table(struct sk_buff **pskb, @@ -216,7 +322,7 @@ ipt_do_table(struct sk_buff **pskb, u_int16_t offset; struct iphdr *ip; u_int16_t datalen; - int hotdrop = 0; + bool hotdrop = false; /* Initializing verdict to NF_DROP keeps gcc happy. */ unsigned int verdict = NF_DROP; const char *indev, *outdev; @@ -261,6 +367,14 @@ ipt_do_table(struct sk_buff **pskb, t = ipt_get_target(e); IP_NF_ASSERT(t->u.kernel.target); + +#if defined(CONFIG_NETFILTER_XT_TARGET_TRACE) || \ + defined(CONFIG_NETFILTER_XT_TARGET_TRACE_MODULE) + /* The packet is traced: log it */ + if (unlikely((*pskb)->nf_trace)) + trace_packet(*pskb, hook, in, out, + table->name, private, e); +#endif /* Standard target? */ if (!t->u.kernel.target->target) { int v; @@ -341,19 +455,6 @@ ipt_do_table(struct sk_buff **pskb, #endif } -/* All zeroes == unconditional rule. */ -static inline int -unconditional(const struct ipt_ip *ip) -{ - unsigned int i; - - for (i = 0; i < sizeof(*ip)/sizeof(__u32); i++) - if (((__u32 *)ip)[i]) - return 0; - - return 1; -} - /* Figures out from what hook each rule can be called: returns 0 if there are loops. Puts hook bitmask in comefrom. */ static int @@ -2105,16 +2206,16 @@ void ipt_unregister_table(struct xt_table *table) } /* Returns 1 if the type and code is matched by the range, 0 otherwise */ -static inline int +static inline bool icmp_type_code_match(u_int8_t test_type, u_int8_t min_code, u_int8_t max_code, u_int8_t type, u_int8_t code, - int invert) + bool invert) { return ((test_type == 0xFF) || (type == test_type && code >= min_code && code <= max_code)) ^ invert; } -static int +static bool icmp_match(const struct sk_buff *skb, const struct net_device *in, const struct net_device *out, @@ -2122,14 +2223,14 @@ icmp_match(const struct sk_buff *skb, const void *matchinfo, int offset, unsigned int protoff, - int *hotdrop) + bool *hotdrop) { struct icmphdr _icmph, *ic; const struct ipt_icmp *icmpinfo = matchinfo; /* Must not be a fragment. */ if (offset) - return 0; + return false; ic = skb_header_pointer(skb, protoff, sizeof(_icmph), &_icmph); if (ic == NULL) { @@ -2137,8 +2238,8 @@ icmp_match(const struct sk_buff *skb, * can't. Hence, no choice but to drop. */ duprintf("Dropping evil ICMP tinygram.\n"); - *hotdrop = 1; - return 0; + *hotdrop = true; + return false; } return icmp_type_code_match(icmpinfo->type, @@ -2149,7 +2250,7 @@ icmp_match(const struct sk_buff *skb, } /* Called when user tries to insert an entry of this type. */ -static int +static bool icmp_checkentry(const char *tablename, const void *info, const struct xt_match *match, @@ -2163,7 +2264,7 @@ icmp_checkentry(const char *tablename, } /* The built-in targets: standard (NULL) and error. */ -static struct xt_target ipt_standard_target = { +static struct xt_target ipt_standard_target __read_mostly = { .name = IPT_STANDARD_TARGET, .targetsize = sizeof(int), .family = AF_INET, @@ -2174,7 +2275,7 @@ static struct xt_target ipt_standard_target = { #endif }; -static struct xt_target ipt_error_target = { +static struct xt_target ipt_error_target __read_mostly = { .name = IPT_ERROR_TARGET, .target = ipt_error, .targetsize = IPT_FUNCTION_MAXNAMELEN, @@ -2195,9 +2296,10 @@ static struct nf_sockopt_ops ipt_sockopts = { #ifdef CONFIG_COMPAT .compat_get = compat_do_ipt_get_ctl, #endif + .owner = THIS_MODULE, }; -static struct xt_match icmp_matchstruct = { +static struct xt_match icmp_matchstruct __read_mostly = { .name = "icmp", .match = icmp_match, .matchsize = sizeof(struct ipt_icmp), @@ -2230,7 +2332,7 @@ static int __init ip_tables_init(void) if (ret < 0) goto err5; - printk("ip_tables: (C) 2000-2006 Netfilter Core Team\n"); + printk(KERN_INFO "ip_tables: (C) 2000-2006 Netfilter Core Team\n"); return 0; err5: diff --git a/net/ipv4/netfilter/ipt_CLUSTERIP.c b/net/ipv4/netfilter/ipt_CLUSTERIP.c index 40e273421398..69bd362b5fa2 100644 --- a/net/ipv4/netfilter/ipt_CLUSTERIP.c +++ b/net/ipv4/netfilter/ipt_CLUSTERIP.c @@ -19,7 +19,6 @@ #include <linux/udp.h> #include <linux/icmp.h> #include <linux/if_arp.h> -#include <linux/proc_fs.h> #include <linux/seq_file.h> #include <linux/netfilter_arp.h> #include <linux/netfilter/x_tables.h> @@ -30,14 +29,6 @@ #define CLUSTERIP_VERSION "0.8" -#define DEBUG_CLUSTERIP - -#ifdef DEBUG_CLUSTERIP -#define DEBUGP printk -#else -#define DEBUGP -#endif - MODULE_LICENSE("GPL"); MODULE_AUTHOR("Harald Welte <laforge@netfilter.org>"); MODULE_DESCRIPTION("iptables target for CLUSTERIP"); @@ -122,9 +113,8 @@ __clusterip_config_find(__be32 clusterip) list_for_each(pos, &clusterip_configs) { struct clusterip_config *c = list_entry(pos, struct clusterip_config, list); - if (c->clusterip == clusterip) { + if (c->clusterip == clusterip) return c; - } } return NULL; @@ -155,9 +145,8 @@ clusterip_config_init_nodelist(struct clusterip_config *c, { int n; - for (n = 0; n < i->num_local_nodes; n++) { + for (n = 0; n < i->num_local_nodes; n++) set_bit(i->local_nodes[n] - 1, &c->local_nodes); - } } static struct clusterip_config * @@ -220,27 +209,28 @@ clusterip_add_node(struct clusterip_config *c, u_int16_t nodenum) return 0; } -static int +static bool clusterip_del_node(struct clusterip_config *c, u_int16_t nodenum) { if (nodenum == 0 || nodenum > c->num_total_nodes) - return 1; + return true; if (test_and_clear_bit(nodenum - 1, &c->local_nodes)) - return 0; + return false; - return 1; + return true; } #endif static inline u_int32_t -clusterip_hashfn(struct sk_buff *skb, struct clusterip_config *config) +clusterip_hashfn(const struct sk_buff *skb, + const struct clusterip_config *config) { - struct iphdr *iph = ip_hdr(skb); + const struct iphdr *iph = ip_hdr(skb); unsigned long hashval; u_int16_t sport, dport; - u_int16_t *ports; + const u_int16_t *ports; switch (iph->protocol) { case IPPROTO_TCP: @@ -249,15 +239,14 @@ clusterip_hashfn(struct sk_buff *skb, struct clusterip_config *config) case IPPROTO_SCTP: case IPPROTO_DCCP: case IPPROTO_ICMP: - ports = (void *)iph+iph->ihl*4; + ports = (const void *)iph+iph->ihl*4; sport = ports[0]; dport = ports[1]; break; default: - if (net_ratelimit()) { + if (net_ratelimit()) printk(KERN_NOTICE "CLUSTERIP: unknown protocol `%u'\n", iph->protocol); - } sport = dport = 0; } @@ -285,11 +274,11 @@ clusterip_hashfn(struct sk_buff *skb, struct clusterip_config *config) } /* node numbers are 1..n, not 0..n */ - return ((hashval % config->num_total_nodes)+1); + return (hashval % config->num_total_nodes) + 1; } static inline int -clusterip_responsible(struct clusterip_config *config, u_int32_t hash) +clusterip_responsible(const struct clusterip_config *config, u_int32_t hash) { return test_bit(hash - 1, &config->local_nodes); } @@ -353,15 +342,15 @@ target(struct sk_buff **pskb, break; } -#ifdef DEBUG_CLUSTERP +#ifdef DEBUG DUMP_TUPLE(&ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple); #endif - DEBUGP("hash=%u ct_hash=%u ", hash, ct->mark); + pr_debug("hash=%u ct_hash=%u ", hash, ct->mark); if (!clusterip_responsible(cipinfo->config, hash)) { - DEBUGP("not responsible\n"); + pr_debug("not responsible\n"); return NF_DROP; } - DEBUGP("responsible\n"); + pr_debug("responsible\n"); /* despite being received via linklayer multicast, this is * actually a unicast IP packet. TCP doesn't like PACKET_MULTICAST */ @@ -370,7 +359,7 @@ target(struct sk_buff **pskb, return XT_CONTINUE; } -static int +static bool checkentry(const char *tablename, const void *e_void, const struct xt_target *target, @@ -387,50 +376,34 @@ checkentry(const char *tablename, cipinfo->hash_mode != CLUSTERIP_HASHMODE_SIP_SPT_DPT) { printk(KERN_WARNING "CLUSTERIP: unknown mode `%u'\n", cipinfo->hash_mode); - return 0; + return false; } if (e->ip.dmsk.s_addr != htonl(0xffffffff) || e->ip.dst.s_addr == 0) { printk(KERN_ERR "CLUSTERIP: Please specify destination IP\n"); - return 0; + return false; } /* FIXME: further sanity checks */ config = clusterip_config_find_get(e->ip.dst.s_addr, 1); - if (config) { - if (cipinfo->config != NULL) { - /* Case A: This is an entry that gets reloaded, since - * it still has a cipinfo->config pointer. Simply - * increase the entry refcount and return */ - if (cipinfo->config != config) { - printk(KERN_ERR "CLUSTERIP: Reloaded entry " - "has invalid config pointer!\n"); - return 0; - } - } else { - /* Case B: This is a new rule referring to an existing - * clusterip config. */ - cipinfo->config = config; - } - } else { - /* Case C: This is a completely new clusterip config */ + if (!config) { if (!(cipinfo->flags & CLUSTERIP_FLAG_NEW)) { printk(KERN_WARNING "CLUSTERIP: no config found for %u.%u.%u.%u, need 'new'\n", NIPQUAD(e->ip.dst.s_addr)); - return 0; + return false; } else { struct net_device *dev; if (e->ip.iniface[0] == '\0') { printk(KERN_WARNING "CLUSTERIP: Please specify an interface name\n"); - return 0; + return false; } dev = dev_get_by_name(e->ip.iniface); if (!dev) { printk(KERN_WARNING "CLUSTERIP: no such interface %s\n", e->ip.iniface); - return 0; + return false; } config = clusterip_config_init(cipinfo, @@ -438,20 +411,20 @@ checkentry(const char *tablename, if (!config) { printk(KERN_WARNING "CLUSTERIP: cannot allocate config\n"); dev_put(dev); - return 0; + return false; } dev_mc_add(config->dev,config->clustermac, ETH_ALEN, 0); } - cipinfo->config = config; } + cipinfo->config = config; if (nf_ct_l3proto_try_module_get(target->family) < 0) { printk(KERN_WARNING "can't load conntrack support for " "proto=%d\n", target->family); - return 0; + return false; } - return 1; + return true; } /* drop reference count of cluster config when rule is deleted */ @@ -468,13 +441,30 @@ static void destroy(const struct xt_target *target, void *targinfo) nf_ct_l3proto_module_put(target->family); } -static struct xt_target clusterip_tgt = { +#ifdef CONFIG_COMPAT +struct compat_ipt_clusterip_tgt_info +{ + u_int32_t flags; + u_int8_t clustermac[6]; + u_int16_t num_total_nodes; + u_int16_t num_local_nodes; + u_int16_t local_nodes[CLUSTERIP_MAX_NODES]; + u_int32_t hash_mode; + u_int32_t hash_initval; + compat_uptr_t config; +}; +#endif /* CONFIG_COMPAT */ + +static struct xt_target clusterip_tgt __read_mostly = { .name = "CLUSTERIP", .family = AF_INET, .target = target, - .targetsize = sizeof(struct ipt_clusterip_tgt_info), .checkentry = checkentry, .destroy = destroy, + .targetsize = sizeof(struct ipt_clusterip_tgt_info), +#ifdef CONFIG_COMPAT + .compatsize = sizeof(struct compat_ipt_clusterip_tgt_info), +#endif /* CONFIG_COMPAT */ .me = THIS_MODULE }; @@ -491,7 +481,7 @@ struct arp_payload { __be32 dst_ip; } __attribute__ ((packed)); -#ifdef CLUSTERIP_DEBUG +#ifdef DEBUG static void arp_print(struct arp_payload *payload) { #define HBUFFERLEN 30 @@ -547,8 +537,9 @@ arp_mangle(unsigned int hook, * this wouldn't work, since we didn't subscribe the mcast group on * other interfaces */ if (c->dev != out) { - DEBUGP("CLUSTERIP: not mangling arp reply on different " - "interface: cip'%s'-skb'%s'\n", c->dev->name, out->name); + pr_debug("CLUSTERIP: not mangling arp reply on different " + "interface: cip'%s'-skb'%s'\n", + c->dev->name, out->name); clusterip_config_put(c); return NF_ACCEPT; } @@ -556,8 +547,8 @@ arp_mangle(unsigned int hook, /* mangle reply hardware address */ memcpy(payload->src_hw, c->clustermac, arp->ar_hln); -#ifdef CLUSTERIP_DEBUG - DEBUGP(KERN_DEBUG "CLUSTERIP mangled arp reply: "); +#ifdef DEBUG + pr_debug(KERN_DEBUG "CLUSTERIP mangled arp reply: "); arp_print(payload); #endif @@ -647,7 +638,7 @@ static int clusterip_seq_show(struct seq_file *s, void *v) return 0; } -static struct seq_operations clusterip_seq_ops = { +static const struct seq_operations clusterip_seq_ops = { .start = clusterip_seq_start, .next = clusterip_seq_next, .stop = clusterip_seq_stop, diff --git a/net/ipv4/netfilter/ipt_ECN.c b/net/ipv4/netfilter/ipt_ECN.c index 918ca92e534a..f1253bd3837f 100644 --- a/net/ipv4/netfilter/ipt_ECN.c +++ b/net/ipv4/netfilter/ipt_ECN.c @@ -24,8 +24,8 @@ MODULE_AUTHOR("Harald Welte <laforge@netfilter.org>"); MODULE_DESCRIPTION("iptables ECN modification module"); /* set ECT codepoint from IP header. - * return 0 if there was an error. */ -static inline int + * return false if there was an error. */ +static inline bool set_ect_ip(struct sk_buff **pskb, const struct ipt_ECN_info *einfo) { struct iphdr *iph = ip_hdr(*pskb); @@ -33,18 +33,18 @@ set_ect_ip(struct sk_buff **pskb, const struct ipt_ECN_info *einfo) if ((iph->tos & IPT_ECN_IP_MASK) != (einfo->ip_ect & IPT_ECN_IP_MASK)) { __u8 oldtos; if (!skb_make_writable(pskb, sizeof(struct iphdr))) - return 0; + return false; iph = ip_hdr(*pskb); oldtos = iph->tos; iph->tos &= ~IPT_ECN_IP_MASK; iph->tos |= (einfo->ip_ect & IPT_ECN_IP_MASK); nf_csum_replace2(&iph->check, htons(oldtos), htons(iph->tos)); } - return 1; + return true; } -/* Return 0 if there was an error. */ -static inline int +/* Return false if there was an error. */ +static inline bool set_ect_tcp(struct sk_buff **pskb, const struct ipt_ECN_info *einfo) { struct tcphdr _tcph, *tcph; @@ -54,16 +54,16 @@ set_ect_tcp(struct sk_buff **pskb, const struct ipt_ECN_info *einfo) tcph = skb_header_pointer(*pskb, ip_hdrlen(*pskb), sizeof(_tcph), &_tcph); if (!tcph) - return 0; + return false; if ((!(einfo->operation & IPT_ECN_OP_SET_ECE) || tcph->ece == einfo->proto.tcp.ece) && - ((!(einfo->operation & IPT_ECN_OP_SET_CWR) || - tcph->cwr == einfo->proto.tcp.cwr))) - return 1; + (!(einfo->operation & IPT_ECN_OP_SET_CWR) || + tcph->cwr == einfo->proto.tcp.cwr)) + return true; if (!skb_make_writable(pskb, ip_hdrlen(*pskb) + sizeof(*tcph))) - return 0; + return false; tcph = (void *)ip_hdr(*pskb) + ip_hdrlen(*pskb); oldval = ((__be16 *)tcph)[6]; @@ -74,7 +74,7 @@ set_ect_tcp(struct sk_buff **pskb, const struct ipt_ECN_info *einfo) nf_proto_csum_replace2(&tcph->check, *pskb, oldval, ((__be16 *)tcph)[6], 0); - return 1; + return true; } static unsigned int @@ -99,7 +99,7 @@ target(struct sk_buff **pskb, return XT_CONTINUE; } -static int +static bool checkentry(const char *tablename, const void *e_void, const struct xt_target *target, @@ -112,23 +112,23 @@ checkentry(const char *tablename, if (einfo->operation & IPT_ECN_OP_MASK) { printk(KERN_WARNING "ECN: unsupported ECN operation %x\n", einfo->operation); - return 0; + return false; } if (einfo->ip_ect & ~IPT_ECN_IP_MASK) { printk(KERN_WARNING "ECN: new ECT codepoint %x out of mask\n", einfo->ip_ect); - return 0; + return false; } if ((einfo->operation & (IPT_ECN_OP_SET_ECE|IPT_ECN_OP_SET_CWR)) && (e->ip.proto != IPPROTO_TCP || (e->ip.invflags & XT_INV_PROTO))) { printk(KERN_WARNING "ECN: cannot use TCP operations on a " "non-tcp rule\n"); - return 0; + return false; } - return 1; + return true; } -static struct xt_target ipt_ecn_reg = { +static struct xt_target ipt_ecn_reg __read_mostly = { .name = "ECN", .family = AF_INET, .target = target, diff --git a/net/ipv4/netfilter/ipt_LOG.c b/net/ipv4/netfilter/ipt_LOG.c index 361be2bc7eea..127a5e89bf14 100644 --- a/net/ipv4/netfilter/ipt_LOG.c +++ b/net/ipv4/netfilter/ipt_LOG.c @@ -27,12 +27,6 @@ MODULE_LICENSE("GPL"); MODULE_AUTHOR("Netfilter Core Team <coreteam@netfilter.org>"); MODULE_DESCRIPTION("iptables syslog logging module"); -#if 0 -#define DEBUGP printk -#else -#define DEBUGP(format, args...) -#endif - /* Use lock to serialize, so printks don't overlap */ static DEFINE_SPINLOCK(log_lock); @@ -41,7 +35,8 @@ static void dump_packet(const struct nf_loginfo *info, const struct sk_buff *skb, unsigned int iphoff) { - struct iphdr _iph, *ih; + struct iphdr _iph; + const struct iphdr *ih; unsigned int logflags; if (info->type == NF_LOG_TYPE_LOG) @@ -100,7 +95,8 @@ static void dump_packet(const struct nf_loginfo *info, switch (ih->protocol) { case IPPROTO_TCP: { - struct tcphdr _tcph, *th; + struct tcphdr _tcph; + const struct tcphdr *th; /* Max length: 10 "PROTO=TCP " */ printk("PROTO=TCP "); @@ -151,7 +147,7 @@ static void dump_packet(const struct nf_loginfo *info, if ((logflags & IPT_LOG_TCPOPT) && th->doff * 4 > sizeof(struct tcphdr)) { unsigned char _opt[4 * 15 - sizeof(struct tcphdr)]; - unsigned char *op; + const unsigned char *op; unsigned int i, optsize; optsize = th->doff * 4 - sizeof(struct tcphdr); @@ -173,7 +169,8 @@ static void dump_packet(const struct nf_loginfo *info, } case IPPROTO_UDP: case IPPROTO_UDPLITE: { - struct udphdr _udph, *uh; + struct udphdr _udph; + const struct udphdr *uh; if (ih->protocol == IPPROTO_UDP) /* Max length: 10 "PROTO=UDP " */ @@ -200,7 +197,8 @@ static void dump_packet(const struct nf_loginfo *info, break; } case IPPROTO_ICMP: { - struct icmphdr _icmph, *ich; + struct icmphdr _icmph; + const struct icmphdr *ich; static const size_t required_len[NR_ICMP_TYPES+1] = { [ICMP_ECHOREPLY] = 4, [ICMP_DEST_UNREACH] @@ -285,7 +283,8 @@ static void dump_packet(const struct nf_loginfo *info, } /* Max Length */ case IPPROTO_AH: { - struct ip_auth_hdr _ahdr, *ah; + struct ip_auth_hdr _ahdr; + const struct ip_auth_hdr *ah; if (ntohs(ih->frag_off) & IP_OFFSET) break; @@ -307,7 +306,8 @@ static void dump_packet(const struct nf_loginfo *info, break; } case IPPROTO_ESP: { - struct ip_esp_hdr _esph, *eh; + struct ip_esp_hdr _esph; + const struct ip_esp_hdr *eh; /* Max length: 10 "PROTO=ESP " */ printk("PROTO=ESP "); @@ -385,11 +385,13 @@ ipt_log_packet(unsigned int pf, out ? out->name : ""); #ifdef CONFIG_BRIDGE_NETFILTER if (skb->nf_bridge) { - struct net_device *physindev = skb->nf_bridge->physindev; - struct net_device *physoutdev = skb->nf_bridge->physoutdev; + const struct net_device *physindev; + const struct net_device *physoutdev; + physindev = skb->nf_bridge->physindev; if (physindev && in != physindev) printk("PHYSIN=%s ", physindev->name); + physoutdev = skb->nf_bridge->physoutdev; if (physoutdev && out != physoutdev) printk("PHYSOUT=%s ", physoutdev->name); } @@ -435,27 +437,27 @@ ipt_log_target(struct sk_buff **pskb, return XT_CONTINUE; } -static int ipt_log_checkentry(const char *tablename, - const void *e, - const struct xt_target *target, - void *targinfo, - unsigned int hook_mask) +static bool ipt_log_checkentry(const char *tablename, + const void *e, + const struct xt_target *target, + void *targinfo, + unsigned int hook_mask) { const struct ipt_log_info *loginfo = targinfo; if (loginfo->level >= 8) { - DEBUGP("LOG: level %u >= 8\n", loginfo->level); - return 0; + pr_debug("LOG: level %u >= 8\n", loginfo->level); + return false; } if (loginfo->prefix[sizeof(loginfo->prefix)-1] != '\0') { - DEBUGP("LOG: prefix term %i\n", - loginfo->prefix[sizeof(loginfo->prefix)-1]); - return 0; + pr_debug("LOG: prefix term %i\n", + loginfo->prefix[sizeof(loginfo->prefix)-1]); + return false; } - return 1; + return true; } -static struct xt_target ipt_log_reg = { +static struct xt_target ipt_log_reg __read_mostly = { .name = "LOG", .family = AF_INET, .target = ipt_log_target, diff --git a/net/ipv4/netfilter/ipt_MASQUERADE.c b/net/ipv4/netfilter/ipt_MASQUERADE.c index d4f2d7775330..7c4e4be7c8b3 100644 --- a/net/ipv4/netfilter/ipt_MASQUERADE.c +++ b/net/ipv4/netfilter/ipt_MASQUERADE.c @@ -27,17 +27,11 @@ MODULE_LICENSE("GPL"); MODULE_AUTHOR("Netfilter Core Team <coreteam@netfilter.org>"); MODULE_DESCRIPTION("iptables MASQUERADE target module"); -#if 0 -#define DEBUGP printk -#else -#define DEBUGP(format, args...) -#endif - /* Lock protects masq region inside conntrack */ static DEFINE_RWLOCK(masq_lock); /* FIXME: Multiple targets. --RR */ -static int +static bool masquerade_check(const char *tablename, const void *e, const struct xt_target *target, @@ -47,14 +41,14 @@ masquerade_check(const char *tablename, const struct nf_nat_multi_range_compat *mr = targinfo; if (mr->range[0].flags & IP_NAT_RANGE_MAP_IPS) { - DEBUGP("masquerade_check: bad MAP_IPS.\n"); - return 0; + pr_debug("masquerade_check: bad MAP_IPS.\n"); + return false; } if (mr->rangesize != 1) { - DEBUGP("masquerade_check: bad rangesize %u.\n", mr->rangesize); - return 0; + pr_debug("masquerade_check: bad rangesize %u\n", mr->rangesize); + return false; } - return 1; + return true; } static unsigned int @@ -70,7 +64,7 @@ masquerade_target(struct sk_buff **pskb, enum ip_conntrack_info ctinfo; struct nf_nat_range newrange; const struct nf_nat_multi_range_compat *mr; - struct rtable *rt; + const struct rtable *rt; __be32 newsrc; NF_CT_ASSERT(hooknum == NF_IP_POST_ROUTING); @@ -109,10 +103,10 @@ masquerade_target(struct sk_buff **pskb, return nf_nat_setup_info(ct, &newrange, hooknum); } -static inline int +static int device_cmp(struct nf_conn *i, void *ifindex) { - struct nf_conn_nat *nat = nfct_nat(i); + const struct nf_conn_nat *nat = nfct_nat(i); int ret; if (!nat) @@ -129,7 +123,7 @@ static int masq_device_event(struct notifier_block *this, unsigned long event, void *ptr) { - struct net_device *dev = ptr; + const struct net_device *dev = ptr; if (event == NETDEV_DOWN) { /* Device was downed. Search entire table for @@ -147,7 +141,7 @@ static int masq_inet_event(struct notifier_block *this, unsigned long event, void *ptr) { - struct net_device *dev = ((struct in_ifaddr *)ptr)->ifa_dev->dev; + const struct net_device *dev = ((struct in_ifaddr *)ptr)->ifa_dev->dev; if (event == NETDEV_DOWN) { /* IP address was deleted. Search entire table for @@ -169,7 +163,7 @@ static struct notifier_block masq_inet_notifier = { .notifier_call = masq_inet_event, }; -static struct xt_target masquerade = { +static struct xt_target masquerade __read_mostly = { .name = "MASQUERADE", .family = AF_INET, .target = masquerade_target, diff --git a/net/ipv4/netfilter/ipt_NETMAP.c b/net/ipv4/netfilter/ipt_NETMAP.c index 068c69bce30e..41a011d5a065 100644 --- a/net/ipv4/netfilter/ipt_NETMAP.c +++ b/net/ipv4/netfilter/ipt_NETMAP.c @@ -18,18 +18,11 @@ #include <linux/netfilter/x_tables.h> #include <net/netfilter/nf_nat_rule.h> -#define MODULENAME "NETMAP" MODULE_LICENSE("GPL"); MODULE_AUTHOR("Svenning Soerensen <svenning@post5.tele.dk>"); MODULE_DESCRIPTION("iptables 1:1 NAT mapping of IP networks target"); -#if 0 -#define DEBUGP printk -#else -#define DEBUGP(format, args...) -#endif - -static int +static bool check(const char *tablename, const void *e, const struct xt_target *target, @@ -39,14 +32,14 @@ check(const char *tablename, const struct nf_nat_multi_range_compat *mr = targinfo; if (!(mr->range[0].flags & IP_NAT_RANGE_MAP_IPS)) { - DEBUGP(MODULENAME":check: bad MAP_IPS.\n"); - return 0; + pr_debug("NETMAP:check: bad MAP_IPS.\n"); + return false; } if (mr->rangesize != 1) { - DEBUGP(MODULENAME":check: bad rangesize %u.\n", mr->rangesize); - return 0; + pr_debug("NETMAP:check: bad rangesize %u.\n", mr->rangesize); + return false; } - return 1; + return true; } static unsigned int @@ -85,8 +78,8 @@ target(struct sk_buff **pskb, return nf_nat_setup_info(ct, &newrange, hooknum); } -static struct xt_target target_module = { - .name = MODULENAME, +static struct xt_target target_module __read_mostly = { + .name = "NETMAP", .family = AF_INET, .target = target, .targetsize = sizeof(struct nf_nat_multi_range_compat), diff --git a/net/ipv4/netfilter/ipt_REDIRECT.c b/net/ipv4/netfilter/ipt_REDIRECT.c index 68cc76a198eb..6ac7a2373316 100644 --- a/net/ipv4/netfilter/ipt_REDIRECT.c +++ b/net/ipv4/netfilter/ipt_REDIRECT.c @@ -25,14 +25,8 @@ MODULE_LICENSE("GPL"); MODULE_AUTHOR("Netfilter Core Team <coreteam@netfilter.org>"); MODULE_DESCRIPTION("iptables REDIRECT target module"); -#if 0 -#define DEBUGP printk -#else -#define DEBUGP(format, args...) -#endif - /* FIXME: Take multiple ranges --RR */ -static int +static bool redirect_check(const char *tablename, const void *e, const struct xt_target *target, @@ -42,14 +36,14 @@ redirect_check(const char *tablename, const struct nf_nat_multi_range_compat *mr = targinfo; if (mr->range[0].flags & IP_NAT_RANGE_MAP_IPS) { - DEBUGP("redirect_check: bad MAP_IPS.\n"); - return 0; + pr_debug("redirect_check: bad MAP_IPS.\n"); + return false; } if (mr->rangesize != 1) { - DEBUGP("redirect_check: bad rangesize %u.\n", mr->rangesize); - return 0; + pr_debug("redirect_check: bad rangesize %u.\n", mr->rangesize); + return false; } - return 1; + return true; } static unsigned int @@ -101,7 +95,7 @@ redirect_target(struct sk_buff **pskb, return nf_nat_setup_info(ct, &newrange, hooknum); } -static struct xt_target redirect_reg = { +static struct xt_target redirect_reg __read_mostly = { .name = "REDIRECT", .family = AF_INET, .target = redirect_target, diff --git a/net/ipv4/netfilter/ipt_REJECT.c b/net/ipv4/netfilter/ipt_REJECT.c index 9041e0741f6f..cb038c8fbc9d 100644 --- a/net/ipv4/netfilter/ipt_REJECT.c +++ b/net/ipv4/netfilter/ipt_REJECT.c @@ -31,12 +31,6 @@ MODULE_LICENSE("GPL"); MODULE_AUTHOR("Netfilter Core Team <coreteam@netfilter.org>"); MODULE_DESCRIPTION("iptables REJECT target module"); -#if 0 -#define DEBUGP printk -#else -#define DEBUGP(format, args...) -#endif - /* Send RST reply */ static void send_reset(struct sk_buff *oldskb, int hook) { @@ -122,7 +116,7 @@ static void send_reset(struct sk_buff *oldskb, int hook) tcph->check = 0; tcph->check = tcp_v4_check(sizeof(struct tcphdr), niph->saddr, niph->daddr, - csum_partial((char *)tcph, + csum_partial(tcph, sizeof(struct tcphdr), 0)); /* Set DF, id = 0 */ @@ -217,30 +211,30 @@ static unsigned int reject(struct sk_buff **pskb, return NF_DROP; } -static int check(const char *tablename, - const void *e_void, - const struct xt_target *target, - void *targinfo, - unsigned int hook_mask) +static bool check(const char *tablename, + const void *e_void, + const struct xt_target *target, + void *targinfo, + unsigned int hook_mask) { const struct ipt_reject_info *rejinfo = targinfo; const struct ipt_entry *e = e_void; if (rejinfo->with == IPT_ICMP_ECHOREPLY) { - printk("REJECT: ECHOREPLY no longer supported.\n"); - return 0; + printk("ipt_REJECT: ECHOREPLY no longer supported.\n"); + return false; } else if (rejinfo->with == IPT_TCP_RESET) { /* Must specify that it's a TCP packet */ if (e->ip.proto != IPPROTO_TCP || (e->ip.invflags & XT_INV_PROTO)) { - DEBUGP("REJECT: TCP_RESET invalid for non-tcp\n"); - return 0; + printk("ipt_REJECT: TCP_RESET invalid for non-tcp\n"); + return false; } } - return 1; + return true; } -static struct xt_target ipt_reject_reg = { +static struct xt_target ipt_reject_reg __read_mostly = { .name = "REJECT", .family = AF_INET, .target = reject, diff --git a/net/ipv4/netfilter/ipt_SAME.c b/net/ipv4/netfilter/ipt_SAME.c index 511e5ff84938..97641f1a97f6 100644 --- a/net/ipv4/netfilter/ipt_SAME.c +++ b/net/ipv4/netfilter/ipt_SAME.c @@ -27,13 +27,7 @@ MODULE_LICENSE("GPL"); MODULE_AUTHOR("Martin Josefsson <gandalf@wlug.westbo.se>"); MODULE_DESCRIPTION("iptables special SNAT module for consistent sourceip"); -#if 0 -#define DEBUGP printk -#else -#define DEBUGP(format, args...) -#endif - -static int +static bool same_check(const char *tablename, const void *e, const struct xt_target *target, @@ -46,58 +40,56 @@ same_check(const char *tablename, mr->ipnum = 0; if (mr->rangesize < 1) { - DEBUGP("same_check: need at least one dest range.\n"); - return 0; + pr_debug("same_check: need at least one dest range.\n"); + return false; } if (mr->rangesize > IPT_SAME_MAX_RANGE) { - DEBUGP("same_check: too many ranges specified, maximum " - "is %u ranges\n", - IPT_SAME_MAX_RANGE); - return 0; + pr_debug("same_check: too many ranges specified, maximum " + "is %u ranges\n", IPT_SAME_MAX_RANGE); + return false; } for (count = 0; count < mr->rangesize; count++) { if (ntohl(mr->range[count].min_ip) > ntohl(mr->range[count].max_ip)) { - DEBUGP("same_check: min_ip is larger than max_ip in " - "range `%u.%u.%u.%u-%u.%u.%u.%u'.\n", - NIPQUAD(mr->range[count].min_ip), - NIPQUAD(mr->range[count].max_ip)); - return 0; + pr_debug("same_check: min_ip is larger than max_ip in " + "range `%u.%u.%u.%u-%u.%u.%u.%u'.\n", + NIPQUAD(mr->range[count].min_ip), + NIPQUAD(mr->range[count].max_ip)); + return false; } if (!(mr->range[count].flags & IP_NAT_RANGE_MAP_IPS)) { - DEBUGP("same_check: bad MAP_IPS.\n"); - return 0; + pr_debug("same_check: bad MAP_IPS.\n"); + return false; } rangeip = (ntohl(mr->range[count].max_ip) - ntohl(mr->range[count].min_ip) + 1); mr->ipnum += rangeip; - DEBUGP("same_check: range %u, ipnum = %u\n", count, rangeip); + pr_debug("same_check: range %u, ipnum = %u\n", count, rangeip); } - DEBUGP("same_check: total ipaddresses = %u\n", mr->ipnum); + pr_debug("same_check: total ipaddresses = %u\n", mr->ipnum); mr->iparray = kmalloc((sizeof(u_int32_t) * mr->ipnum), GFP_KERNEL); if (!mr->iparray) { - DEBUGP("same_check: Couldn't allocate %u bytes " - "for %u ipaddresses!\n", - (sizeof(u_int32_t) * mr->ipnum), mr->ipnum); - return 0; + pr_debug("same_check: Couldn't allocate %Zu bytes " + "for %u ipaddresses!\n", + (sizeof(u_int32_t) * mr->ipnum), mr->ipnum); + return false; } - DEBUGP("same_check: Allocated %u bytes for %u ipaddresses.\n", - (sizeof(u_int32_t) * mr->ipnum), mr->ipnum); + pr_debug("same_check: Allocated %Zu bytes for %u ipaddresses.\n", + (sizeof(u_int32_t) * mr->ipnum), mr->ipnum); for (count = 0; count < mr->rangesize; count++) { for (countess = ntohl(mr->range[count].min_ip); countess <= ntohl(mr->range[count].max_ip); countess++) { mr->iparray[index] = countess; - DEBUGP("same_check: Added ipaddress `%u.%u.%u.%u' " - "in index %u.\n", - HIPQUAD(countess), index); + pr_debug("same_check: Added ipaddress `%u.%u.%u.%u' " + "in index %u.\n", HIPQUAD(countess), index); index++; } } - return 1; + return true; } static void @@ -107,8 +99,8 @@ same_destroy(const struct xt_target *target, void *targinfo) kfree(mr->iparray); - DEBUGP("same_destroy: Deallocated %u bytes for %u ipaddresses.\n", - (sizeof(u_int32_t) * mr->ipnum), mr->ipnum); + pr_debug("same_destroy: Deallocated %Zu bytes for %u ipaddresses.\n", + (sizeof(u_int32_t) * mr->ipnum), mr->ipnum); } static unsigned int @@ -146,10 +138,9 @@ same_target(struct sk_buff **pskb, new_ip = htonl(same->iparray[aindex]); - DEBUGP("ipt_SAME: src=%u.%u.%u.%u dst=%u.%u.%u.%u, " - "new src=%u.%u.%u.%u\n", - NIPQUAD(t->src.ip), NIPQUAD(t->dst.ip), - NIPQUAD(new_ip)); + pr_debug("ipt_SAME: src=%u.%u.%u.%u dst=%u.%u.%u.%u, " + "new src=%u.%u.%u.%u\n", + NIPQUAD(t->src.u3.ip), NIPQUAD(t->dst.u3.ip), NIPQUAD(new_ip)); /* Transfer from original range. */ newrange = ((struct nf_nat_range) @@ -161,7 +152,7 @@ same_target(struct sk_buff **pskb, return nf_nat_setup_info(ct, &newrange, hooknum); } -static struct xt_target same_reg = { +static struct xt_target same_reg __read_mostly = { .name = "SAME", .family = AF_INET, .target = same_target, diff --git a/net/ipv4/netfilter/ipt_TOS.c b/net/ipv4/netfilter/ipt_TOS.c index 0ad02f249837..25f5d0b39065 100644 --- a/net/ipv4/netfilter/ipt_TOS.c +++ b/net/ipv4/netfilter/ipt_TOS.c @@ -43,7 +43,7 @@ target(struct sk_buff **pskb, return XT_CONTINUE; } -static int +static bool checkentry(const char *tablename, const void *e_void, const struct xt_target *target, @@ -58,12 +58,12 @@ checkentry(const char *tablename, && tos != IPTOS_MINCOST && tos != IPTOS_NORMALSVC) { printk(KERN_WARNING "TOS: bad tos value %#x\n", tos); - return 0; + return false; } - return 1; + return true; } -static struct xt_target ipt_tos_reg = { +static struct xt_target ipt_tos_reg __read_mostly = { .name = "TOS", .family = AF_INET, .target = target, diff --git a/net/ipv4/netfilter/ipt_TTL.c b/net/ipv4/netfilter/ipt_TTL.c index a991ec7bd4e7..2b54e7b0cfe8 100644 --- a/net/ipv4/netfilter/ipt_TTL.c +++ b/net/ipv4/netfilter/ipt_TTL.c @@ -62,25 +62,25 @@ ipt_ttl_target(struct sk_buff **pskb, return XT_CONTINUE; } -static int ipt_ttl_checkentry(const char *tablename, +static bool ipt_ttl_checkentry(const char *tablename, const void *e, const struct xt_target *target, void *targinfo, unsigned int hook_mask) { - struct ipt_TTL_info *info = targinfo; + const struct ipt_TTL_info *info = targinfo; if (info->mode > IPT_TTL_MAXMODE) { printk(KERN_WARNING "ipt_TTL: invalid or unknown Mode %u\n", info->mode); - return 0; + return false; } - if ((info->mode != IPT_TTL_SET) && (info->ttl == 0)) - return 0; - return 1; + if (info->mode != IPT_TTL_SET && info->ttl == 0) + return false; + return true; } -static struct xt_target ipt_TTL = { +static struct xt_target ipt_TTL __read_mostly = { .name = "TTL", .family = AF_INET, .target = ipt_ttl_target, diff --git a/net/ipv4/netfilter/ipt_ULOG.c b/net/ipv4/netfilter/ipt_ULOG.c index 23b607b33b32..6ca43e4ca7e3 100644 --- a/net/ipv4/netfilter/ipt_ULOG.c +++ b/net/ipv4/netfilter/ipt_ULOG.c @@ -55,13 +55,6 @@ MODULE_ALIAS_NET_PF_PROTO(PF_NETLINK, NETLINK_NFLOG); #define ULOG_NL_EVENT 111 /* Harald's favorite number */ #define ULOG_MAXNLGROUPS 32 /* numer of nlgroups */ -#if 0 -#define DEBUGP(format, args...) printk("%s:%s:" format, \ - __FILE__, __FUNCTION__ , ## args) -#else -#define DEBUGP(format, args...) -#endif - #define PRINTR(format, args...) do { if (net_ratelimit()) printk(format , ## args); } while (0) static unsigned int nlbufsiz = NLMSG_GOODSIZE; @@ -96,12 +89,12 @@ static void ulog_send(unsigned int nlgroupnum) ulog_buff_t *ub = &ulog_buffers[nlgroupnum]; if (timer_pending(&ub->timer)) { - DEBUGP("ipt_ULOG: ulog_send: timer was pending, deleting\n"); + pr_debug("ipt_ULOG: ulog_send: timer was pending, deleting\n"); del_timer(&ub->timer); } if (!ub->skb) { - DEBUGP("ipt_ULOG: ulog_send: nothing to send\n"); + pr_debug("ipt_ULOG: ulog_send: nothing to send\n"); return; } @@ -110,8 +103,8 @@ static void ulog_send(unsigned int nlgroupnum) ub->lastnlh->nlmsg_type = NLMSG_DONE; NETLINK_CB(ub->skb).dst_group = nlgroupnum + 1; - DEBUGP("ipt_ULOG: throwing %d packets to netlink group %u\n", - ub->qlen, nlgroupnum + 1); + pr_debug("ipt_ULOG: throwing %d packets to netlink group %u\n", + ub->qlen, nlgroupnum + 1); netlink_broadcast(nflognl, ub->skb, 0, nlgroupnum + 1, GFP_ATOMIC); ub->qlen = 0; @@ -123,7 +116,7 @@ static void ulog_send(unsigned int nlgroupnum) /* timer function to flush queue in flushtimeout time */ static void ulog_timer(unsigned long data) { - DEBUGP("ipt_ULOG: timer function called, calling ulog_send\n"); + pr_debug("ipt_ULOG: timer function called, calling ulog_send\n"); /* lock to protect against somebody modifying our structure * from ipt_ulog_target at the same time */ @@ -179,12 +172,10 @@ static void ipt_ulog_packet(unsigned int hooknum, unsigned int groupnum = ffs(loginfo->nl_group) - 1; /* calculate the size of the skb needed */ - if ((loginfo->copy_range == 0) || - (loginfo->copy_range > skb->len)) { + if (loginfo->copy_range == 0 || loginfo->copy_range > skb->len) copy_len = skb->len; - } else { + else copy_len = loginfo->copy_range; - } size = NLMSG_SPACE(sizeof(*pm) + copy_len); @@ -206,8 +197,8 @@ static void ipt_ulog_packet(unsigned int hooknum, goto alloc_failure; } - DEBUGP("ipt_ULOG: qlen %d, qthreshold %d\n", ub->qlen, - loginfo->qthreshold); + pr_debug("ipt_ULOG: qlen %d, qthreshold %Zu\n", ub->qlen, + loginfo->qthreshold); /* NLMSG_PUT contains a hidden goto nlmsg_failure !!! */ nlh = NLMSG_PUT(ub->skb, 0, ub->qlen, ULOG_NL_EVENT, @@ -257,9 +248,8 @@ static void ipt_ulog_packet(unsigned int hooknum, BUG(); /* check if we are building multi-part messages */ - if (ub->qlen > 1) { + if (ub->qlen > 1) ub->lastnlh->nlmsg_flags |= NLM_F_MULTI; - } ub->lastnlh = nlh; @@ -328,25 +318,25 @@ static void ipt_logfn(unsigned int pf, ipt_ulog_packet(hooknum, skb, in, out, &loginfo, prefix); } -static int ipt_ulog_checkentry(const char *tablename, - const void *e, - const struct xt_target *target, - void *targinfo, - unsigned int hookmask) +static bool ipt_ulog_checkentry(const char *tablename, + const void *e, + const struct xt_target *target, + void *targinfo, + unsigned int hookmask) { - struct ipt_ulog_info *loginfo = (struct ipt_ulog_info *) targinfo; + const struct ipt_ulog_info *loginfo = targinfo; if (loginfo->prefix[sizeof(loginfo->prefix) - 1] != '\0') { - DEBUGP("ipt_ULOG: prefix term %i\n", - loginfo->prefix[sizeof(loginfo->prefix) - 1]); - return 0; + pr_debug("ipt_ULOG: prefix term %i\n", + loginfo->prefix[sizeof(loginfo->prefix) - 1]); + return false; } if (loginfo->qthreshold > ULOG_MAX_QLEN) { - DEBUGP("ipt_ULOG: queue threshold %i > MAX_QLEN\n", - loginfo->qthreshold); - return 0; + pr_debug("ipt_ULOG: queue threshold %Zu > MAX_QLEN\n", + loginfo->qthreshold); + return false; } - return 1; + return true; } #ifdef CONFIG_COMPAT @@ -359,7 +349,7 @@ struct compat_ipt_ulog_info { static void compat_from_user(void *dst, void *src) { - struct compat_ipt_ulog_info *cl = src; + const struct compat_ipt_ulog_info *cl = src; struct ipt_ulog_info l = { .nl_group = cl->nl_group, .copy_range = cl->copy_range, @@ -372,7 +362,7 @@ static void compat_from_user(void *dst, void *src) static int compat_to_user(void __user *dst, void *src) { - struct ipt_ulog_info *l = src; + const struct ipt_ulog_info *l = src; struct compat_ipt_ulog_info cl = { .nl_group = l->nl_group, .copy_range = l->copy_range, @@ -384,7 +374,7 @@ static int compat_to_user(void __user *dst, void *src) } #endif /* CONFIG_COMPAT */ -static struct xt_target ipt_ulog_reg = { +static struct xt_target ipt_ulog_reg __read_mostly = { .name = "ULOG", .family = AF_INET, .target = ipt_ulog_target, @@ -408,7 +398,7 @@ static int __init ipt_ulog_init(void) { int ret, i; - DEBUGP("ipt_ULOG: init module\n"); + pr_debug("ipt_ULOG: init module\n"); if (nlbufsiz > 128*1024) { printk("Netlink buffer has to be <= 128kB\n"); @@ -440,7 +430,7 @@ static void __exit ipt_ulog_fini(void) ulog_buff_t *ub; int i; - DEBUGP("ipt_ULOG: cleanup_module\n"); + pr_debug("ipt_ULOG: cleanup_module\n"); if (nflog) nf_log_unregister(&ipt_ulog_logger); @@ -451,7 +441,7 @@ static void __exit ipt_ulog_fini(void) for (i = 0; i < ULOG_MAXNLGROUPS; i++) { ub = &ulog_buffers[i]; if (timer_pending(&ub->timer)) { - DEBUGP("timer was pending, deleting\n"); + pr_debug("timer was pending, deleting\n"); del_timer(&ub->timer); } diff --git a/net/ipv4/netfilter/ipt_addrtype.c b/net/ipv4/netfilter/ipt_addrtype.c index a652a1451552..59f01f7ba6b4 100644 --- a/net/ipv4/netfilter/ipt_addrtype.c +++ b/net/ipv4/netfilter/ipt_addrtype.c @@ -22,19 +22,19 @@ MODULE_LICENSE("GPL"); MODULE_AUTHOR("Patrick McHardy <kaber@trash.net>"); MODULE_DESCRIPTION("iptables addrtype match"); -static inline int match_type(__be32 addr, u_int16_t mask) +static inline bool match_type(__be32 addr, u_int16_t mask) { return !!(mask & (1 << inet_addr_type(addr))); } -static int match(const struct sk_buff *skb, - const struct net_device *in, const struct net_device *out, - const struct xt_match *match, const void *matchinfo, - int offset, unsigned int protoff, int *hotdrop) +static bool match(const struct sk_buff *skb, + const struct net_device *in, const struct net_device *out, + const struct xt_match *match, const void *matchinfo, + int offset, unsigned int protoff, bool *hotdrop) { const struct ipt_addrtype_info *info = matchinfo; const struct iphdr *iph = ip_hdr(skb); - int ret = 1; + bool ret = true; if (info->source) ret &= match_type(iph->saddr, info->source)^info->invert_source; @@ -44,7 +44,7 @@ static int match(const struct sk_buff *skb, return ret; } -static struct xt_match addrtype_match = { +static struct xt_match addrtype_match __read_mostly = { .name = "addrtype", .family = AF_INET, .match = match, diff --git a/net/ipv4/netfilter/ipt_ah.c b/net/ipv4/netfilter/ipt_ah.c index 18a16782cf40..61b017fd743c 100644 --- a/net/ipv4/netfilter/ipt_ah.c +++ b/net/ipv4/netfilter/ipt_ah.c @@ -25,10 +25,10 @@ MODULE_DESCRIPTION("iptables AH SPI match module"); #endif /* Returns 1 if the spi is matched by the range, 0 otherwise */ -static inline int -spi_match(u_int32_t min, u_int32_t max, u_int32_t spi, int invert) +static inline bool +spi_match(u_int32_t min, u_int32_t max, u_int32_t spi, bool invert) { - int r=0; + bool r; duprintf("ah spi_match:%c 0x%x <= 0x%x <= 0x%x",invert? '!':' ', min,spi,max); r=(spi >= min && spi <= max) ^ invert; @@ -36,7 +36,7 @@ spi_match(u_int32_t min, u_int32_t max, u_int32_t spi, int invert) return r; } -static int +static bool match(const struct sk_buff *skb, const struct net_device *in, const struct net_device *out, @@ -44,14 +44,15 @@ match(const struct sk_buff *skb, const void *matchinfo, int offset, unsigned int protoff, - int *hotdrop) + bool *hotdrop) { - struct ip_auth_hdr _ahdr, *ah; + struct ip_auth_hdr _ahdr; + const struct ip_auth_hdr *ah; const struct ipt_ah *ahinfo = matchinfo; /* Must not be a fragment. */ if (offset) - return 0; + return false; ah = skb_header_pointer(skb, protoff, sizeof(_ahdr), &_ahdr); @@ -60,7 +61,7 @@ match(const struct sk_buff *skb, * can't. Hence, no choice but to drop. */ duprintf("Dropping evil AH tinygram.\n"); - *hotdrop = 1; + *hotdrop = true; return 0; } @@ -70,7 +71,7 @@ match(const struct sk_buff *skb, } /* Called when user tries to insert an entry of this type. */ -static int +static bool checkentry(const char *tablename, const void *ip_void, const struct xt_match *match, @@ -82,12 +83,12 @@ checkentry(const char *tablename, /* Must specify no unknown invflags */ if (ahinfo->invflags & ~IPT_AH_INV_MASK) { duprintf("ipt_ah: unknown flags %X\n", ahinfo->invflags); - return 0; + return false; } - return 1; + return true; } -static struct xt_match ah_match = { +static struct xt_match ah_match __read_mostly = { .name = "ah", .family = AF_INET, .match = match, diff --git a/net/ipv4/netfilter/ipt_ecn.c b/net/ipv4/netfilter/ipt_ecn.c index 26218122f865..d6925c674069 100644 --- a/net/ipv4/netfilter/ipt_ecn.c +++ b/net/ipv4/netfilter/ipt_ecn.c @@ -22,95 +22,96 @@ MODULE_AUTHOR("Harald Welte <laforge@netfilter.org>"); MODULE_DESCRIPTION("iptables ECN matching module"); MODULE_LICENSE("GPL"); -static inline int match_ip(const struct sk_buff *skb, - const struct ipt_ecn_info *einfo) +static inline bool match_ip(const struct sk_buff *skb, + const struct ipt_ecn_info *einfo) { return (ip_hdr(skb)->tos & IPT_ECN_IP_MASK) == einfo->ip_ect; } -static inline int match_tcp(const struct sk_buff *skb, - const struct ipt_ecn_info *einfo, - int *hotdrop) +static inline bool match_tcp(const struct sk_buff *skb, + const struct ipt_ecn_info *einfo, + bool *hotdrop) { - struct tcphdr _tcph, *th; + struct tcphdr _tcph; + const struct tcphdr *th; /* In practice, TCP match does this, so can't fail. But let's * be good citizens. */ th = skb_header_pointer(skb, ip_hdrlen(skb), sizeof(_tcph), &_tcph); if (th == NULL) { - *hotdrop = 0; - return 0; + *hotdrop = false; + return false; } if (einfo->operation & IPT_ECN_OP_MATCH_ECE) { if (einfo->invert & IPT_ECN_OP_MATCH_ECE) { if (th->ece == 1) - return 0; + return false; } else { if (th->ece == 0) - return 0; + return false; } } if (einfo->operation & IPT_ECN_OP_MATCH_CWR) { if (einfo->invert & IPT_ECN_OP_MATCH_CWR) { if (th->cwr == 1) - return 0; + return false; } else { if (th->cwr == 0) - return 0; + return false; } } - return 1; + return true; } -static int match(const struct sk_buff *skb, - const struct net_device *in, const struct net_device *out, - const struct xt_match *match, const void *matchinfo, - int offset, unsigned int protoff, int *hotdrop) +static bool match(const struct sk_buff *skb, + const struct net_device *in, const struct net_device *out, + const struct xt_match *match, const void *matchinfo, + int offset, unsigned int protoff, bool *hotdrop) { const struct ipt_ecn_info *info = matchinfo; if (info->operation & IPT_ECN_OP_MATCH_IP) if (!match_ip(skb, info)) - return 0; + return false; if (info->operation & (IPT_ECN_OP_MATCH_ECE|IPT_ECN_OP_MATCH_CWR)) { if (ip_hdr(skb)->protocol != IPPROTO_TCP) - return 0; + return false; if (!match_tcp(skb, info, hotdrop)) - return 0; + return false; } - return 1; + return true; } -static int checkentry(const char *tablename, const void *ip_void, - const struct xt_match *match, - void *matchinfo, unsigned int hook_mask) +static bool checkentry(const char *tablename, const void *ip_void, + const struct xt_match *match, + void *matchinfo, unsigned int hook_mask) { const struct ipt_ecn_info *info = matchinfo; const struct ipt_ip *ip = ip_void; if (info->operation & IPT_ECN_OP_MATCH_MASK) - return 0; + return false; if (info->invert & IPT_ECN_OP_MATCH_MASK) - return 0; + return false; if (info->operation & (IPT_ECN_OP_MATCH_ECE|IPT_ECN_OP_MATCH_CWR) && ip->proto != IPPROTO_TCP) { printk(KERN_WARNING "ipt_ecn: can't match TCP bits in rule for" " non-tcp packets\n"); - return 0; + return false; } - return 1; + return true; } -static struct xt_match ecn_match = { +static struct xt_match ecn_match __read_mostly = { .name = "ecn", .family = AF_INET, .match = match, diff --git a/net/ipv4/netfilter/ipt_iprange.c b/net/ipv4/netfilter/ipt_iprange.c index 33af9e940887..0106dc955a69 100644 --- a/net/ipv4/netfilter/ipt_iprange.c +++ b/net/ipv4/netfilter/ipt_iprange.c @@ -17,53 +17,47 @@ MODULE_LICENSE("GPL"); MODULE_AUTHOR("Jozsef Kadlecsik <kadlec@blackhole.kfki.hu>"); MODULE_DESCRIPTION("iptables arbitrary IP range match module"); -#if 0 -#define DEBUGP printk -#else -#define DEBUGP(format, args...) -#endif - -static int +static bool match(const struct sk_buff *skb, const struct net_device *in, const struct net_device *out, const struct xt_match *match, const void *matchinfo, - int offset, unsigned int protoff, int *hotdrop) + int offset, unsigned int protoff, bool *hotdrop) { const struct ipt_iprange_info *info = matchinfo; const struct iphdr *iph = ip_hdr(skb); if (info->flags & IPRANGE_SRC) { - if (((ntohl(iph->saddr) < ntohl(info->src.min_ip)) - || (ntohl(iph->saddr) > ntohl(info->src.max_ip))) + if ((ntohl(iph->saddr) < ntohl(info->src.min_ip) + || ntohl(iph->saddr) > ntohl(info->src.max_ip)) ^ !!(info->flags & IPRANGE_SRC_INV)) { - DEBUGP("src IP %u.%u.%u.%u NOT in range %s" - "%u.%u.%u.%u-%u.%u.%u.%u\n", - NIPQUAD(iph->saddr), - info->flags & IPRANGE_SRC_INV ? "(INV) " : "", - NIPQUAD(info->src.min_ip), - NIPQUAD(info->src.max_ip)); - return 0; + pr_debug("src IP %u.%u.%u.%u NOT in range %s" + "%u.%u.%u.%u-%u.%u.%u.%u\n", + NIPQUAD(iph->saddr), + info->flags & IPRANGE_SRC_INV ? "(INV) " : "", + NIPQUAD(info->src.min_ip), + NIPQUAD(info->src.max_ip)); + return false; } } if (info->flags & IPRANGE_DST) { - if (((ntohl(iph->daddr) < ntohl(info->dst.min_ip)) - || (ntohl(iph->daddr) > ntohl(info->dst.max_ip))) + if ((ntohl(iph->daddr) < ntohl(info->dst.min_ip) + || ntohl(iph->daddr) > ntohl(info->dst.max_ip)) ^ !!(info->flags & IPRANGE_DST_INV)) { - DEBUGP("dst IP %u.%u.%u.%u NOT in range %s" - "%u.%u.%u.%u-%u.%u.%u.%u\n", - NIPQUAD(iph->daddr), - info->flags & IPRANGE_DST_INV ? "(INV) " : "", - NIPQUAD(info->dst.min_ip), - NIPQUAD(info->dst.max_ip)); - return 0; + pr_debug("dst IP %u.%u.%u.%u NOT in range %s" + "%u.%u.%u.%u-%u.%u.%u.%u\n", + NIPQUAD(iph->daddr), + info->flags & IPRANGE_DST_INV ? "(INV) " : "", + NIPQUAD(info->dst.min_ip), + NIPQUAD(info->dst.max_ip)); + return false; } } - return 1; + return true; } -static struct xt_match iprange_match = { +static struct xt_match iprange_match __read_mostly = { .name = "iprange", .family = AF_INET, .match = match, diff --git a/net/ipv4/netfilter/ipt_owner.c b/net/ipv4/netfilter/ipt_owner.c index 7fae9aa8944c..b14e77da7a33 100644 --- a/net/ipv4/netfilter/ipt_owner.c +++ b/net/ipv4/netfilter/ipt_owner.c @@ -21,7 +21,7 @@ MODULE_LICENSE("GPL"); MODULE_AUTHOR("Marc Boucher <marc@mbsi.ca>"); MODULE_DESCRIPTION("iptables owner match"); -static int +static bool match(const struct sk_buff *skb, const struct net_device *in, const struct net_device *out, @@ -29,29 +29,29 @@ match(const struct sk_buff *skb, const void *matchinfo, int offset, unsigned int protoff, - int *hotdrop) + bool *hotdrop) { const struct ipt_owner_info *info = matchinfo; if (!skb->sk || !skb->sk->sk_socket || !skb->sk->sk_socket->file) - return 0; + return false; if(info->match & IPT_OWNER_UID) { if ((skb->sk->sk_socket->file->f_uid != info->uid) ^ !!(info->invert & IPT_OWNER_UID)) - return 0; + return false; } if(info->match & IPT_OWNER_GID) { if ((skb->sk->sk_socket->file->f_gid != info->gid) ^ !!(info->invert & IPT_OWNER_GID)) - return 0; + return false; } - return 1; + return true; } -static int +static bool checkentry(const char *tablename, const void *ip, const struct xt_match *match, @@ -63,12 +63,12 @@ checkentry(const char *tablename, if (info->match & (IPT_OWNER_PID|IPT_OWNER_SID|IPT_OWNER_COMM)) { printk("ipt_owner: pid, sid and command matching " "not supported anymore\n"); - return 0; + return false; } - return 1; + return true; } -static struct xt_match owner_match = { +static struct xt_match owner_match __read_mostly = { .name = "owner", .family = AF_INET, .match = match, diff --git a/net/ipv4/netfilter/ipt_recent.c b/net/ipv4/netfilter/ipt_recent.c index 15a9e8bbb7cc..6d0c0f7364ad 100644 --- a/net/ipv4/netfilter/ipt_recent.c +++ b/net/ipv4/netfilter/ipt_recent.c @@ -163,24 +163,23 @@ static void recent_table_flush(struct recent_table *t) struct recent_entry *e, *next; unsigned int i; - for (i = 0; i < ip_list_hash_size; i++) { + for (i = 0; i < ip_list_hash_size; i++) list_for_each_entry_safe(e, next, &t->iphash[i], list) recent_entry_remove(t, e); - } } -static int +static bool ipt_recent_match(const struct sk_buff *skb, const struct net_device *in, const struct net_device *out, const struct xt_match *match, const void *matchinfo, - int offset, unsigned int protoff, int *hotdrop) + int offset, unsigned int protoff, bool *hotdrop) { const struct ipt_recent_info *info = matchinfo; struct recent_table *t; struct recent_entry *e; __be32 addr; u_int8_t ttl; - int ret = info->invert; + bool ret = info->invert; if (info->side == IPT_RECENT_DEST) addr = ip_hdr(skb)->daddr; @@ -201,16 +200,16 @@ ipt_recent_match(const struct sk_buff *skb, goto out; e = recent_entry_init(t, addr, ttl); if (e == NULL) - *hotdrop = 1; - ret ^= 1; + *hotdrop = true; + ret = !ret; goto out; } if (info->check_set & IPT_RECENT_SET) - ret ^= 1; + ret = !ret; else if (info->check_set & IPT_RECENT_REMOVE) { recent_entry_remove(t, e); - ret ^= 1; + ret = !ret; } else if (info->check_set & (IPT_RECENT_CHECK | IPT_RECENT_UPDATE)) { unsigned long t = jiffies - info->seconds * HZ; unsigned int i, hits = 0; @@ -219,7 +218,7 @@ ipt_recent_match(const struct sk_buff *skb, if (info->seconds && time_after(t, e->stamps[i])) continue; if (++hits >= info->hit_count) { - ret ^= 1; + ret = !ret; break; } } @@ -235,7 +234,7 @@ out: return ret; } -static int +static bool ipt_recent_checkentry(const char *tablename, const void *ip, const struct xt_match *match, void *matchinfo, unsigned int hook_mask) @@ -243,24 +242,24 @@ ipt_recent_checkentry(const char *tablename, const void *ip, const struct ipt_recent_info *info = matchinfo; struct recent_table *t; unsigned i; - int ret = 0; + bool ret = false; if (hweight8(info->check_set & (IPT_RECENT_SET | IPT_RECENT_REMOVE | IPT_RECENT_CHECK | IPT_RECENT_UPDATE)) != 1) - return 0; + return false; if ((info->check_set & (IPT_RECENT_SET | IPT_RECENT_REMOVE)) && (info->seconds || info->hit_count)) - return 0; + return false; if (info->name[0] == '\0' || strnlen(info->name, IPT_RECENT_NAME_LEN) == IPT_RECENT_NAME_LEN) - return 0; + return false; mutex_lock(&recent_mutex); t = recent_table_lookup(info->name); if (t != NULL) { t->refcnt++; - ret = 1; + ret = true; goto out; } @@ -287,7 +286,7 @@ ipt_recent_checkentry(const char *tablename, const void *ip, spin_lock_bh(&recent_lock); list_add_tail(&t->list, &tables); spin_unlock_bh(&recent_lock); - ret = 1; + ret = true; out: mutex_unlock(&recent_mutex); return ret; @@ -323,18 +322,16 @@ struct recent_iter_state { static void *recent_seq_start(struct seq_file *seq, loff_t *pos) { struct recent_iter_state *st = seq->private; - struct recent_table *t = st->table; + const struct recent_table *t = st->table; struct recent_entry *e; loff_t p = *pos; spin_lock_bh(&recent_lock); - for (st->bucket = 0; st->bucket < ip_list_hash_size; st->bucket++) { - list_for_each_entry(e, &t->iphash[st->bucket], list) { + for (st->bucket = 0; st->bucket < ip_list_hash_size; st->bucket++) + list_for_each_entry(e, &t->iphash[st->bucket], list) if (p-- == 0) return e; - } - } return NULL; } @@ -373,7 +370,7 @@ static int recent_seq_show(struct seq_file *seq, void *v) return 0; } -static struct seq_operations recent_seq_ops = { +static const struct seq_operations recent_seq_ops = { .start = recent_seq_start, .next = recent_seq_next, .stop = recent_seq_stop, @@ -390,12 +387,17 @@ static int recent_seq_open(struct inode *inode, struct file *file) st = kzalloc(sizeof(*st), GFP_KERNEL); if (st == NULL) return -ENOMEM; + ret = seq_open(file, &recent_seq_ops); - if (ret) + if (ret) { kfree(st); + goto out; + } + st->table = pde->data; seq = file->private_data; seq->private = st; +out: return ret; } @@ -463,7 +465,7 @@ static const struct file_operations recent_fops = { }; #endif /* CONFIG_PROC_FS */ -static struct xt_match recent_match = { +static struct xt_match recent_match __read_mostly = { .name = "recent", .family = AF_INET, .match = ipt_recent_match, diff --git a/net/ipv4/netfilter/ipt_tos.c b/net/ipv4/netfilter/ipt_tos.c index d314844af12b..e740441c973d 100644 --- a/net/ipv4/netfilter/ipt_tos.c +++ b/net/ipv4/netfilter/ipt_tos.c @@ -18,7 +18,7 @@ MODULE_LICENSE("GPL"); MODULE_DESCRIPTION("iptables TOS match module"); -static int +static bool match(const struct sk_buff *skb, const struct net_device *in, const struct net_device *out, @@ -26,14 +26,14 @@ match(const struct sk_buff *skb, const void *matchinfo, int offset, unsigned int protoff, - int *hotdrop) + bool *hotdrop) { const struct ipt_tos_info *info = matchinfo; return (ip_hdr(skb)->tos == info->tos) ^ info->invert; } -static struct xt_match tos_match = { +static struct xt_match tos_match __read_mostly = { .name = "tos", .family = AF_INET, .match = match, diff --git a/net/ipv4/netfilter/ipt_ttl.c b/net/ipv4/netfilter/ipt_ttl.c index ab02d9e3139c..a439900a4ba5 100644 --- a/net/ipv4/netfilter/ipt_ttl.c +++ b/net/ipv4/netfilter/ipt_ttl.c @@ -18,37 +18,33 @@ MODULE_AUTHOR("Harald Welte <laforge@netfilter.org>"); MODULE_DESCRIPTION("IP tables TTL matching module"); MODULE_LICENSE("GPL"); -static int match(const struct sk_buff *skb, - const struct net_device *in, const struct net_device *out, - const struct xt_match *match, const void *matchinfo, - int offset, unsigned int protoff, int *hotdrop) +static bool match(const struct sk_buff *skb, + const struct net_device *in, const struct net_device *out, + const struct xt_match *match, const void *matchinfo, + int offset, unsigned int protoff, bool *hotdrop) { const struct ipt_ttl_info *info = matchinfo; const u8 ttl = ip_hdr(skb)->ttl; switch (info->mode) { case IPT_TTL_EQ: - return (ttl == info->ttl); - break; + return ttl == info->ttl; case IPT_TTL_NE: - return (!(ttl == info->ttl)); - break; + return ttl != info->ttl; case IPT_TTL_LT: - return (ttl < info->ttl); - break; + return ttl < info->ttl; case IPT_TTL_GT: - return (ttl > info->ttl); - break; + return ttl > info->ttl; default: printk(KERN_WARNING "ipt_ttl: unknown mode %d\n", info->mode); - return 0; + return false; } - return 0; + return false; } -static struct xt_match ttl_match = { +static struct xt_match ttl_match __read_mostly = { .name = "ttl", .family = AF_INET, .match = match, diff --git a/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c b/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c index 6dc72a815f77..f813e02aab30 100644 --- a/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c +++ b/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c @@ -24,12 +24,6 @@ #include <net/netfilter/nf_conntrack_core.h> #include <net/netfilter/ipv4/nf_conntrack_ipv4.h> -#if 0 -#define DEBUGP printk -#else -#define DEBUGP(format, args...) -#endif - static int ipv4_pkt_to_tuple(const struct sk_buff *skb, unsigned int nhoff, struct nf_conntrack_tuple *tuple) { @@ -84,34 +78,24 @@ nf_ct_ipv4_gather_frags(struct sk_buff *skb, u_int32_t user) return skb; } -static int -ipv4_prepare(struct sk_buff **pskb, unsigned int hooknum, unsigned int *dataoff, - u_int8_t *protonum) +static int ipv4_get_l4proto(const struct sk_buff *skb, unsigned int nhoff, + unsigned int *dataoff, u_int8_t *protonum) { - /* Never happen */ - if (ip_hdr(*pskb)->frag_off & htons(IP_OFFSET)) { - if (net_ratelimit()) { - printk(KERN_ERR "ipv4_prepare: Frag of proto %u (hook=%u)\n", - ip_hdr(*pskb)->protocol, hooknum); - } - return -NF_DROP; - } - - *dataoff = skb_network_offset(*pskb) + ip_hdrlen(*pskb); - *protonum = ip_hdr(*pskb)->protocol; + struct iphdr _iph, *iph; - return NF_ACCEPT; -} + iph = skb_header_pointer(skb, nhoff, sizeof(_iph), &_iph); + if (iph == NULL) + return -NF_DROP; -int nf_nat_module_is_loaded = 0; -EXPORT_SYMBOL_GPL(nf_nat_module_is_loaded); + /* Conntrack defragments packets, we might still see fragments + * inside ICMP packets though. */ + if (iph->frag_off & htons(IP_OFFSET)) + return -NF_DROP; -static u_int32_t ipv4_get_features(const struct nf_conntrack_tuple *tuple) -{ - if (nf_nat_module_is_loaded) - return NF_CT_F_NAT; + *dataoff = nhoff + (iph->ihl << 2); + *protonum = iph->protocol; - return NF_CT_F_BASIC; + return NF_ACCEPT; } static unsigned int ipv4_confirm(unsigned int hooknum, @@ -335,17 +319,17 @@ getorigdst(struct sock *sk, int optval, void __user *user, int *len) /* We only do TCP at the moment: is there a better way? */ if (strcmp(sk->sk_prot->name, "TCP")) { - DEBUGP("SO_ORIGINAL_DST: Not a TCP socket\n"); + pr_debug("SO_ORIGINAL_DST: Not a TCP socket\n"); return -ENOPROTOOPT; } if ((unsigned int) *len < sizeof(struct sockaddr_in)) { - DEBUGP("SO_ORIGINAL_DST: len %u not %u\n", - *len, sizeof(struct sockaddr_in)); + pr_debug("SO_ORIGINAL_DST: len %d not %Zu\n", + *len, sizeof(struct sockaddr_in)); return -EINVAL; } - h = nf_conntrack_find_get(&tuple, NULL); + h = nf_conntrack_find_get(&tuple); if (h) { struct sockaddr_in sin; struct nf_conn *ct = nf_ct_tuplehash_to_ctrack(h); @@ -357,17 +341,17 @@ getorigdst(struct sock *sk, int optval, void __user *user, int *len) .tuple.dst.u3.ip; memset(sin.sin_zero, 0, sizeof(sin.sin_zero)); - DEBUGP("SO_ORIGINAL_DST: %u.%u.%u.%u %u\n", - NIPQUAD(sin.sin_addr.s_addr), ntohs(sin.sin_port)); + pr_debug("SO_ORIGINAL_DST: %u.%u.%u.%u %u\n", + NIPQUAD(sin.sin_addr.s_addr), ntohs(sin.sin_port)); nf_ct_put(ct); if (copy_to_user(user, &sin, sizeof(sin)) != 0) return -EFAULT; else return 0; } - DEBUGP("SO_ORIGINAL_DST: Can't find %u.%u.%u.%u/%u-%u.%u.%u.%u/%u.\n", - NIPQUAD(tuple.src.u3.ip), ntohs(tuple.src.u.tcp.port), - NIPQUAD(tuple.dst.u3.ip), ntohs(tuple.dst.u.tcp.port)); + pr_debug("SO_ORIGINAL_DST: Can't find %u.%u.%u.%u/%u-%u.%u.%u.%u/%u.\n", + NIPQUAD(tuple.src.u3.ip), ntohs(tuple.src.u.tcp.port), + NIPQUAD(tuple.dst.u3.ip), ntohs(tuple.dst.u.tcp.port)); return -ENOENT; } @@ -415,17 +399,17 @@ static struct nf_sockopt_ops so_getorigdst = { .get_optmin = SO_ORIGINAL_DST, .get_optmax = SO_ORIGINAL_DST+1, .get = &getorigdst, + .owner = THIS_MODULE, }; -struct nf_conntrack_l3proto nf_conntrack_l3proto_ipv4 = { +struct nf_conntrack_l3proto nf_conntrack_l3proto_ipv4 __read_mostly = { .l3proto = PF_INET, .name = "ipv4", .pkt_to_tuple = ipv4_pkt_to_tuple, .invert_tuple = ipv4_invert_tuple, .print_tuple = ipv4_print_tuple, .print_conntrack = ipv4_print_conntrack, - .prepare = ipv4_prepare, - .get_features = ipv4_get_features, + .get_l4proto = ipv4_get_l4proto, #if defined(CONFIG_NF_CT_NETLINK) || defined(CONFIG_NF_CT_NETLINK_MODULE) .tuple_to_nfattr = ipv4_tuple_to_nfattr, .nfattr_to_tuple = ipv4_nfattr_to_tuple, @@ -522,3 +506,9 @@ static void __exit nf_conntrack_l3proto_ipv4_fini(void) module_init(nf_conntrack_l3proto_ipv4_init); module_exit(nf_conntrack_l3proto_ipv4_fini); + +void need_ipv4_conntrack(void) +{ + return; +} +EXPORT_SYMBOL_GPL(need_ipv4_conntrack); diff --git a/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4_compat.c b/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4_compat.c index 89f933e81035..b3dd5de9a258 100644 --- a/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4_compat.c +++ b/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4_compat.c @@ -18,12 +18,6 @@ #include <net/netfilter/nf_conntrack_l4proto.h> #include <net/netfilter/nf_conntrack_expect.h> -#if 0 -#define DEBUGP printk -#else -#define DEBUGP(format, args...) -#endif - #ifdef CONFIG_NF_CT_ACCT static unsigned int seq_print_counters(struct seq_file *s, @@ -41,35 +35,36 @@ struct ct_iter_state { unsigned int bucket; }; -static struct list_head *ct_get_first(struct seq_file *seq) +static struct hlist_node *ct_get_first(struct seq_file *seq) { struct ct_iter_state *st = seq->private; for (st->bucket = 0; st->bucket < nf_conntrack_htable_size; st->bucket++) { - if (!list_empty(&nf_conntrack_hash[st->bucket])) - return nf_conntrack_hash[st->bucket].next; + if (!hlist_empty(&nf_conntrack_hash[st->bucket])) + return nf_conntrack_hash[st->bucket].first; } return NULL; } -static struct list_head *ct_get_next(struct seq_file *seq, struct list_head *head) +static struct hlist_node *ct_get_next(struct seq_file *seq, + struct hlist_node *head) { struct ct_iter_state *st = seq->private; head = head->next; - while (head == &nf_conntrack_hash[st->bucket]) { + while (head == NULL) { if (++st->bucket >= nf_conntrack_htable_size) return NULL; - head = nf_conntrack_hash[st->bucket].next; + head = nf_conntrack_hash[st->bucket].first; } return head; } -static struct list_head *ct_get_idx(struct seq_file *seq, loff_t pos) +static struct hlist_node *ct_get_idx(struct seq_file *seq, loff_t pos) { - struct list_head *head = ct_get_first(seq); + struct hlist_node *head = ct_get_first(seq); if (head) while (pos && (head = ct_get_next(seq, head))) @@ -169,7 +164,7 @@ static int ct_seq_show(struct seq_file *s, void *v) return 0; } -static struct seq_operations ct_seq_ops = { +static const struct seq_operations ct_seq_ops = { .start = ct_seq_start, .next = ct_seq_next, .stop = ct_seq_stop, @@ -182,7 +177,7 @@ static int ct_open(struct inode *inode, struct file *file) struct ct_iter_state *st; int ret; - st = kmalloc(sizeof(struct ct_iter_state), GFP_KERNEL); + st = kzalloc(sizeof(struct ct_iter_state), GFP_KERNEL); if (st == NULL) return -ENOMEM; ret = seq_open(file, &ct_seq_ops); @@ -190,7 +185,6 @@ static int ct_open(struct inode *inode, struct file *file) goto out_free; seq = file->private_data; seq->private = st; - memset(st, 0, sizeof(struct ct_iter_state)); return ret; out_free: kfree(st); @@ -206,47 +200,68 @@ static const struct file_operations ct_file_ops = { }; /* expects */ -static void *exp_seq_start(struct seq_file *s, loff_t *pos) +struct ct_expect_iter_state { + unsigned int bucket; +}; + +static struct hlist_node *ct_expect_get_first(struct seq_file *seq) { - struct list_head *e = &nf_conntrack_expect_list; - loff_t i; + struct ct_expect_iter_state *st = seq->private; - /* strange seq_file api calls stop even if we fail, - * thus we need to grab lock since stop unlocks */ - read_lock_bh(&nf_conntrack_lock); + for (st->bucket = 0; st->bucket < nf_ct_expect_hsize; st->bucket++) { + if (!hlist_empty(&nf_ct_expect_hash[st->bucket])) + return nf_ct_expect_hash[st->bucket].first; + } + return NULL; +} - if (list_empty(e)) - return NULL; +static struct hlist_node *ct_expect_get_next(struct seq_file *seq, + struct hlist_node *head) +{ + struct ct_expect_iter_state *st = seq->private; - for (i = 0; i <= *pos; i++) { - e = e->next; - if (e == &nf_conntrack_expect_list) + head = head->next; + while (head == NULL) { + if (++st->bucket >= nf_ct_expect_hsize) return NULL; + head = nf_ct_expect_hash[st->bucket].first; } - return e; + return head; } -static void *exp_seq_next(struct seq_file *s, void *v, loff_t *pos) +static struct hlist_node *ct_expect_get_idx(struct seq_file *seq, loff_t pos) { - struct list_head *e = v; + struct hlist_node *head = ct_expect_get_first(seq); - ++*pos; - e = e->next; + if (head) + while (pos && (head = ct_expect_get_next(seq, head))) + pos--; + return pos ? NULL : head; +} - if (e == &nf_conntrack_expect_list) - return NULL; +static void *exp_seq_start(struct seq_file *seq, loff_t *pos) +{ + read_lock_bh(&nf_conntrack_lock); + return ct_expect_get_idx(seq, *pos); +} - return e; +static void *exp_seq_next(struct seq_file *seq, void *v, loff_t *pos) +{ + (*pos)++; + return ct_expect_get_next(seq, v); } -static void exp_seq_stop(struct seq_file *s, void *v) +static void exp_seq_stop(struct seq_file *seq, void *v) { read_unlock_bh(&nf_conntrack_lock); } static int exp_seq_show(struct seq_file *s, void *v) { - struct nf_conntrack_expect *exp = v; + struct nf_conntrack_expect *exp; + struct hlist_node *n = v; + + exp = hlist_entry(n, struct nf_conntrack_expect, hnode); if (exp->tuple.src.l3num != AF_INET) return 0; @@ -266,7 +281,7 @@ static int exp_seq_show(struct seq_file *s, void *v) return seq_putc(s, '\n'); } -static struct seq_operations exp_seq_ops = { +static const struct seq_operations exp_seq_ops = { .start = exp_seq_start, .next = exp_seq_next, .stop = exp_seq_stop, @@ -275,7 +290,22 @@ static struct seq_operations exp_seq_ops = { static int exp_open(struct inode *inode, struct file *file) { - return seq_open(file, &exp_seq_ops); + struct seq_file *seq; + struct ct_expect_iter_state *st; + int ret; + + st = kzalloc(sizeof(struct ct_expect_iter_state), GFP_KERNEL); + if (!st) + return -ENOMEM; + ret = seq_open(file, &exp_seq_ops); + if (ret) + goto out_free; + seq = file->private_data; + seq->private = st; + return ret; +out_free: + kfree(st); + return ret; } static const struct file_operations ip_exp_file_ops = { @@ -283,7 +313,7 @@ static const struct file_operations ip_exp_file_ops = { .open = exp_open, .read = seq_read, .llseek = seq_lseek, - .release = seq_release + .release = seq_release_private, }; static void *ct_cpu_seq_start(struct seq_file *seq, loff_t *pos) @@ -354,7 +384,7 @@ static int ct_cpu_seq_show(struct seq_file *seq, void *v) return 0; } -static struct seq_operations ct_cpu_seq_ops = { +static const struct seq_operations ct_cpu_seq_ops = { .start = ct_cpu_seq_start, .next = ct_cpu_seq_next, .stop = ct_cpu_seq_stop, diff --git a/net/ipv4/netfilter/nf_conntrack_proto_icmp.c b/net/ipv4/netfilter/nf_conntrack_proto_icmp.c index 474b4cea8b82..6593fd2c5b10 100644 --- a/net/ipv4/netfilter/nf_conntrack_proto_icmp.c +++ b/net/ipv4/netfilter/nf_conntrack_proto_icmp.c @@ -21,12 +21,6 @@ static unsigned long nf_ct_icmp_timeout __read_mostly = 30*HZ; -#if 0 -#define DEBUGP printk -#else -#define DEBUGP(format, args...) -#endif - static int icmp_pkt_to_tuple(const struct sk_buff *skb, unsigned int dataoff, struct nf_conntrack_tuple *tuple) @@ -125,8 +119,8 @@ static int icmp_new(struct nf_conn *conntrack, if (conntrack->tuplehash[0].tuple.dst.u.icmp.type >= sizeof(valid_new) || !valid_new[conntrack->tuplehash[0].tuple.dst.u.icmp.type]) { /* Can't create a new ICMP `conn' with this. */ - DEBUGP("icmp: can't create new conn with type %u\n", - conntrack->tuplehash[0].tuple.dst.u.icmp.type); + pr_debug("icmp: can't create new conn with type %u\n", + conntrack->tuplehash[0].tuple.dst.u.icmp.type); NF_CT_DUMP_TUPLE(&conntrack->tuplehash[0].tuple); return 0; } @@ -142,54 +136,36 @@ icmp_error_message(struct sk_buff *skb, unsigned int hooknum) { struct nf_conntrack_tuple innertuple, origtuple; - struct { - struct icmphdr icmp; - struct iphdr ip; - } _in, *inside; struct nf_conntrack_l4proto *innerproto; struct nf_conntrack_tuple_hash *h; - int dataoff; NF_CT_ASSERT(skb->nfct == NULL); - /* Not enough header? */ - inside = skb_header_pointer(skb, ip_hdrlen(skb), sizeof(_in), &_in); - if (inside == NULL) - return -NF_ACCEPT; - - /* Ignore ICMP's containing fragments (shouldn't happen) */ - if (inside->ip.frag_off & htons(IP_OFFSET)) { - DEBUGP("icmp_error_message: fragment of proto %u\n", - inside->ip.protocol); + /* Are they talking about one of our connections? */ + if (!nf_ct_get_tuplepr(skb, + skb_network_offset(skb) + ip_hdrlen(skb) + + sizeof(struct icmphdr), + PF_INET, &origtuple)) { + pr_debug("icmp_error_message: failed to get tuple\n"); return -NF_ACCEPT; } /* rcu_read_lock()ed by nf_hook_slow */ - innerproto = __nf_ct_l4proto_find(PF_INET, inside->ip.protocol); - - dataoff = ip_hdrlen(skb) + sizeof(inside->icmp); - /* Are they talking about one of our connections? */ - if (!nf_ct_get_tuple(skb, dataoff, dataoff + inside->ip.ihl*4, PF_INET, - inside->ip.protocol, &origtuple, - &nf_conntrack_l3proto_ipv4, innerproto)) { - DEBUGP("icmp_error_message: ! get_tuple p=%u", - inside->ip.protocol); - return -NF_ACCEPT; - } + innerproto = __nf_ct_l4proto_find(PF_INET, origtuple.dst.protonum); /* Ordinarily, we'd expect the inverted tupleproto, but it's been preserved inside the ICMP. */ if (!nf_ct_invert_tuple(&innertuple, &origtuple, &nf_conntrack_l3proto_ipv4, innerproto)) { - DEBUGP("icmp_error_message: no match\n"); + pr_debug("icmp_error_message: no match\n"); return -NF_ACCEPT; } *ctinfo = IP_CT_RELATED; - h = nf_conntrack_find_get(&innertuple, NULL); + h = nf_conntrack_find_get(&innertuple); if (!h) { - DEBUGP("icmp_error_message: no match\n"); + pr_debug("icmp_error_message: no match\n"); return -NF_ACCEPT; } @@ -336,7 +312,7 @@ static struct ctl_table icmp_compat_sysctl_table[] = { #endif /* CONFIG_NF_CONNTRACK_PROC_COMPAT */ #endif /* CONFIG_SYSCTL */ -struct nf_conntrack_l4proto nf_conntrack_l4proto_icmp = +struct nf_conntrack_l4proto nf_conntrack_l4proto_icmp __read_mostly = { .l3proto = PF_INET, .l4proto = IPPROTO_ICMP, @@ -362,4 +338,3 @@ struct nf_conntrack_l4proto nf_conntrack_l4proto_icmp = #endif #endif }; -EXPORT_SYMBOL_GPL(nf_conntrack_l4proto_icmp); diff --git a/net/ipv4/netfilter/nf_nat_amanda.c b/net/ipv4/netfilter/nf_nat_amanda.c index 0f17098917bc..bd93a1d71052 100644 --- a/net/ipv4/netfilter/nf_nat_amanda.c +++ b/net/ipv4/netfilter/nf_nat_amanda.c @@ -45,7 +45,7 @@ static unsigned int help(struct sk_buff **pskb, /* Try to get same port: if not, try to change it. */ for (port = ntohs(exp->saved_proto.tcp.port); port != 0; port++) { exp->tuple.dst.u.tcp.port = htons(port); - if (nf_conntrack_expect_related(exp) == 0) + if (nf_ct_expect_related(exp) == 0) break; } @@ -57,7 +57,7 @@ static unsigned int help(struct sk_buff **pskb, matchoff, matchlen, buffer, strlen(buffer)); if (ret != NF_ACCEPT) - nf_conntrack_unexpect_related(exp); + nf_ct_unexpect_related(exp); return ret; } diff --git a/net/ipv4/netfilter/nf_nat_core.c b/net/ipv4/netfilter/nf_nat_core.c index ea02f00d2dac..deab27facbad 100644 --- a/net/ipv4/netfilter/nf_nat_core.c +++ b/net/ipv4/netfilter/nf_nat_core.c @@ -12,7 +12,6 @@ #include <linux/types.h> #include <linux/timer.h> #include <linux/skbuff.h> -#include <linux/vmalloc.h> #include <net/checksum.h> #include <net/icmp.h> #include <net/ip.h> @@ -32,20 +31,15 @@ #include <net/netfilter/nf_conntrack_l3proto.h> #include <net/netfilter/nf_conntrack_l4proto.h> -#if 0 -#define DEBUGP printk -#else -#define DEBUGP(format, args...) -#endif - static DEFINE_RWLOCK(nf_nat_lock); static struct nf_conntrack_l3proto *l3proto = NULL; /* Calculated at init based on memory size */ static unsigned int nf_nat_htable_size; +static int nf_nat_vmalloced; -static struct list_head *bysource; +static struct hlist_head *bysource; #define MAX_IP_NAT_PROTO 256 static struct nf_nat_protocol *nf_nat_protos[MAX_IP_NAT_PROTO]; @@ -83,23 +77,11 @@ static inline unsigned int hash_by_src(const struct nf_conntrack_tuple *tuple) { /* Original src, to ensure we map it consistently if poss. */ - return jhash_3words((__force u32)tuple->src.u3.ip, tuple->src.u.all, + return jhash_3words((__force u32)tuple->src.u3.ip, + (__force u32)tuple->src.u.all, tuple->dst.protonum, 0) % nf_nat_htable_size; } -/* Noone using conntrack by the time this called. */ -static void nf_nat_cleanup_conntrack(struct nf_conn *conn) -{ - struct nf_conn_nat *nat; - if (!(conn->status & IPS_NAT_DONE_MASK)) - return; - - nat = nfct_nat(conn); - write_lock_bh(&nf_nat_lock); - list_del(&nat->info.bysource); - write_unlock_bh(&nf_nat_lock); -} - /* Is this tuple already taken? (not by us) */ int nf_nat_used_tuple(const struct nf_conntrack_tuple *tuple, @@ -166,10 +148,11 @@ find_appropriate_src(const struct nf_conntrack_tuple *tuple, unsigned int h = hash_by_src(tuple); struct nf_conn_nat *nat; struct nf_conn *ct; + struct hlist_node *n; read_lock_bh(&nf_nat_lock); - list_for_each_entry(nat, &bysource[h], info.bysource) { - ct = (struct nf_conn *)((char *)nat - offsetof(struct nf_conn, data)); + hlist_for_each_entry(nat, n, &bysource[h], bysource) { + ct = nat->ct; if (same_src(ct, tuple)) { /* Copy source part from reply tuple. */ nf_ct_invert_tuplepr(result, @@ -254,7 +237,7 @@ get_unique_tuple(struct nf_conntrack_tuple *tuple, manips not an issue. */ if (maniptype == IP_NAT_MANIP_SRC) { if (find_appropriate_src(orig_tuple, tuple, range)) { - DEBUGP("get_unique_tuple: Found current src map\n"); + pr_debug("get_unique_tuple: Found current src map\n"); if (!(range->flags & IP_NAT_RANGE_PROTO_RANDOM)) if (!nf_nat_used_tuple(tuple, ct)) return; @@ -296,11 +279,20 @@ nf_nat_setup_info(struct nf_conn *ct, unsigned int hooknum) { struct nf_conntrack_tuple curr_tuple, new_tuple; - struct nf_conn_nat *nat = nfct_nat(ct); - struct nf_nat_info *info = &nat->info; + struct nf_conn_nat *nat; int have_to_hash = !(ct->status & IPS_NAT_DONE_MASK); enum nf_nat_manip_type maniptype = HOOK2MANIP(hooknum); + /* nat helper or nfctnetlink also setup binding */ + nat = nfct_nat(ct); + if (!nat) { + nat = nf_ct_ext_add(ct, NF_CT_EXT_NAT, GFP_ATOMIC); + if (nat == NULL) { + pr_debug("failed to add NAT extension\n"); + return NF_ACCEPT; + } + } + NF_CT_ASSERT(hooknum == NF_IP_PRE_ROUTING || hooknum == NF_IP_POST_ROUTING || hooknum == NF_IP_LOCAL_IN || @@ -337,7 +329,10 @@ nf_nat_setup_info(struct nf_conn *ct, srchash = hash_by_src(&ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple); write_lock_bh(&nf_nat_lock); - list_add(&info->bysource, &bysource[srchash]); + /* nf_conntrack_alter_reply might re-allocate exntension aera */ + nat = nfct_nat(ct); + nat->ct = ct; + hlist_add_head(&nat->bysource, &bysource[srchash]); write_unlock_bh(&nf_nat_lock); } @@ -462,8 +457,9 @@ int nf_nat_icmp_reply_translation(struct nf_conn *ct, return 0; } - DEBUGP("icmp_reply_translation: translating error %p manp %u dir %s\n", - *pskb, manip, dir == IP_CT_DIR_ORIGINAL ? "ORIG" : "REPLY"); + pr_debug("icmp_reply_translation: translating error %p manip %u " + "dir %s\n", *pskb, manip, + dir == IP_CT_DIR_ORIGINAL ? "ORIG" : "REPLY"); /* rcu_read_lock()ed by nf_hook_slow */ l4proto = __nf_ct_l4proto_find(PF_INET, inside->ip.protocol); @@ -590,17 +586,69 @@ nf_nat_port_nfattr_to_range(struct nfattr *tb[], struct nf_nat_range *range) EXPORT_SYMBOL_GPL(nf_nat_port_range_to_nfattr); #endif +/* Noone using conntrack by the time this called. */ +static void nf_nat_cleanup_conntrack(struct nf_conn *ct) +{ + struct nf_conn_nat *nat = nf_ct_ext_find(ct, NF_CT_EXT_NAT); + + if (nat == NULL || nat->ct == NULL) + return; + + NF_CT_ASSERT(nat->ct->status & IPS_NAT_DONE_MASK); + + write_lock_bh(&nf_nat_lock); + hlist_del(&nat->bysource); + nat->ct = NULL; + write_unlock_bh(&nf_nat_lock); +} + +static void nf_nat_move_storage(struct nf_conn *conntrack, void *old) +{ + struct nf_conn_nat *new_nat = nf_ct_ext_find(conntrack, NF_CT_EXT_NAT); + struct nf_conn_nat *old_nat = (struct nf_conn_nat *)old; + struct nf_conn *ct = old_nat->ct; + unsigned int srchash; + + if (!(ct->status & IPS_NAT_DONE_MASK)) + return; + + srchash = hash_by_src(&ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple); + + write_lock_bh(&nf_nat_lock); + hlist_replace_rcu(&old_nat->bysource, &new_nat->bysource); + new_nat->ct = ct; + write_unlock_bh(&nf_nat_lock); +} + +static struct nf_ct_ext_type nat_extend __read_mostly = { + .len = sizeof(struct nf_conn_nat), + .align = __alignof__(struct nf_conn_nat), + .destroy = nf_nat_cleanup_conntrack, + .move = nf_nat_move_storage, + .id = NF_CT_EXT_NAT, + .flags = NF_CT_EXT_F_PREALLOC, +}; + static int __init nf_nat_init(void) { size_t i; + int ret; + + ret = nf_ct_extend_register(&nat_extend); + if (ret < 0) { + printk(KERN_ERR "nf_nat_core: Unable to register extension\n"); + return ret; + } /* Leave them the same for the moment. */ nf_nat_htable_size = nf_conntrack_htable_size; - /* One vmalloc for both hash tables */ - bysource = vmalloc(sizeof(struct list_head) * nf_nat_htable_size); - if (!bysource) - return -ENOMEM; + bysource = nf_ct_alloc_hashtable(&nf_nat_htable_size, + &nf_nat_vmalloced); + if (!bysource) { + ret = -ENOMEM; + goto cleanup_extend; + } /* Sew in builtin protocols. */ write_lock_bh(&nf_nat_lock); @@ -612,18 +660,18 @@ static int __init nf_nat_init(void) write_unlock_bh(&nf_nat_lock); for (i = 0; i < nf_nat_htable_size; i++) { - INIT_LIST_HEAD(&bysource[i]); + INIT_HLIST_HEAD(&bysource[i]); } - /* FIXME: Man, this is a hack. <SIGH> */ - NF_CT_ASSERT(rcu_dereference(nf_conntrack_destroyed) == NULL); - rcu_assign_pointer(nf_conntrack_destroyed, nf_nat_cleanup_conntrack); - /* Initialize fake conntrack so that NAT will skip it */ nf_conntrack_untracked.status |= IPS_NAT_DONE_MASK; l3proto = nf_ct_l3proto_find_get((u_int16_t)AF_INET); return 0; + + cleanup_extend: + nf_ct_extend_unregister(&nat_extend); + return ret; } /* Clear NAT section of all conntracks, in case we're loaded again. */ @@ -641,10 +689,10 @@ static int clean_nat(struct nf_conn *i, void *data) static void __exit nf_nat_cleanup(void) { nf_ct_iterate_cleanup(&clean_nat, NULL); - rcu_assign_pointer(nf_conntrack_destroyed, NULL); synchronize_rcu(); - vfree(bysource); + nf_ct_free_hashtable(bysource, nf_nat_vmalloced, nf_nat_htable_size); nf_ct_l3proto_put(l3proto); + nf_ct_extend_unregister(&nat_extend); } MODULE_LICENSE("GPL"); diff --git a/net/ipv4/netfilter/nf_nat_ftp.c b/net/ipv4/netfilter/nf_nat_ftp.c index e6bc8e5a72f1..3663bd879c39 100644 --- a/net/ipv4/netfilter/nf_nat_ftp.c +++ b/net/ipv4/netfilter/nf_nat_ftp.c @@ -25,12 +25,6 @@ MODULE_AUTHOR("Rusty Russell <rusty@rustcorp.com.au>"); MODULE_DESCRIPTION("ftp NAT helper"); MODULE_ALIAS("ip_nat_ftp"); -#if 0 -#define DEBUGP printk -#else -#define DEBUGP(format, args...) -#endif - /* FIXME: Time out? --RR */ static int @@ -47,7 +41,7 @@ mangle_rfc959_packet(struct sk_buff **pskb, sprintf(buffer, "%u,%u,%u,%u,%u,%u", NIPQUAD(newip), port>>8, port&0xFF); - DEBUGP("calling nf_nat_mangle_tcp_packet\n"); + pr_debug("calling nf_nat_mangle_tcp_packet\n"); return nf_nat_mangle_tcp_packet(pskb, ct, ctinfo, matchoff, matchlen, buffer, strlen(buffer)); @@ -67,7 +61,7 @@ mangle_eprt_packet(struct sk_buff **pskb, sprintf(buffer, "|1|%u.%u.%u.%u|%u|", NIPQUAD(newip), port); - DEBUGP("calling nf_nat_mangle_tcp_packet\n"); + pr_debug("calling nf_nat_mangle_tcp_packet\n"); return nf_nat_mangle_tcp_packet(pskb, ct, ctinfo, matchoff, matchlen, buffer, strlen(buffer)); @@ -87,7 +81,7 @@ mangle_epsv_packet(struct sk_buff **pskb, sprintf(buffer, "|||%u|", port); - DEBUGP("calling nf_nat_mangle_tcp_packet\n"); + pr_debug("calling nf_nat_mangle_tcp_packet\n"); return nf_nat_mangle_tcp_packet(pskb, ct, ctinfo, matchoff, matchlen, buffer, strlen(buffer)); @@ -117,7 +111,7 @@ static unsigned int nf_nat_ftp(struct sk_buff **pskb, int dir = CTINFO2DIR(ctinfo); struct nf_conn *ct = exp->master; - DEBUGP("FTP_NAT: type %i, off %u len %u\n", type, matchoff, matchlen); + pr_debug("FTP_NAT: type %i, off %u len %u\n", type, matchoff, matchlen); /* Connection will come from wherever this packet goes, hence !dir */ newip = ct->tuplehash[!dir].tuple.dst.u3.ip; @@ -131,7 +125,7 @@ static unsigned int nf_nat_ftp(struct sk_buff **pskb, /* Try to get same port: if not, try to change it. */ for (port = ntohs(exp->saved_proto.tcp.port); port != 0; port++) { exp->tuple.dst.u.tcp.port = htons(port); - if (nf_conntrack_expect_related(exp) == 0) + if (nf_ct_expect_related(exp) == 0) break; } @@ -139,7 +133,7 @@ static unsigned int nf_nat_ftp(struct sk_buff **pskb, return NF_DROP; if (!mangle[type](pskb, newip, port, matchoff, matchlen, ct, ctinfo)) { - nf_conntrack_unexpect_related(exp); + nf_ct_unexpect_related(exp); return NF_DROP; } return NF_ACCEPT; diff --git a/net/ipv4/netfilter/nf_nat_h323.c b/net/ipv4/netfilter/nf_nat_h323.c index c5d2a2d690b8..c1b059a73708 100644 --- a/net/ipv4/netfilter/nf_nat_h323.c +++ b/net/ipv4/netfilter/nf_nat_h323.c @@ -21,12 +21,6 @@ #include <net/netfilter/nf_conntrack_expect.h> #include <linux/netfilter/nf_conntrack_h323.h> -#if 0 -#define DEBUGP printk -#else -#define DEBUGP(format, args...) -#endif - /****************************************************************************/ static int set_addr(struct sk_buff **pskb, unsigned char **data, int dataoff, @@ -126,12 +120,11 @@ static int set_sig_addr(struct sk_buff **pskb, struct nf_conn *ct, (ntohl(addr.ip) & 0xff000000) == 0x7f000000) i = 0; - DEBUGP - ("nf_nat_ras: set signal address " - "%u.%u.%u.%u:%hu->%u.%u.%u.%u:%hu\n", - NIPQUAD(ip), port, - NIPQUAD(ct->tuplehash[!dir].tuple.dst. - ip), info->sig_port[!dir]); + pr_debug("nf_nat_ras: set signal address " + "%u.%u.%u.%u:%hu->%u.%u.%u.%u:%hu\n", + NIPQUAD(addr.ip), port, + NIPQUAD(ct->tuplehash[!dir].tuple.dst.u3.ip), + info->sig_port[!dir]); return set_h225_addr(pskb, data, 0, &taddr[i], &ct->tuplehash[!dir]. tuple.dst.u3, @@ -139,12 +132,11 @@ static int set_sig_addr(struct sk_buff **pskb, struct nf_conn *ct, } else if (addr.ip == ct->tuplehash[dir].tuple.dst.u3.ip && port == info->sig_port[dir]) { /* GK->GW */ - DEBUGP - ("nf_nat_ras: set signal address " - "%u.%u.%u.%u:%hu->%u.%u.%u.%u:%hu\n", - NIPQUAD(ip), port, - NIPQUAD(ct->tuplehash[!dir].tuple.src. - ip), info->sig_port[!dir]); + pr_debug("nf_nat_ras: set signal address " + "%u.%u.%u.%u:%hu->%u.%u.%u.%u:%hu\n", + NIPQUAD(addr.ip), port, + NIPQUAD(ct->tuplehash[!dir].tuple.src.u3.ip), + info->sig_port[!dir]); return set_h225_addr(pskb, data, 0, &taddr[i], &ct->tuplehash[!dir]. tuple.src.u3, @@ -171,12 +163,11 @@ static int set_ras_addr(struct sk_buff **pskb, struct nf_conn *ct, if (get_h225_addr(ct, *data, &taddr[i], &addr, &port) && addr.ip == ct->tuplehash[dir].tuple.src.u3.ip && port == ct->tuplehash[dir].tuple.src.u.udp.port) { - DEBUGP("nf_nat_ras: set rasAddress " - "%u.%u.%u.%u:%hu->%u.%u.%u.%u:%hu\n", - NIPQUAD(ip), ntohs(port), - NIPQUAD(ct->tuplehash[!dir].tuple.dst.u3.ip), - ntohs(ct->tuplehash[!dir].tuple.dst.u.udp. - port)); + pr_debug("nf_nat_ras: set rasAddress " + "%u.%u.%u.%u:%hu->%u.%u.%u.%u:%hu\n", + NIPQUAD(addr.ip), ntohs(port), + NIPQUAD(ct->tuplehash[!dir].tuple.dst.u3.ip), + ntohs(ct->tuplehash[!dir].tuple.dst.u.udp.port)); return set_h225_addr(pskb, data, 0, &taddr[i], &ct->tuplehash[!dir].tuple.dst.u3, ct->tuplehash[!dir].tuple. @@ -237,12 +228,12 @@ static int nat_rtp_rtcp(struct sk_buff **pskb, struct nf_conn *ct, for (nated_port = ntohs(rtp_exp->tuple.dst.u.udp.port); nated_port != 0; nated_port += 2) { rtp_exp->tuple.dst.u.udp.port = htons(nated_port); - if (nf_conntrack_expect_related(rtp_exp) == 0) { + if (nf_ct_expect_related(rtp_exp) == 0) { rtcp_exp->tuple.dst.u.udp.port = htons(nated_port + 1); - if (nf_conntrack_expect_related(rtcp_exp) == 0) + if (nf_ct_expect_related(rtcp_exp) == 0) break; - nf_conntrack_unexpect_related(rtp_exp); + nf_ct_unexpect_related(rtp_exp); } } @@ -261,22 +252,22 @@ static int nat_rtp_rtcp(struct sk_buff **pskb, struct nf_conn *ct, info->rtp_port[i][dir] = rtp_port; info->rtp_port[i][!dir] = htons(nated_port); } else { - nf_conntrack_unexpect_related(rtp_exp); - nf_conntrack_unexpect_related(rtcp_exp); + nf_ct_unexpect_related(rtp_exp); + nf_ct_unexpect_related(rtcp_exp); return -1; } /* Success */ - DEBUGP("nf_nat_h323: expect RTP %u.%u.%u.%u:%hu->%u.%u.%u.%u:%hu\n", - NIPQUAD(rtp_exp->tuple.src.ip), - ntohs(rtp_exp->tuple.src.u.udp.port), - NIPQUAD(rtp_exp->tuple.dst.ip), - ntohs(rtp_exp->tuple.dst.u.udp.port)); - DEBUGP("nf_nat_h323: expect RTCP %u.%u.%u.%u:%hu->%u.%u.%u.%u:%hu\n", - NIPQUAD(rtcp_exp->tuple.src.ip), - ntohs(rtcp_exp->tuple.src.u.udp.port), - NIPQUAD(rtcp_exp->tuple.dst.ip), - ntohs(rtcp_exp->tuple.dst.u.udp.port)); + pr_debug("nf_nat_h323: expect RTP %u.%u.%u.%u:%hu->%u.%u.%u.%u:%hu\n", + NIPQUAD(rtp_exp->tuple.src.u3.ip), + ntohs(rtp_exp->tuple.src.u.udp.port), + NIPQUAD(rtp_exp->tuple.dst.u3.ip), + ntohs(rtp_exp->tuple.dst.u.udp.port)); + pr_debug("nf_nat_h323: expect RTCP %u.%u.%u.%u:%hu->%u.%u.%u.%u:%hu\n", + NIPQUAD(rtcp_exp->tuple.src.u3.ip), + ntohs(rtcp_exp->tuple.src.u.udp.port), + NIPQUAD(rtcp_exp->tuple.dst.u3.ip), + ntohs(rtcp_exp->tuple.dst.u.udp.port)); return 0; } @@ -299,7 +290,7 @@ static int nat_t120(struct sk_buff **pskb, struct nf_conn *ct, /* Try to get same port: if not, try to change it. */ for (; nated_port != 0; nated_port++) { exp->tuple.dst.u.tcp.port = htons(nated_port); - if (nf_conntrack_expect_related(exp) == 0) + if (nf_ct_expect_related(exp) == 0) break; } @@ -313,13 +304,15 @@ static int nat_t120(struct sk_buff **pskb, struct nf_conn *ct, if (set_h245_addr(pskb, data, dataoff, taddr, &ct->tuplehash[!dir].tuple.dst.u3, htons(nated_port)) < 0) { - nf_conntrack_unexpect_related(exp); + nf_ct_unexpect_related(exp); return -1; } - DEBUGP("nf_nat_h323: expect T.120 %u.%u.%u.%u:%hu->%u.%u.%u.%u:%hu\n", - NIPQUAD(exp->tuple.src.ip), ntohs(exp->tuple.src.u.tcp.port), - NIPQUAD(exp->tuple.dst.ip), ntohs(exp->tuple.dst.u.tcp.port)); + pr_debug("nf_nat_h323: expect T.120 %u.%u.%u.%u:%hu->%u.%u.%u.%u:%hu\n", + NIPQUAD(exp->tuple.src.u3.ip), + ntohs(exp->tuple.src.u.tcp.port), + NIPQUAD(exp->tuple.dst.u3.ip), + ntohs(exp->tuple.dst.u.tcp.port)); return 0; } @@ -347,7 +340,7 @@ static int nat_h245(struct sk_buff **pskb, struct nf_conn *ct, /* Try to get same port: if not, try to change it. */ for (; nated_port != 0; nated_port++) { exp->tuple.dst.u.tcp.port = htons(nated_port); - if (nf_conntrack_expect_related(exp) == 0) + if (nf_ct_expect_related(exp) == 0) break; } @@ -365,13 +358,15 @@ static int nat_h245(struct sk_buff **pskb, struct nf_conn *ct, info->sig_port[dir] = port; info->sig_port[!dir] = htons(nated_port); } else { - nf_conntrack_unexpect_related(exp); + nf_ct_unexpect_related(exp); return -1; } - DEBUGP("nf_nat_q931: expect H.245 %u.%u.%u.%u:%hu->%u.%u.%u.%u:%hu\n", - NIPQUAD(exp->tuple.src.ip), ntohs(exp->tuple.src.u.tcp.port), - NIPQUAD(exp->tuple.dst.ip), ntohs(exp->tuple.dst.u.tcp.port)); + pr_debug("nf_nat_q931: expect H.245 %u.%u.%u.%u:%hu->%u.%u.%u.%u:%hu\n", + NIPQUAD(exp->tuple.src.u3.ip), + ntohs(exp->tuple.src.u.tcp.port), + NIPQUAD(exp->tuple.dst.u3.ip), + ntohs(exp->tuple.dst.u.tcp.port)); return 0; } @@ -433,7 +428,7 @@ static int nat_q931(struct sk_buff **pskb, struct nf_conn *ct, /* Try to get same port: if not, try to change it. */ for (; nated_port != 0; nated_port++) { exp->tuple.dst.u.tcp.port = htons(nated_port); - if (nf_conntrack_expect_related(exp) == 0) + if (nf_ct_expect_related(exp) == 0) break; } @@ -460,14 +455,16 @@ static int nat_q931(struct sk_buff **pskb, struct nf_conn *ct, info->sig_port[!dir]); } } else { - nf_conntrack_unexpect_related(exp); + nf_ct_unexpect_related(exp); return -1; } /* Success */ - DEBUGP("nf_nat_ras: expect Q.931 %u.%u.%u.%u:%hu->%u.%u.%u.%u:%hu\n", - NIPQUAD(exp->tuple.src.ip), ntohs(exp->tuple.src.u.tcp.port), - NIPQUAD(exp->tuple.dst.ip), ntohs(exp->tuple.dst.u.tcp.port)); + pr_debug("nf_nat_ras: expect Q.931 %u.%u.%u.%u:%hu->%u.%u.%u.%u:%hu\n", + NIPQUAD(exp->tuple.src.u3.ip), + ntohs(exp->tuple.src.u.tcp.port), + NIPQUAD(exp->tuple.dst.u3.ip), + ntohs(exp->tuple.dst.u.tcp.port)); return 0; } @@ -517,7 +514,7 @@ static int nat_callforwarding(struct sk_buff **pskb, struct nf_conn *ct, /* Try to get same port: if not, try to change it. */ for (nated_port = ntohs(port); nated_port != 0; nated_port++) { exp->tuple.dst.u.tcp.port = htons(nated_port); - if (nf_conntrack_expect_related(exp) == 0) + if (nf_ct_expect_related(exp) == 0) break; } @@ -531,15 +528,17 @@ static int nat_callforwarding(struct sk_buff **pskb, struct nf_conn *ct, if (!set_h225_addr(pskb, data, dataoff, taddr, &ct->tuplehash[!dir].tuple.dst.u3, htons(nated_port)) == 0) { - nf_conntrack_unexpect_related(exp); + nf_ct_unexpect_related(exp); return -1; } /* Success */ - DEBUGP("nf_nat_q931: expect Call Forwarding " - "%u.%u.%u.%u:%hu->%u.%u.%u.%u:%hu\n", - NIPQUAD(exp->tuple.src.ip), ntohs(exp->tuple.src.u.tcp.port), - NIPQUAD(exp->tuple.dst.ip), ntohs(exp->tuple.dst.u.tcp.port)); + pr_debug("nf_nat_q931: expect Call Forwarding " + "%u.%u.%u.%u:%hu->%u.%u.%u.%u:%hu\n", + NIPQUAD(exp->tuple.src.u3.ip), + ntohs(exp->tuple.src.u.tcp.port), + NIPQUAD(exp->tuple.dst.u3.ip), + ntohs(exp->tuple.dst.u.tcp.port)); return 0; } @@ -566,8 +565,6 @@ static int __init init(void) rcu_assign_pointer(nat_h245_hook, nat_h245); rcu_assign_pointer(nat_callforwarding_hook, nat_callforwarding); rcu_assign_pointer(nat_q931_hook, nat_q931); - - DEBUGP("nf_nat_h323: init success\n"); return 0; } diff --git a/net/ipv4/netfilter/nf_nat_helper.c b/net/ipv4/netfilter/nf_nat_helper.c index 15b6e5ce3a04..93d8a0a8f035 100644 --- a/net/ipv4/netfilter/nf_nat_helper.c +++ b/net/ipv4/netfilter/nf_nat_helper.c @@ -26,13 +26,9 @@ #include <net/netfilter/nf_nat_core.h> #include <net/netfilter/nf_nat_helper.h> -#if 0 -#define DEBUGP printk -#define DUMP_OFFSET(x) printk("offset_before=%d, offset_after=%d, correction_pos=%u\n", x->offset_before, x->offset_after, x->correction_pos); -#else -#define DEBUGP(format, args...) -#define DUMP_OFFSET(x) -#endif +#define DUMP_OFFSET(x) \ + pr_debug("offset_before=%d, offset_after=%d, correction_pos=%u\n", \ + x->offset_before, x->offset_after, x->correction_pos); static DEFINE_SPINLOCK(nf_nat_seqofs_lock); @@ -47,15 +43,15 @@ adjust_tcp_sequence(u32 seq, struct nf_nat_seq *this_way, *other_way; struct nf_conn_nat *nat = nfct_nat(ct); - DEBUGP("nf_nat_resize_packet: old_size = %u, new_size = %u\n", - (*skb)->len, new_size); + pr_debug("adjust_tcp_sequence: seq = %u, sizediff = %d\n", + ntohl(seq), seq); dir = CTINFO2DIR(ctinfo); - this_way = &nat->info.seq[dir]; - other_way = &nat->info.seq[!dir]; + this_way = &nat->seq[dir]; + other_way = &nat->seq[!dir]; - DEBUGP("nf_nat_resize_packet: Seq_offset before: "); + pr_debug("nf_nat_resize_packet: Seq_offset before: "); DUMP_OFFSET(this_way); spin_lock_bh(&nf_nat_seqofs_lock); @@ -72,7 +68,7 @@ adjust_tcp_sequence(u32 seq, } spin_unlock_bh(&nf_nat_seqofs_lock); - DEBUGP("nf_nat_resize_packet: Seq_offset after: "); + pr_debug("nf_nat_resize_packet: Seq_offset after: "); DUMP_OFFSET(this_way); } @@ -100,14 +96,12 @@ static void mangle_contents(struct sk_buff *skb, /* update skb info */ if (rep_len > match_len) { - DEBUGP("nf_nat_mangle_packet: Extending packet by " - "%u from %u bytes\n", rep_len - match_len, - skb->len); + pr_debug("nf_nat_mangle_packet: Extending packet by " + "%u from %u bytes\n", rep_len - match_len, skb->len); skb_put(skb, rep_len - match_len); } else { - DEBUGP("nf_nat_mangle_packet: Shrinking packet from " - "%u from %u bytes\n", match_len - rep_len, - skb->len); + pr_debug("nf_nat_mangle_packet: Shrinking packet from " + "%u from %u bytes\n", match_len - rep_len, skb->len); __skb_trim(skb, skb->len + rep_len - match_len); } @@ -178,7 +172,7 @@ nf_nat_mangle_tcp_packet(struct sk_buff **pskb, datalen = (*pskb)->len - iph->ihl*4; if ((*pskb)->ip_summed != CHECKSUM_PARTIAL) { if (!(rt->rt_flags & RTCF_LOCAL) && - (*pskb)->dev->features & NETIF_F_ALL_CSUM) { + (*pskb)->dev->features & NETIF_F_V4_CSUM) { (*pskb)->ip_summed = CHECKSUM_PARTIAL; (*pskb)->csum_start = skb_headroom(*pskb) + skb_network_offset(*pskb) + @@ -190,7 +184,7 @@ nf_nat_mangle_tcp_packet(struct sk_buff **pskb, tcph->check = 0; tcph->check = tcp_v4_check(datalen, iph->saddr, iph->daddr, - csum_partial((char *)tcph, + csum_partial(tcph, datalen, 0)); } } else @@ -265,7 +259,7 @@ nf_nat_mangle_udp_packet(struct sk_buff **pskb, if ((*pskb)->ip_summed != CHECKSUM_PARTIAL) { if (!(rt->rt_flags & RTCF_LOCAL) && - (*pskb)->dev->features & NETIF_F_ALL_CSUM) { + (*pskb)->dev->features & NETIF_F_V4_CSUM) { (*pskb)->ip_summed = CHECKSUM_PARTIAL; (*pskb)->csum_start = skb_headroom(*pskb) + skb_network_offset(*pskb) + @@ -278,7 +272,7 @@ nf_nat_mangle_udp_packet(struct sk_buff **pskb, udph->check = 0; udph->check = csum_tcpudp_magic(iph->saddr, iph->daddr, datalen, IPPROTO_UDP, - csum_partial((char *)udph, + csum_partial(udph, datalen, 0)); if (!udph->check) udph->check = CSUM_MANGLED_0; @@ -320,9 +314,9 @@ sack_adjust(struct sk_buff *skb, new_end_seq = htonl(ntohl(sack->end_seq) - natseq->offset_before); - DEBUGP("sack_adjust: start_seq: %d->%d, end_seq: %d->%d\n", - ntohl(sack->start_seq), new_start_seq, - ntohl(sack->end_seq), new_end_seq); + pr_debug("sack_adjust: start_seq: %d->%d, end_seq: %d->%d\n", + ntohl(sack->start_seq), new_start_seq, + ntohl(sack->end_seq), new_end_seq); nf_proto_csum_replace4(&tcph->check, skb, sack->start_seq, new_start_seq, 0); @@ -372,8 +366,7 @@ nf_nat_sack_adjust(struct sk_buff **pskb, op[1] >= 2+TCPOLEN_SACK_PERBLOCK && ((op[1] - 2) % TCPOLEN_SACK_PERBLOCK) == 0) sack_adjust(*pskb, tcph, optoff+2, - optoff+op[1], - &nat->info.seq[!dir]); + optoff+op[1], &nat->seq[!dir]); optoff += op[1]; } } @@ -394,8 +387,8 @@ nf_nat_seq_adjust(struct sk_buff **pskb, dir = CTINFO2DIR(ctinfo); - this_way = &nat->info.seq[dir]; - other_way = &nat->info.seq[!dir]; + this_way = &nat->seq[dir]; + other_way = &nat->seq[!dir]; if (!skb_make_writable(pskb, ip_hdrlen(*pskb) + sizeof(*tcph))) return 0; @@ -415,9 +408,9 @@ nf_nat_seq_adjust(struct sk_buff **pskb, nf_proto_csum_replace4(&tcph->check, *pskb, tcph->seq, newseq, 0); nf_proto_csum_replace4(&tcph->check, *pskb, tcph->ack_seq, newack, 0); - DEBUGP("Adjusting sequence number from %u->%u, ack from %u->%u\n", - ntohl(tcph->seq), ntohl(newseq), ntohl(tcph->ack_seq), - ntohl(newack)); + pr_debug("Adjusting sequence number from %u->%u, ack from %u->%u\n", + ntohl(tcph->seq), ntohl(newseq), ntohl(tcph->ack_seq), + ntohl(newack)); tcph->seq = newseq; tcph->ack_seq = newack; diff --git a/net/ipv4/netfilter/nf_nat_irc.c b/net/ipv4/netfilter/nf_nat_irc.c index 9b8c0daea744..bcf274bba602 100644 --- a/net/ipv4/netfilter/nf_nat_irc.c +++ b/net/ipv4/netfilter/nf_nat_irc.c @@ -22,12 +22,6 @@ #include <net/netfilter/nf_conntrack_expect.h> #include <linux/netfilter/nf_conntrack_irc.h> -#if 0 -#define DEBUGP printk -#else -#define DEBUGP(format, args...) -#endif - MODULE_AUTHOR("Harald Welte <laforge@gnumonks.org>"); MODULE_DESCRIPTION("IRC (DCC) NAT helper"); MODULE_LICENSE("GPL"); @@ -44,9 +38,6 @@ static unsigned int help(struct sk_buff **pskb, u_int16_t port; unsigned int ret; - DEBUGP("IRC_NAT: info (seq %u + %u) in %u\n", - expect->seq, exp_irc_info->len, ntohl(tcph->seq)); - /* Reply comes from server. */ exp->saved_proto.tcp.port = exp->tuple.dst.u.tcp.port; exp->dir = IP_CT_DIR_REPLY; @@ -55,7 +46,7 @@ static unsigned int help(struct sk_buff **pskb, /* Try to get same port: if not, try to change it. */ for (port = ntohs(exp->saved_proto.tcp.port); port != 0; port++) { exp->tuple.dst.u.tcp.port = htons(port); - if (nf_conntrack_expect_related(exp) == 0) + if (nf_ct_expect_related(exp) == 0) break; } @@ -64,14 +55,14 @@ static unsigned int help(struct sk_buff **pskb, ip = ntohl(exp->master->tuplehash[IP_CT_DIR_REPLY].tuple.dst.u3.ip); sprintf(buffer, "%u %u", ip, port); - DEBUGP("nf_nat_irc: inserting '%s' == %u.%u.%u.%u, port %u\n", - buffer, NIPQUAD(ip), port); + pr_debug("nf_nat_irc: inserting '%s' == %u.%u.%u.%u, port %u\n", + buffer, NIPQUAD(ip), port); ret = nf_nat_mangle_tcp_packet(pskb, exp->master, ctinfo, matchoff, matchlen, buffer, strlen(buffer)); if (ret != NF_ACCEPT) - nf_conntrack_unexpect_related(exp); + nf_ct_unexpect_related(exp); return ret; } diff --git a/net/ipv4/netfilter/nf_nat_pptp.c b/net/ipv4/netfilter/nf_nat_pptp.c index a66888749ceb..984ec8308b2e 100644 --- a/net/ipv4/netfilter/nf_nat_pptp.c +++ b/net/ipv4/netfilter/nf_nat_pptp.c @@ -37,14 +37,6 @@ MODULE_AUTHOR("Harald Welte <laforge@gnumonks.org>"); MODULE_DESCRIPTION("Netfilter NAT helper module for PPTP"); MODULE_ALIAS("ip_nat_pptp"); -#if 0 -extern const char *pptp_msg_name[]; -#define DEBUGP(format, args...) printk(KERN_DEBUG "%s:%s: " format, __FILE__, \ - __FUNCTION__, ## args) -#else -#define DEBUGP(format, args...) -#endif - static void pptp_nat_expected(struct nf_conn *ct, struct nf_conntrack_expect *exp) { @@ -60,7 +52,7 @@ static void pptp_nat_expected(struct nf_conn *ct, /* And here goes the grand finale of corrosion... */ if (exp->dir == IP_CT_DIR_ORIGINAL) { - DEBUGP("we are PNS->PAC\n"); + pr_debug("we are PNS->PAC\n"); /* therefore, build tuple for PAC->PNS */ t.src.l3num = AF_INET; t.src.u3.ip = master->tuplehash[!exp->dir].tuple.src.u3.ip; @@ -69,7 +61,7 @@ static void pptp_nat_expected(struct nf_conn *ct, t.dst.u.gre.key = ct_pptp_info->pns_call_id; t.dst.protonum = IPPROTO_GRE; } else { - DEBUGP("we are PAC->PNS\n"); + pr_debug("we are PAC->PNS\n"); /* build tuple for PNS->PAC */ t.src.l3num = AF_INET; t.src.u3.ip = master->tuplehash[!exp->dir].tuple.src.u3.ip; @@ -79,15 +71,15 @@ static void pptp_nat_expected(struct nf_conn *ct, t.dst.protonum = IPPROTO_GRE; } - DEBUGP("trying to unexpect other dir: "); + pr_debug("trying to unexpect other dir: "); NF_CT_DUMP_TUPLE(&t); - other_exp = nf_conntrack_expect_find_get(&t); + other_exp = nf_ct_expect_find_get(&t); if (other_exp) { - nf_conntrack_unexpect_related(other_exp); - nf_conntrack_expect_put(other_exp); - DEBUGP("success\n"); + nf_ct_unexpect_related(other_exp); + nf_ct_expect_put(other_exp); + pr_debug("success\n"); } else { - DEBUGP("not found!\n"); + pr_debug("not found!\n"); } /* This must be a fresh one. */ @@ -161,9 +153,9 @@ pptp_outbound_pkt(struct sk_buff **pskb, cid_off = offsetof(union pptp_ctrl_union, clrreq.callID); break; default: - DEBUGP("unknown outbound packet 0x%04x:%s\n", msg, - (msg <= PPTP_MSG_MAX)? - pptp_msg_name[msg]:pptp_msg_name[0]); + pr_debug("unknown outbound packet 0x%04x:%s\n", msg, + msg <= PPTP_MSG_MAX ? pptp_msg_name[msg] : + pptp_msg_name[0]); /* fall through */ case PPTP_SET_LINK_INFO: /* only need to NAT in case PAC is behind NAT box */ @@ -179,8 +171,8 @@ pptp_outbound_pkt(struct sk_buff **pskb, /* only OUT_CALL_REQUEST, IN_CALL_REPLY, CALL_CLEAR_REQUEST pass * down to here */ - DEBUGP("altering call id from 0x%04x to 0x%04x\n", - ntohs(REQ_CID(pptpReq, cid_off)), ntohs(new_callid)); + pr_debug("altering call id from 0x%04x to 0x%04x\n", + ntohs(REQ_CID(pptpReq, cid_off)), ntohs(new_callid)); /* mangle packet */ if (nf_nat_mangle_tcp_packet(pskb, ct, ctinfo, @@ -255,8 +247,9 @@ pptp_inbound_pkt(struct sk_buff **pskb, pcid_off = offsetof(union pptp_ctrl_union, setlink.peersCallID); break; default: - DEBUGP("unknown inbound packet %s\n", (msg <= PPTP_MSG_MAX)? - pptp_msg_name[msg]:pptp_msg_name[0]); + pr_debug("unknown inbound packet %s\n", + msg <= PPTP_MSG_MAX ? pptp_msg_name[msg] : + pptp_msg_name[0]); /* fall through */ case PPTP_START_SESSION_REQUEST: case PPTP_START_SESSION_REPLY: @@ -272,8 +265,8 @@ pptp_inbound_pkt(struct sk_buff **pskb, * WAN_ERROR_NOTIFY, CALL_DISCONNECT_NOTIFY pass down here */ /* mangle packet */ - DEBUGP("altering peer call id from 0x%04x to 0x%04x\n", - ntohs(REQ_CID(pptpReq, pcid_off)), ntohs(new_pcid)); + pr_debug("altering peer call id from 0x%04x to 0x%04x\n", + ntohs(REQ_CID(pptpReq, pcid_off)), ntohs(new_pcid)); if (nf_nat_mangle_tcp_packet(pskb, ct, ctinfo, pcid_off + sizeof(struct pptp_pkt_hdr) + diff --git a/net/ipv4/netfilter/nf_nat_proto_gre.c b/net/ipv4/netfilter/nf_nat_proto_gre.c index c3908bc5a709..2e40cc83526a 100644 --- a/net/ipv4/netfilter/nf_nat_proto_gre.c +++ b/net/ipv4/netfilter/nf_nat_proto_gre.c @@ -36,13 +36,6 @@ MODULE_LICENSE("GPL"); MODULE_AUTHOR("Harald Welte <laforge@gnumonks.org>"); MODULE_DESCRIPTION("Netfilter NAT protocol helper module for GRE"); -#if 0 -#define DEBUGP(format, args...) printk(KERN_DEBUG "%s:%s: " format, __FILE__, \ - __FUNCTION__, ## args) -#else -#define DEBUGP(x, args...) -#endif - /* is key in given range between min and max */ static int gre_in_range(const struct nf_conntrack_tuple *tuple, @@ -83,7 +76,7 @@ gre_unique_tuple(struct nf_conntrack_tuple *tuple, keyptr = &tuple->dst.u.gre.key; if (!(range->flags & IP_NAT_RANGE_PROTO_SPECIFIED)) { - DEBUGP("%p: NATing GRE PPTP\n", conntrack); + pr_debug("%p: NATing GRE PPTP\n", conntrack); min = 1; range_size = 0xffff; } else { @@ -91,7 +84,7 @@ gre_unique_tuple(struct nf_conntrack_tuple *tuple, range_size = ntohs(range->max.gre.key) - min + 1; } - DEBUGP("min = %u, range_size = %u\n", min, range_size); + pr_debug("min = %u, range_size = %u\n", min, range_size); for (i = 0; i < range_size; i++, key++) { *keyptr = htons(min + key % range_size); @@ -99,7 +92,7 @@ gre_unique_tuple(struct nf_conntrack_tuple *tuple, return 1; } - DEBUGP("%p: no NAT mapping\n", conntrack); + pr_debug("%p: no NAT mapping\n", conntrack); return 0; } @@ -132,11 +125,11 @@ gre_manip_pkt(struct sk_buff **pskb, unsigned int iphdroff, * Try to behave like "nf_nat_proto_unknown" */ break; case GRE_VERSION_PPTP: - DEBUGP("call_id -> 0x%04x\n", ntohs(tuple->dst.u.gre.key)); + pr_debug("call_id -> 0x%04x\n", ntohs(tuple->dst.u.gre.key)); pgreh->call_id = tuple->dst.u.gre.key; break; default: - DEBUGP("can't nat unknown GRE version\n"); + pr_debug("can't nat unknown GRE version\n"); return 0; } return 1; diff --git a/net/ipv4/netfilter/nf_nat_rule.c b/net/ipv4/netfilter/nf_nat_rule.c index 6740736c5e79..76ec59ae524d 100644 --- a/net/ipv4/netfilter/nf_nat_rule.c +++ b/net/ipv4/netfilter/nf_nat_rule.c @@ -24,12 +24,6 @@ #include <net/netfilter/nf_nat_core.h> #include <net/netfilter/nf_nat_rule.h> -#if 0 -#define DEBUGP printk -#else -#define DEBUGP(format, args...) -#endif - #define NAT_VALID_HOOKS ((1<<NF_IP_PRE_ROUTING) | (1<<NF_IP_POST_ROUTING) | (1<<NF_IP_LOCAL_OUT)) static struct @@ -140,39 +134,39 @@ static unsigned int ipt_dnat_target(struct sk_buff **pskb, return nf_nat_setup_info(ct, &mr->range[0], hooknum); } -static int ipt_snat_checkentry(const char *tablename, - const void *entry, - const struct xt_target *target, - void *targinfo, - unsigned int hook_mask) +static bool ipt_snat_checkentry(const char *tablename, + const void *entry, + const struct xt_target *target, + void *targinfo, + unsigned int hook_mask) { struct nf_nat_multi_range_compat *mr = targinfo; /* Must be a valid range */ if (mr->rangesize != 1) { printk("SNAT: multiple ranges no longer supported\n"); - return 0; + return false; } - return 1; + return true; } -static int ipt_dnat_checkentry(const char *tablename, - const void *entry, - const struct xt_target *target, - void *targinfo, - unsigned int hook_mask) +static bool ipt_dnat_checkentry(const char *tablename, + const void *entry, + const struct xt_target *target, + void *targinfo, + unsigned int hook_mask) { struct nf_nat_multi_range_compat *mr = targinfo; /* Must be a valid range */ if (mr->rangesize != 1) { printk("DNAT: multiple ranges no longer supported\n"); - return 0; + return false; } - return 1; + return true; } -inline unsigned int +unsigned int alloc_null_binding(struct nf_conn *ct, unsigned int hooknum) { /* Force range to this IP; let proto decide mapping for @@ -186,8 +180,8 @@ alloc_null_binding(struct nf_conn *ct, unsigned int hooknum) struct nf_nat_range range = { IP_NAT_RANGE_MAP_IPS, ip, ip, { 0 }, { 0 } }; - DEBUGP("Allocating NULL binding for %p (%u.%u.%u.%u)\n", - ct, NIPQUAD(ip)); + pr_debug("Allocating NULL binding for %p (%u.%u.%u.%u)\n", + ct, NIPQUAD(ip)); return nf_nat_setup_info(ct, &range, hooknum); } @@ -198,15 +192,15 @@ alloc_null_binding_confirmed(struct nf_conn *ct, unsigned int hooknum) = (HOOK2MANIP(hooknum) == IP_NAT_MANIP_SRC ? ct->tuplehash[IP_CT_DIR_REPLY].tuple.dst.u3.ip : ct->tuplehash[IP_CT_DIR_REPLY].tuple.src.u3.ip); - u_int16_t all + __be16 all = (HOOK2MANIP(hooknum) == IP_NAT_MANIP_SRC ? ct->tuplehash[IP_CT_DIR_REPLY].tuple.dst.u.all : ct->tuplehash[IP_CT_DIR_REPLY].tuple.src.u.all); struct nf_nat_range range = { IP_NAT_RANGE_MAP_IPS, ip, ip, { all }, { all } }; - DEBUGP("Allocating NULL binding for confirmed %p (%u.%u.%u.%u)\n", - ct, NIPQUAD(ip)); + pr_debug("Allocating NULL binding for confirmed %p (%u.%u.%u.%u)\n", + ct, NIPQUAD(ip)); return nf_nat_setup_info(ct, &range, hooknum); } @@ -228,7 +222,7 @@ int nf_nat_rule_find(struct sk_buff **pskb, return ret; } -static struct xt_target ipt_snat_reg = { +static struct xt_target ipt_snat_reg __read_mostly = { .name = "SNAT", .target = ipt_snat_target, .targetsize = sizeof(struct nf_nat_multi_range_compat), @@ -238,7 +232,7 @@ static struct xt_target ipt_snat_reg = { .family = AF_INET, }; -static struct xt_target ipt_dnat_reg = { +static struct xt_target ipt_dnat_reg __read_mostly = { .name = "DNAT", .target = ipt_dnat_target, .targetsize = sizeof(struct nf_nat_multi_range_compat), diff --git a/net/ipv4/netfilter/nf_nat_sip.c b/net/ipv4/netfilter/nf_nat_sip.c index fac97cf51ae5..e14d41976c27 100644 --- a/net/ipv4/netfilter/nf_nat_sip.c +++ b/net/ipv4/netfilter/nf_nat_sip.c @@ -26,12 +26,6 @@ MODULE_AUTHOR("Christian Hentschel <chentschel@arnet.com.ar>"); MODULE_DESCRIPTION("SIP NAT helper"); MODULE_ALIAS("ip_nat_sip"); -#if 0 -#define DEBUGP printk -#else -#define DEBUGP(format, args...) -#endif - struct addr_map { struct { char src[sizeof("nnn.nnn.nnn.nnn:nnnnn")]; @@ -110,7 +104,7 @@ static unsigned int ip_nat_sip(struct sk_buff **pskb, dataoff = ip_hdrlen(*pskb) + sizeof(struct udphdr); datalen = (*pskb)->len - dataoff; if (datalen < sizeof("SIP/2.0") - 1) - return NF_DROP; + return NF_ACCEPT; addr_map_init(ct, &map); @@ -257,10 +251,12 @@ static unsigned int ip_nat_sdp(struct sk_buff **pskb, __be32 newip; u_int16_t port; - DEBUGP("ip_nat_sdp():\n"); - /* Connection will come from reply */ - newip = ct->tuplehash[!dir].tuple.dst.u3.ip; + if (ct->tuplehash[dir].tuple.src.u3.ip == + ct->tuplehash[!dir].tuple.dst.u3.ip) + newip = exp->tuple.dst.u3.ip; + else + newip = ct->tuplehash[!dir].tuple.dst.u3.ip; exp->saved_ip = exp->tuple.dst.u3.ip; exp->tuple.dst.u3.ip = newip; @@ -274,7 +270,7 @@ static unsigned int ip_nat_sdp(struct sk_buff **pskb, /* Try to get same port: if not, try to change it. */ for (port = ntohs(exp->saved_proto.udp.port); port != 0; port++) { exp->tuple.dst.u.udp.port = htons(port); - if (nf_conntrack_expect_related(exp) == 0) + if (nf_ct_expect_related(exp) == 0) break; } @@ -282,7 +278,7 @@ static unsigned int ip_nat_sdp(struct sk_buff **pskb, return NF_DROP; if (!mangle_sdp(pskb, ctinfo, ct, newip, port, dptr)) { - nf_conntrack_unexpect_related(exp); + nf_ct_unexpect_related(exp); return NF_DROP; } return NF_ACCEPT; diff --git a/net/ipv4/netfilter/nf_nat_snmp_basic.c b/net/ipv4/netfilter/nf_nat_snmp_basic.c index 6e88505d6162..6bfcd3a90f08 100644 --- a/net/ipv4/netfilter/nf_nat_snmp_basic.c +++ b/net/ipv4/netfilter/nf_nat_snmp_basic.c @@ -1276,9 +1276,6 @@ static struct nf_conntrack_helper snmp_helper __read_mostly = { .tuple.src.l3num = AF_INET, .tuple.src.u.udp.port = __constant_htons(SNMP_PORT), .tuple.dst.protonum = IPPROTO_UDP, - .mask.src.l3num = 0xFFFF, - .mask.src.u.udp.port = __constant_htons(0xFFFF), - .mask.dst.protonum = 0xFF, }; static struct nf_conntrack_helper snmp_trap_helper __read_mostly = { @@ -1290,9 +1287,6 @@ static struct nf_conntrack_helper snmp_trap_helper __read_mostly = { .tuple.src.l3num = AF_INET, .tuple.src.u.udp.port = __constant_htons(SNMP_TRAP_PORT), .tuple.dst.protonum = IPPROTO_UDP, - .mask.src.l3num = 0xFFFF, - .mask.src.u.udp.port = __constant_htons(0xFFFF), - .mask.dst.protonum = 0xFF, }; /***************************************************************************** diff --git a/net/ipv4/netfilter/nf_nat_standalone.c b/net/ipv4/netfilter/nf_nat_standalone.c index 55dac36dbc85..46cc99def165 100644 --- a/net/ipv4/netfilter/nf_nat_standalone.c +++ b/net/ipv4/netfilter/nf_nat_standalone.c @@ -19,6 +19,7 @@ #include <net/netfilter/nf_conntrack.h> #include <net/netfilter/nf_conntrack_core.h> +#include <net/netfilter/nf_conntrack_extend.h> #include <net/netfilter/nf_nat.h> #include <net/netfilter/nf_nat_rule.h> #include <net/netfilter/nf_nat_protocol.h> @@ -26,12 +27,6 @@ #include <net/netfilter/nf_nat_helper.h> #include <linux/netfilter_ipv4/ip_tables.h> -#if 0 -#define DEBUGP printk -#else -#define DEBUGP(format, args...) -#endif - #ifdef CONFIG_XFRM static void nat_decode_session(struct sk_buff *skb, struct flowi *fl) { @@ -113,8 +108,13 @@ nf_nat_fn(unsigned int hooknum, return NF_ACCEPT; nat = nfct_nat(ct); - if (!nat) - return NF_ACCEPT; + if (!nat) { + nat = nf_ct_ext_add(ct, NF_CT_EXT_NAT, GFP_ATOMIC); + if (nat == NULL) { + pr_debug("failed to add NAT extension\n"); + return NF_ACCEPT; + } + } switch (ctinfo) { case IP_CT_RELATED: @@ -148,9 +148,9 @@ nf_nat_fn(unsigned int hooknum, return ret; } } else - DEBUGP("Already setup manip %s for ct %p\n", - maniptype == IP_NAT_MANIP_SRC ? "SRC" : "DST", - ct); + pr_debug("Already setup manip %s for ct %p\n", + maniptype == IP_NAT_MANIP_SRC ? "SRC" : "DST", + ct); break; default: @@ -264,7 +264,7 @@ nf_nat_adjust(unsigned int hooknum, ct = nf_ct_get(*pskb, &ctinfo); if (ct && test_bit(IPS_SEQ_ADJUST_BIT, &ct->status)) { - DEBUGP("nf_nat_standalone: adjusting sequence number\n"); + pr_debug("nf_nat_standalone: adjusting sequence number\n"); if (!nf_nat_seq_adjust(pskb, ct, ctinfo)) return NF_DROP; } @@ -326,26 +326,10 @@ static struct nf_hook_ops nf_nat_ops[] = { static int __init nf_nat_standalone_init(void) { - int size, ret = 0; + int ret = 0; - need_conntrack(); + need_ipv4_conntrack(); - size = ALIGN(sizeof(struct nf_conn), __alignof__(struct nf_conn_nat)) + - sizeof(struct nf_conn_nat); - ret = nf_conntrack_register_cache(NF_CT_F_NAT, "nf_nat:base", size); - if (ret < 0) { - printk(KERN_ERR "nf_nat_init: Unable to create slab cache\n"); - return ret; - } - - size = ALIGN(size, __alignof__(struct nf_conn_help)) + - sizeof(struct nf_conn_help); - ret = nf_conntrack_register_cache(NF_CT_F_NAT|NF_CT_F_HELP, - "nf_nat:help", size); - if (ret < 0) { - printk(KERN_ERR "nf_nat_init: Unable to create slab cache\n"); - goto cleanup_register_cache; - } #ifdef CONFIG_XFRM BUG_ON(ip_nat_decode_session != NULL); ip_nat_decode_session = nat_decode_session; @@ -360,7 +344,6 @@ static int __init nf_nat_standalone_init(void) printk("nf_nat_init: can't register hooks.\n"); goto cleanup_rule_init; } - nf_nat_module_is_loaded = 1; return ret; cleanup_rule_init: @@ -370,9 +353,6 @@ static int __init nf_nat_standalone_init(void) ip_nat_decode_session = NULL; synchronize_net(); #endif - nf_conntrack_unregister_cache(NF_CT_F_NAT|NF_CT_F_HELP); - cleanup_register_cache: - nf_conntrack_unregister_cache(NF_CT_F_NAT); return ret; } @@ -380,7 +360,6 @@ static void __exit nf_nat_standalone_fini(void) { nf_unregister_hooks(nf_nat_ops, ARRAY_SIZE(nf_nat_ops)); nf_nat_rule_cleanup(); - nf_nat_module_is_loaded = 0; #ifdef CONFIG_XFRM ip_nat_decode_session = NULL; synchronize_net(); diff --git a/net/ipv4/netfilter/nf_nat_tftp.c b/net/ipv4/netfilter/nf_nat_tftp.c index 2566b79de224..04dfeaefec02 100644 --- a/net/ipv4/netfilter/nf_nat_tftp.c +++ b/net/ipv4/netfilter/nf_nat_tftp.c @@ -30,7 +30,7 @@ static unsigned int help(struct sk_buff **pskb, = ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.src.u.udp.port; exp->dir = IP_CT_DIR_REPLY; exp->expectfn = nf_nat_follow_master; - if (nf_conntrack_expect_related(exp) != 0) + if (nf_ct_expect_related(exp) != 0) return NF_DROP; return NF_ACCEPT; } diff --git a/net/ipv4/raw.c b/net/ipv4/raw.c index 24d7c9f31918..c6d71526f625 100644 --- a/net/ipv4/raw.c +++ b/net/ipv4/raw.c @@ -900,8 +900,9 @@ static int raw_seq_open(struct inode *inode, struct file *file) { struct seq_file *seq; int rc = -ENOMEM; - struct raw_iter_state *s = kmalloc(sizeof(*s), GFP_KERNEL); + struct raw_iter_state *s; + s = kzalloc(sizeof(*s), GFP_KERNEL); if (!s) goto out; rc = seq_open(file, &raw_seq_ops); @@ -910,7 +911,6 @@ static int raw_seq_open(struct inode *inode, struct file *file) seq = file->private_data; seq->private = s; - memset(s, 0, sizeof(*s)); out: return rc; out_kfree: diff --git a/net/ipv4/route.c b/net/ipv4/route.c index 29ca63e81ced..c7ca94bd152c 100644 --- a/net/ipv4/route.c +++ b/net/ipv4/route.c @@ -101,7 +101,6 @@ #include <net/tcp.h> #include <net/icmp.h> #include <net/xfrm.h> -#include <net/ip_mp_alg.h> #include <net/netevent.h> #include <net/rtnetlink.h> #ifdef CONFIG_SYSCTL @@ -168,7 +167,7 @@ static struct dst_ops ipv4_dst_ops = { #define ECN_OR_COST(class) TC_PRIO_##class -__u8 ip_tos2prio[16] = { +const __u8 ip_tos2prio[16] = { TC_PRIO_BESTEFFORT, ECN_OR_COST(FILLER), TC_PRIO_BESTEFFORT, @@ -375,8 +374,9 @@ static int rt_cache_seq_open(struct inode *inode, struct file *file) { struct seq_file *seq; int rc = -ENOMEM; - struct rt_cache_iter_state *s = kmalloc(sizeof(*s), GFP_KERNEL); + struct rt_cache_iter_state *s; + s = kzalloc(sizeof(*s), GFP_KERNEL); if (!s) goto out; rc = seq_open(file, &rt_cache_seq_ops); @@ -384,7 +384,6 @@ static int rt_cache_seq_open(struct inode *inode, struct file *file) goto out_kfree; seq = file->private_data; seq->private = s; - memset(s, 0, sizeof(*s)); out: return rc; out_kfree: @@ -495,13 +494,11 @@ static const struct file_operations rt_cpu_seq_fops = { static __inline__ void rt_free(struct rtable *rt) { - multipath_remove(rt); call_rcu_bh(&rt->u.dst.rcu_head, dst_rcu_free); } static __inline__ void rt_drop(struct rtable *rt) { - multipath_remove(rt); ip_rt_put(rt); call_rcu_bh(&rt->u.dst.rcu_head, dst_rcu_free); } @@ -574,52 +571,6 @@ static inline int compare_keys(struct flowi *fl1, struct flowi *fl2) (fl1->iif ^ fl2->iif)) == 0; } -#ifdef CONFIG_IP_ROUTE_MULTIPATH_CACHED -static struct rtable **rt_remove_balanced_route(struct rtable **chain_head, - struct rtable *expentry, - int *removed_count) -{ - int passedexpired = 0; - struct rtable **nextstep = NULL; - struct rtable **rthp = chain_head; - struct rtable *rth; - - if (removed_count) - *removed_count = 0; - - while ((rth = *rthp) != NULL) { - if (rth == expentry) - passedexpired = 1; - - if (((*rthp)->u.dst.flags & DST_BALANCED) != 0 && - compare_keys(&(*rthp)->fl, &expentry->fl)) { - if (*rthp == expentry) { - *rthp = rth->u.dst.rt_next; - continue; - } else { - *rthp = rth->u.dst.rt_next; - rt_free(rth); - if (removed_count) - ++(*removed_count); - } - } else { - if (!((*rthp)->u.dst.flags & DST_BALANCED) && - passedexpired && !nextstep) - nextstep = &rth->u.dst.rt_next; - - rthp = &rth->u.dst.rt_next; - } - } - - rt_free(expentry); - if (removed_count) - ++(*removed_count); - - return nextstep; -} -#endif /* CONFIG_IP_ROUTE_MULTIPATH_CACHED */ - - /* This runs via a timer and thus is always in BH context. */ static void rt_check_expire(unsigned long dummy) { @@ -658,22 +609,8 @@ static void rt_check_expire(unsigned long dummy) } /* Cleanup aged off entries. */ -#ifdef CONFIG_IP_ROUTE_MULTIPATH_CACHED - /* remove all related balanced entries if necessary */ - if (rth->u.dst.flags & DST_BALANCED) { - rthp = rt_remove_balanced_route( - &rt_hash_table[i].chain, - rth, NULL); - if (!rthp) - break; - } else { - *rthp = rth->u.dst.rt_next; - rt_free(rth); - } -#else /* CONFIG_IP_ROUTE_MULTIPATH_CACHED */ *rthp = rth->u.dst.rt_next; rt_free(rth); -#endif /* CONFIG_IP_ROUTE_MULTIPATH_CACHED */ } spin_unlock(rt_hash_lock_addr(i)); @@ -721,9 +658,6 @@ void rt_cache_flush(int delay) if (delay < 0) delay = ip_rt_min_delay; - /* flush existing multipath state*/ - multipath_flush(); - spin_lock_bh(&rt_flush_lock); if (del_timer(&rt_flush_timer) && delay > 0 && rt_deadline) { @@ -842,30 +776,9 @@ static int rt_garbage_collect(void) rthp = &rth->u.dst.rt_next; continue; } -#ifdef CONFIG_IP_ROUTE_MULTIPATH_CACHED - /* remove all related balanced entries - * if necessary - */ - if (rth->u.dst.flags & DST_BALANCED) { - int r; - - rthp = rt_remove_balanced_route( - &rt_hash_table[k].chain, - rth, - &r); - goal -= r; - if (!rthp) - break; - } else { - *rthp = rth->u.dst.rt_next; - rt_free(rth); - goal--; - } -#else /* CONFIG_IP_ROUTE_MULTIPATH_CACHED */ *rthp = rth->u.dst.rt_next; rt_free(rth); goal--; -#endif /* CONFIG_IP_ROUTE_MULTIPATH_CACHED */ } spin_unlock_bh(rt_hash_lock_addr(k)); if (goal <= 0) @@ -939,12 +852,7 @@ restart: spin_lock_bh(rt_hash_lock_addr(hash)); while ((rth = *rthp) != NULL) { -#ifdef CONFIG_IP_ROUTE_MULTIPATH_CACHED - if (!(rth->u.dst.flags & DST_BALANCED) && - compare_keys(&rth->fl, &rt->fl)) { -#else if (compare_keys(&rth->fl, &rt->fl)) { -#endif /* Put it first */ *rthp = rth->u.dst.rt_next; /* @@ -1774,10 +1682,6 @@ static inline int __mkroute_input(struct sk_buff *skb, atomic_set(&rth->u.dst.__refcnt, 1); rth->u.dst.flags= DST_HOST; -#ifdef CONFIG_IP_ROUTE_MULTIPATH_CACHED - if (res->fi->fib_nhs > 1) - rth->u.dst.flags |= DST_BALANCED; -#endif if (IN_DEV_CONF_GET(in_dev, NOPOLICY)) rth->u.dst.flags |= DST_NOPOLICY; if (IN_DEV_CONF_GET(out_dev, NOXFRM)) @@ -1812,11 +1716,11 @@ static inline int __mkroute_input(struct sk_buff *skb, return err; } -static inline int ip_mkroute_input_def(struct sk_buff *skb, - struct fib_result* res, - const struct flowi *fl, - struct in_device *in_dev, - __be32 daddr, __be32 saddr, u32 tos) +static inline int ip_mkroute_input(struct sk_buff *skb, + struct fib_result* res, + const struct flowi *fl, + struct in_device *in_dev, + __be32 daddr, __be32 saddr, u32 tos) { struct rtable* rth = NULL; int err; @@ -1837,63 +1741,6 @@ static inline int ip_mkroute_input_def(struct sk_buff *skb, return rt_intern_hash(hash, rth, (struct rtable**)&skb->dst); } -static inline int ip_mkroute_input(struct sk_buff *skb, - struct fib_result* res, - const struct flowi *fl, - struct in_device *in_dev, - __be32 daddr, __be32 saddr, u32 tos) -{ -#ifdef CONFIG_IP_ROUTE_MULTIPATH_CACHED - struct rtable* rth = NULL, *rtres; - unsigned char hop, hopcount; - int err = -EINVAL; - unsigned int hash; - - if (res->fi) - hopcount = res->fi->fib_nhs; - else - hopcount = 1; - - /* distinguish between multipath and singlepath */ - if (hopcount < 2) - return ip_mkroute_input_def(skb, res, fl, in_dev, daddr, - saddr, tos); - - /* add all alternatives to the routing cache */ - for (hop = 0; hop < hopcount; hop++) { - res->nh_sel = hop; - - /* put reference to previous result */ - if (hop) - ip_rt_put(rtres); - - /* create a routing cache entry */ - err = __mkroute_input(skb, res, in_dev, daddr, saddr, tos, - &rth); - if (err) - return err; - - /* put it into the cache */ - hash = rt_hash(daddr, saddr, fl->iif); - err = rt_intern_hash(hash, rth, &rtres); - if (err) - return err; - - /* forward hop information to multipath impl. */ - multipath_set_nhinfo(rth, - FIB_RES_NETWORK(*res), - FIB_RES_NETMASK(*res), - res->prefixlen, - &FIB_RES_NH(*res)); - } - skb->dst = &rtres->u.dst; - return err; -#else /* CONFIG_IP_ROUTE_MULTIPATH_CACHED */ - return ip_mkroute_input_def(skb, res, fl, in_dev, daddr, saddr, tos); -#endif /* CONFIG_IP_ROUTE_MULTIPATH_CACHED */ -} - - /* * NOTE. We drop all the packets that has local source * addresses, because every properly looped back packet @@ -2211,13 +2058,6 @@ static inline int __mkroute_output(struct rtable **result, atomic_set(&rth->u.dst.__refcnt, 1); rth->u.dst.flags= DST_HOST; -#ifdef CONFIG_IP_ROUTE_MULTIPATH_CACHED - if (res->fi) { - rth->rt_multipath_alg = res->fi->fib_mp_alg; - if (res->fi->fib_nhs > 1) - rth->u.dst.flags |= DST_BALANCED; - } -#endif if (IN_DEV_CONF_GET(in_dev, NOXFRM)) rth->u.dst.flags |= DST_NOXFRM; if (IN_DEV_CONF_GET(in_dev, NOPOLICY)) @@ -2277,12 +2117,12 @@ static inline int __mkroute_output(struct rtable **result, return err; } -static inline int ip_mkroute_output_def(struct rtable **rp, - struct fib_result* res, - const struct flowi *fl, - const struct flowi *oldflp, - struct net_device *dev_out, - unsigned flags) +static inline int ip_mkroute_output(struct rtable **rp, + struct fib_result* res, + const struct flowi *fl, + const struct flowi *oldflp, + struct net_device *dev_out, + unsigned flags) { struct rtable *rth = NULL; int err = __mkroute_output(&rth, res, fl, oldflp, dev_out, flags); @@ -2295,68 +2135,6 @@ static inline int ip_mkroute_output_def(struct rtable **rp, return err; } -static inline int ip_mkroute_output(struct rtable** rp, - struct fib_result* res, - const struct flowi *fl, - const struct flowi *oldflp, - struct net_device *dev_out, - unsigned flags) -{ -#ifdef CONFIG_IP_ROUTE_MULTIPATH_CACHED - unsigned char hop; - unsigned hash; - int err = -EINVAL; - struct rtable *rth = NULL; - - if (res->fi && res->fi->fib_nhs > 1) { - unsigned char hopcount = res->fi->fib_nhs; - - for (hop = 0; hop < hopcount; hop++) { - struct net_device *dev2nexthop; - - res->nh_sel = hop; - - /* hold a work reference to the output device */ - dev2nexthop = FIB_RES_DEV(*res); - dev_hold(dev2nexthop); - - /* put reference to previous result */ - if (hop) - ip_rt_put(*rp); - - err = __mkroute_output(&rth, res, fl, oldflp, - dev2nexthop, flags); - - if (err != 0) - goto cleanup; - - hash = rt_hash(oldflp->fl4_dst, oldflp->fl4_src, - oldflp->oif); - err = rt_intern_hash(hash, rth, rp); - - /* forward hop information to multipath impl. */ - multipath_set_nhinfo(rth, - FIB_RES_NETWORK(*res), - FIB_RES_NETMASK(*res), - res->prefixlen, - &FIB_RES_NH(*res)); - cleanup: - /* release work reference to output device */ - dev_put(dev2nexthop); - - if (err != 0) - return err; - } - return err; - } else { - return ip_mkroute_output_def(rp, res, fl, oldflp, dev_out, - flags); - } -#else /* CONFIG_IP_ROUTE_MULTIPATH_CACHED */ - return ip_mkroute_output_def(rp, res, fl, oldflp, dev_out, flags); -#endif -} - /* * Major route resolver routine. */ @@ -2570,17 +2348,6 @@ int __ip_route_output_key(struct rtable **rp, const struct flowi *flp) rth->fl.mark == flp->mark && !((rth->fl.fl4_tos ^ flp->fl4_tos) & (IPTOS_RT_MASK | RTO_ONLINK))) { - - /* check for multipath routes and choose one if - * necessary - */ - if (multipath_select_route(flp, rth, rp)) { - dst_hold(&(*rp)->u.dst); - RT_CACHE_STAT_INC(out_hit); - rcu_read_unlock_bh(); - return 0; - } - rth->u.dst.lastuse = jiffies; dst_hold(&rth->u.dst); rth->u.dst.__use++; @@ -2729,10 +2496,6 @@ static int rt_fill_info(struct sk_buff *skb, u32 pid, u32 seq, int event, if (rt->u.dst.tclassid) NLA_PUT_U32(skb, RTA_FLOW, rt->u.dst.tclassid); #endif -#ifdef CONFIG_IP_ROUTE_MULTIPATH_CACHED - if (rt->rt_multipath_alg != IP_MP_ALG_NONE) - NLA_PUT_U32(skb, RTA_MP_ALGO, rt->rt_multipath_alg); -#endif if (rt->fl.iif) NLA_PUT_BE32(skb, RTA_PREFSRC, rt->rt_spec_dst); else if (rt->rt_src != rt->fl.fl4_src) @@ -3204,7 +2967,7 @@ int __init ip_rt_init(void) ipv4_dst_ops.kmem_cachep = kmem_cache_create("ip_dst_cache", sizeof(struct rtable), 0, - SLAB_HWCACHE_ALIGN|SLAB_PANIC, NULL, NULL); + SLAB_HWCACHE_ALIGN|SLAB_PANIC, NULL); ipv4_dst_blackhole_ops.kmem_cachep = ipv4_dst_ops.kmem_cachep; diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index 11ff18258892..7e740112b238 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -1117,6 +1117,7 @@ int tcp_recvmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, long timeo; struct task_struct *user_recv = NULL; int copied_early = 0; + struct sk_buff *skb; lock_sock(sk); @@ -1143,16 +1144,26 @@ int tcp_recvmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, #ifdef CONFIG_NET_DMA tp->ucopy.dma_chan = NULL; preempt_disable(); - if ((len > sysctl_tcp_dma_copybreak) && !(flags & MSG_PEEK) && - !sysctl_tcp_low_latency && __get_cpu_var(softnet_data).net_dma) { - preempt_enable_no_resched(); - tp->ucopy.pinned_list = dma_pin_iovec_pages(msg->msg_iov, len); - } else - preempt_enable_no_resched(); + skb = skb_peek_tail(&sk->sk_receive_queue); + { + int available = 0; + + if (skb) + available = TCP_SKB_CB(skb)->seq + skb->len - (*seq); + if ((available < target) && + (len > sysctl_tcp_dma_copybreak) && !(flags & MSG_PEEK) && + !sysctl_tcp_low_latency && + __get_cpu_var(softnet_data).net_dma) { + preempt_enable_no_resched(); + tp->ucopy.pinned_list = + dma_pin_iovec_pages(msg->msg_iov, len); + } else { + preempt_enable_no_resched(); + } + } #endif do { - struct sk_buff *skb; u32 offset; /* Are we at urgent data? Stop if we have read anything or have SIGURG pending. */ @@ -1440,7 +1451,6 @@ skip_copy: #ifdef CONFIG_NET_DMA if (tp->ucopy.dma_chan) { - struct sk_buff *skb; dma_cookie_t done, used; dma_async_memcpy_issue_pending(tp->ucopy.dma_chan); @@ -2421,7 +2431,7 @@ void __init tcp_init(void) tcp_hashinfo.bind_bucket_cachep = kmem_cache_create("tcp_bind_bucket", sizeof(struct inet_bind_bucket), 0, - SLAB_HWCACHE_ALIGN|SLAB_PANIC, NULL, NULL); + SLAB_HWCACHE_ALIGN|SLAB_PANIC, NULL); /* Size and allocate the main established and bind bucket * hash tables. diff --git a/net/ipv4/tcp_bic.c b/net/ipv4/tcp_bic.c index dd9ef65ad3ff..4586211e3757 100644 --- a/net/ipv4/tcp_bic.c +++ b/net/ipv4/tcp_bic.c @@ -137,7 +137,7 @@ static inline void bictcp_update(struct bictcp *ca, u32 cwnd) } static void bictcp_cong_avoid(struct sock *sk, u32 ack, - u32 seq_rtt, u32 in_flight, int data_acked) + u32 in_flight, int data_acked) { struct tcp_sock *tp = tcp_sk(sk); struct bictcp *ca = inet_csk_ca(sk); @@ -206,7 +206,7 @@ static void bictcp_state(struct sock *sk, u8 new_state) /* Track delayed acknowledgment ratio using sliding window * ratio = (15*ratio + sample) / 16 */ -static void bictcp_acked(struct sock *sk, u32 cnt, ktime_t last) +static void bictcp_acked(struct sock *sk, u32 cnt, s32 rtt) { const struct inet_connection_sock *icsk = inet_csk(sk); diff --git a/net/ipv4/tcp_cong.c b/net/ipv4/tcp_cong.c index 1260e52ad772..55fca1820c34 100644 --- a/net/ipv4/tcp_cong.c +++ b/net/ipv4/tcp_cong.c @@ -324,8 +324,7 @@ EXPORT_SYMBOL_GPL(tcp_slow_start); /* This is Jacobson's slow start and congestion avoidance. * SIGCOMM '88, p. 328. */ -void tcp_reno_cong_avoid(struct sock *sk, u32 ack, u32 rtt, u32 in_flight, - int flag) +void tcp_reno_cong_avoid(struct sock *sk, u32 ack, u32 in_flight, int flag) { struct tcp_sock *tp = tcp_sk(sk); diff --git a/net/ipv4/tcp_cubic.c b/net/ipv4/tcp_cubic.c index ebfaac2f9f46..485d7ea35f75 100644 --- a/net/ipv4/tcp_cubic.c +++ b/net/ipv4/tcp_cubic.c @@ -246,38 +246,12 @@ static inline void bictcp_update(struct bictcp *ca, u32 cwnd) ca->cnt = 1; } - -/* Keep track of minimum rtt */ -static inline void measure_delay(struct sock *sk) -{ - const struct tcp_sock *tp = tcp_sk(sk); - struct bictcp *ca = inet_csk_ca(sk); - u32 delay; - - /* No time stamp */ - if (!(tp->rx_opt.saw_tstamp && tp->rx_opt.rcv_tsecr) || - /* Discard delay samples right after fast recovery */ - (s32)(tcp_time_stamp - ca->epoch_start) < HZ) - return; - - delay = (tcp_time_stamp - tp->rx_opt.rcv_tsecr)<<3; - if (delay == 0) - delay = 1; - - /* first time call or link delay decreases */ - if (ca->delay_min == 0 || ca->delay_min > delay) - ca->delay_min = delay; -} - static void bictcp_cong_avoid(struct sock *sk, u32 ack, - u32 seq_rtt, u32 in_flight, int data_acked) + u32 in_flight, int data_acked) { struct tcp_sock *tp = tcp_sk(sk); struct bictcp *ca = inet_csk_ca(sk); - if (data_acked) - measure_delay(sk); - if (!tcp_is_cwnd_limited(sk, in_flight)) return; @@ -334,17 +308,33 @@ static void bictcp_state(struct sock *sk, u8 new_state) /* Track delayed acknowledgment ratio using sliding window * ratio = (15*ratio + sample) / 16 */ -static void bictcp_acked(struct sock *sk, u32 cnt, ktime_t last) +static void bictcp_acked(struct sock *sk, u32 cnt, s32 rtt_us) { const struct inet_connection_sock *icsk = inet_csk(sk); + struct bictcp *ca = inet_csk_ca(sk); + u32 delay; if (cnt > 0 && icsk->icsk_ca_state == TCP_CA_Open) { - struct bictcp *ca = inet_csk_ca(sk); cnt -= ca->delayed_ack >> ACK_RATIO_SHIFT; ca->delayed_ack += cnt; } -} + /* Some calls are for duplicates without timetamps */ + if (rtt_us < 0) + return; + + /* Discard delay samples right after fast recovery */ + if ((s32)(tcp_time_stamp - ca->epoch_start) < HZ) + return; + + delay = usecs_to_jiffies(rtt_us) << 3; + if (delay == 0) + delay = 1; + + /* first time call or link delay decreases */ + if (ca->delay_min == 0 || ca->delay_min > delay) + ca->delay_min = delay; +} static struct tcp_congestion_ops cubictcp = { .init = bictcp_init, diff --git a/net/ipv4/tcp_highspeed.c b/net/ipv4/tcp_highspeed.c index 43d624e5043c..14a073d8b60f 100644 --- a/net/ipv4/tcp_highspeed.c +++ b/net/ipv4/tcp_highspeed.c @@ -109,7 +109,7 @@ static void hstcp_init(struct sock *sk) tp->snd_cwnd_clamp = min_t(u32, tp->snd_cwnd_clamp, 0xffffffff/128); } -static void hstcp_cong_avoid(struct sock *sk, u32 adk, u32 rtt, +static void hstcp_cong_avoid(struct sock *sk, u32 adk, u32 in_flight, int data_acked) { struct tcp_sock *tp = tcp_sk(sk); diff --git a/net/ipv4/tcp_htcp.c b/net/ipv4/tcp_htcp.c index 4ba4a7ae0a85..5215691f2760 100644 --- a/net/ipv4/tcp_htcp.c +++ b/net/ipv4/tcp_htcp.c @@ -76,20 +76,17 @@ static u32 htcp_cwnd_undo(struct sock *sk) return max(tp->snd_cwnd, (tp->snd_ssthresh << 7) / ca->beta); } -static inline void measure_rtt(struct sock *sk) +static inline void measure_rtt(struct sock *sk, u32 srtt) { const struct inet_connection_sock *icsk = inet_csk(sk); - const struct tcp_sock *tp = tcp_sk(sk); struct htcp *ca = inet_csk_ca(sk); - u32 srtt = tp->srtt >> 3; /* keep track of minimum RTT seen so far, minRTT is zero at first */ if (ca->minRTT > srtt || !ca->minRTT) ca->minRTT = srtt; /* max RTT */ - if (icsk->icsk_ca_state == TCP_CA_Open - && tp->snd_ssthresh < 0xFFFF && htcp_ccount(ca) > 3) { + if (icsk->icsk_ca_state == TCP_CA_Open) { if (ca->maxRTT < ca->minRTT) ca->maxRTT = ca->minRTT; if (ca->maxRTT < srtt @@ -98,7 +95,7 @@ static inline void measure_rtt(struct sock *sk) } } -static void measure_achieved_throughput(struct sock *sk, u32 pkts_acked, ktime_t last) +static void measure_achieved_throughput(struct sock *sk, u32 pkts_acked, s32 rtt) { const struct inet_connection_sock *icsk = inet_csk(sk); const struct tcp_sock *tp = tcp_sk(sk); @@ -108,6 +105,9 @@ static void measure_achieved_throughput(struct sock *sk, u32 pkts_acked, ktime_t if (icsk->icsk_ca_state == TCP_CA_Open) ca->pkts_acked = pkts_acked; + if (rtt > 0) + measure_rtt(sk, usecs_to_jiffies(rtt)); + if (!use_bandwidth_switch) return; @@ -225,7 +225,7 @@ static u32 htcp_recalc_ssthresh(struct sock *sk) return max((tp->snd_cwnd * ca->beta) >> 7, 2U); } -static void htcp_cong_avoid(struct sock *sk, u32 ack, u32 rtt, +static void htcp_cong_avoid(struct sock *sk, u32 ack, u32 in_flight, int data_acked) { struct tcp_sock *tp = tcp_sk(sk); @@ -237,8 +237,6 @@ static void htcp_cong_avoid(struct sock *sk, u32 ack, u32 rtt, if (tp->snd_cwnd <= tp->snd_ssthresh) tcp_slow_start(tp); else { - measure_rtt(sk); - /* In dangerous area, increase slowly. * In theory this is tp->snd_cwnd += alpha / tp->snd_cwnd */ diff --git a/net/ipv4/tcp_hybla.c b/net/ipv4/tcp_hybla.c index e5be35117223..b3e55cf56171 100644 --- a/net/ipv4/tcp_hybla.c +++ b/net/ipv4/tcp_hybla.c @@ -85,7 +85,7 @@ static inline u32 hybla_fraction(u32 odds) * o Give cwnd a new value based on the model proposed * o remember increments <1 */ -static void hybla_cong_avoid(struct sock *sk, u32 ack, u32 rtt, +static void hybla_cong_avoid(struct sock *sk, u32 ack, u32 in_flight, int flag) { struct tcp_sock *tp = tcp_sk(sk); @@ -103,7 +103,7 @@ static void hybla_cong_avoid(struct sock *sk, u32 ack, u32 rtt, return; if (!ca->hybla_en) - return tcp_reno_cong_avoid(sk, ack, rtt, in_flight, flag); + return tcp_reno_cong_avoid(sk, ack, in_flight, flag); if (ca->rho == 0) hybla_recalc_param(sk); diff --git a/net/ipv4/tcp_illinois.c b/net/ipv4/tcp_illinois.c index b2b2256d3b84..64f1cbaf96e8 100644 --- a/net/ipv4/tcp_illinois.c +++ b/net/ipv4/tcp_illinois.c @@ -83,18 +83,16 @@ static void tcp_illinois_init(struct sock *sk) } /* Measure RTT for each ack. */ -static void tcp_illinois_acked(struct sock *sk, u32 pkts_acked, ktime_t last) +static void tcp_illinois_acked(struct sock *sk, u32 pkts_acked, s32 rtt) { struct illinois *ca = inet_csk_ca(sk); - u32 rtt; ca->acked = pkts_acked; - if (ktime_equal(last, net_invalid_timestamp())) + /* dup ack, no rtt sample */ + if (rtt < 0) return; - rtt = ktime_to_us(net_timedelta(last)); - /* ignore bogus values, this prevents wraparound in alpha math */ if (rtt > RTT_MAX) rtt = RTT_MAX; @@ -258,7 +256,7 @@ static void tcp_illinois_state(struct sock *sk, u8 new_state) /* * Increase window in response to successful acknowledgment. */ -static void tcp_illinois_cong_avoid(struct sock *sk, u32 ack, u32 rtt, +static void tcp_illinois_cong_avoid(struct sock *sk, u32 ack, u32 in_flight, int flag) { struct tcp_sock *tp = tcp_sk(sk); diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index 13abf4eaa051..f893e90061eb 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -102,11 +102,14 @@ int sysctl_tcp_abc __read_mostly; #define FLAG_DATA_LOST 0x80 /* SACK detected data lossage. */ #define FLAG_SLOWPATH 0x100 /* Do not skip RFC checks for window update.*/ #define FLAG_ONLY_ORIG_SACKED 0x200 /* SACKs only non-rexmit sent before RTO */ +#define FLAG_SND_UNA_ADVANCED 0x400 /* Snd_una was changed (!= FLAG_DATA_ACKED) */ +#define FLAG_DSACKING_ACK 0x800 /* SACK blocks contained DSACK info */ #define FLAG_ACKED (FLAG_DATA_ACKED|FLAG_SYN_ACKED) #define FLAG_NOT_DUP (FLAG_DATA|FLAG_WIN_UPDATE|FLAG_ACKED) #define FLAG_CA_ALERT (FLAG_DATA_SACKED|FLAG_ECE) #define FLAG_FORWARD_PROGRESS (FLAG_ACKED|FLAG_DATA_SACKED) +#define FLAG_ANY_PROGRESS (FLAG_FORWARD_PROGRESS|FLAG_SND_UNA_ADVANCED) #define IsReno(tp) ((tp)->rx_opt.sack_ok == 0) #define IsFack(tp) ((tp)->rx_opt.sack_ok & 2) @@ -552,6 +555,16 @@ static void tcp_event_data_recv(struct sock *sk, struct sk_buff *skb) tcp_grow_window(sk, skb); } +static u32 tcp_rto_min(struct sock *sk) +{ + struct dst_entry *dst = __sk_dst_get(sk); + u32 rto_min = TCP_RTO_MIN; + + if (dst && dst_metric_locked(dst, RTAX_RTO_MIN)) + rto_min = dst->metrics[RTAX_RTO_MIN-1]; + return rto_min; +} + /* Called to compute a smoothed rtt estimate. The data fed to this * routine either comes from timestamps, or from segments that were * known _not_ to have been retransmitted [see Karn/Partridge @@ -613,13 +626,13 @@ static void tcp_rtt_estimator(struct sock *sk, const __u32 mrtt) if (tp->mdev_max < tp->rttvar) tp->rttvar -= (tp->rttvar-tp->mdev_max)>>2; tp->rtt_seq = tp->snd_nxt; - tp->mdev_max = TCP_RTO_MIN; + tp->mdev_max = tcp_rto_min(sk); } } else { /* no previous measure. */ tp->srtt = m<<3; /* take the measured time to be rtt */ tp->mdev = m<<1; /* make sure rto = 3*rtt */ - tp->mdev_max = tp->rttvar = max(tp->mdev, TCP_RTO_MIN); + tp->mdev_max = tp->rttvar = max(tp->mdev, tcp_rto_min(sk)); tp->rtt_seq = tp->snd_nxt; } } @@ -752,7 +765,15 @@ void tcp_update_metrics(struct sock *sk) } } -/* Numbers are taken from RFC2414. */ +/* Numbers are taken from RFC3390. + * + * John Heffner states: + * + * The RFC specifies a window of no more than 4380 bytes + * unless 2*MSS > 4380. Reading the pseudocode in the RFC + * is a bit misleading because they use a clamp at 4380 bytes + * rather than use a multiplier in the relevant range. + */ __u32 tcp_init_cwnd(struct tcp_sock *tp, struct dst_entry *dst) { __u32 cwnd = (dst ? dst_metric(dst, RTAX_INITCWND) : 0); @@ -964,12 +985,14 @@ tcp_sacktag_write_queue(struct sock *sk, struct sk_buff *ack_skb, u32 prior_snd_ /* Check for D-SACK. */ if (before(ntohl(sp[0].start_seq), TCP_SKB_CB(ack_skb)->ack_seq)) { + flag |= FLAG_DSACKING_ACK; found_dup_sack = 1; tp->rx_opt.sack_ok |= 4; NET_INC_STATS_BH(LINUX_MIB_TCPDSACKRECV); } else if (num_sacks > 1 && !after(ntohl(sp[0].end_seq), ntohl(sp[1].end_seq)) && !before(ntohl(sp[0].start_seq), ntohl(sp[1].start_seq))) { + flag |= FLAG_DSACKING_ACK; found_dup_sack = 1; tp->rx_opt.sack_ok |= 4; NET_INC_STATS_BH(LINUX_MIB_TCPDSACKOFORECV); @@ -1856,7 +1879,7 @@ static void tcp_cwnd_down(struct sock *sk, int flag) struct tcp_sock *tp = tcp_sk(sk); int decr = tp->snd_cwnd_cnt + 1; - if ((flag&FLAG_FORWARD_PROGRESS) || + if ((flag&(FLAG_ANY_PROGRESS|FLAG_DSACKING_ACK)) || (IsReno(tp) && !(flag&FLAG_NOT_DUP))) { tp->snd_cwnd_cnt = decr&1; decr >>= 1; @@ -2107,15 +2130,13 @@ static void tcp_mtup_probe_success(struct sock *sk, struct sk_buff *skb) * tcp_xmit_retransmit_queue(). */ static void -tcp_fastretrans_alert(struct sock *sk, u32 prior_snd_una, - int prior_packets, int flag) +tcp_fastretrans_alert(struct sock *sk, int prior_packets, int flag) { struct inet_connection_sock *icsk = inet_csk(sk); struct tcp_sock *tp = tcp_sk(sk); - int is_dupack = (tp->snd_una == prior_snd_una && - (!(flag&FLAG_NOT_DUP) || - ((flag&FLAG_DATA_SACKED) && - (tp->fackets_out > tp->reordering)))); + int is_dupack = !(flag&(FLAG_SND_UNA_ADVANCED|FLAG_NOT_DUP)); + int do_lost = is_dupack || ((flag&FLAG_DATA_SACKED) && + (tp->fackets_out > tp->reordering)); /* Some technical things: * 1. Reno does not count dupacks (sacked_out) automatically. */ @@ -2192,14 +2213,14 @@ tcp_fastretrans_alert(struct sock *sk, u32 prior_snd_una, /* F. Process state. */ switch (icsk->icsk_ca_state) { case TCP_CA_Recovery: - if (prior_snd_una == tp->snd_una) { + if (!(flag & FLAG_SND_UNA_ADVANCED)) { if (IsReno(tp) && is_dupack) tcp_add_reno_sack(sk); } else { int acked = prior_packets - tp->packets_out; if (IsReno(tp)) tcp_remove_reno_sacks(sk, acked); - is_dupack = tcp_try_undo_partial(sk, acked); + do_lost = tcp_try_undo_partial(sk, acked); } break; case TCP_CA_Loss: @@ -2215,7 +2236,7 @@ tcp_fastretrans_alert(struct sock *sk, u32 prior_snd_una, /* Loss is undone; fall through to processing in Open state. */ default: if (IsReno(tp)) { - if (tp->snd_una != prior_snd_una) + if (flag & FLAG_SND_UNA_ADVANCED) tcp_reset_reno_sack(tp); if (is_dupack) tcp_add_reno_sack(sk); @@ -2264,7 +2285,7 @@ tcp_fastretrans_alert(struct sock *sk, u32 prior_snd_una, tcp_set_ca_state(sk, TCP_CA_Recovery); } - if (is_dupack || tcp_head_timedout(sk)) + if (do_lost || tcp_head_timedout(sk)) tcp_update_scoreboard(sk); tcp_cwnd_down(sk, flag); tcp_xmit_retransmit_queue(sk); @@ -2329,11 +2350,11 @@ static inline void tcp_ack_update_rtt(struct sock *sk, const int flag, tcp_ack_no_tstamp(sk, seq_rtt, flag); } -static void tcp_cong_avoid(struct sock *sk, u32 ack, u32 rtt, +static void tcp_cong_avoid(struct sock *sk, u32 ack, u32 in_flight, int good) { const struct inet_connection_sock *icsk = inet_csk(sk); - icsk->icsk_ca_ops->cong_avoid(sk, ack, rtt, in_flight, good); + icsk->icsk_ca_ops->cong_avoid(sk, ack, in_flight, good); tcp_sk(sk)->snd_cwnd_stamp = tcp_time_stamp; } @@ -2399,6 +2420,9 @@ static int tcp_tso_acked(struct sock *sk, struct sk_buff *skb, __u32 dval = min(tp->fackets_out, packets_acked); tp->fackets_out -= dval; } + /* hint's skb might be NULL but we don't need to care */ + tp->fastpath_cnt_hint -= min_t(u32, packets_acked, + tp->fastpath_cnt_hint); tp->packets_out -= packets_acked; BUG_ON(tcp_skb_pcount(skb) == 0); @@ -2496,12 +2520,23 @@ static int tcp_clean_rtx_queue(struct sock *sk, __s32 *seq_rtt_p) tcp_ack_update_rtt(sk, acked, seq_rtt); tcp_ack_packets_out(sk); - /* Is the ACK triggering packet unambiguous? */ - if (acked & FLAG_RETRANS_DATA_ACKED) - last_ackt = net_invalid_timestamp(); + if (ca_ops->pkts_acked) { + s32 rtt_us = -1; + + /* Is the ACK triggering packet unambiguous? */ + if (!(acked & FLAG_RETRANS_DATA_ACKED)) { + /* High resolution needed and available? */ + if (ca_ops->flags & TCP_CONG_RTT_STAMP && + !ktime_equal(last_ackt, + net_invalid_timestamp())) + rtt_us = ktime_us_delta(ktime_get_real(), + last_ackt); + else if (seq_rtt > 0) + rtt_us = jiffies_to_usecs(seq_rtt); + } - if (ca_ops->pkts_acked) - ca_ops->pkts_acked(sk, pkts_acked, last_ackt); + ca_ops->pkts_acked(sk, pkts_acked, rtt_us); + } } #if FASTRETRANS_DEBUG > 0 @@ -2673,7 +2708,7 @@ static void tcp_undo_spur_to_response(struct sock *sk, int flag) * to prove that the RTO is indeed spurious. It transfers the control * from F-RTO to the conventional RTO recovery */ -static int tcp_process_frto(struct sock *sk, u32 prior_snd_una, int flag) +static int tcp_process_frto(struct sock *sk, int flag) { struct tcp_sock *tp = tcp_sk(sk); @@ -2693,8 +2728,7 @@ static int tcp_process_frto(struct sock *sk, u32 prior_snd_una, int flag) * ACK isn't duplicate nor advances window, e.g., opposite dir * data, winupdate */ - if ((tp->snd_una == prior_snd_una) && (flag&FLAG_NOT_DUP) && - !(flag&FLAG_FORWARD_PROGRESS)) + if (!(flag&FLAG_ANY_PROGRESS) && (flag&FLAG_NOT_DUP)) return 1; if (!(flag&FLAG_DATA_ACKED)) { @@ -2774,6 +2808,9 @@ static int tcp_ack(struct sock *sk, struct sk_buff *skb, int flag) if (before(ack, prior_snd_una)) goto old_ack; + if (after(ack, prior_snd_una)) + flag |= FLAG_SND_UNA_ADVANCED; + if (sysctl_tcp_abc) { if (icsk->icsk_ca_state < TCP_CA_CWR) tp->bytes_acked += ack - prior_snd_una; @@ -2826,17 +2863,17 @@ static int tcp_ack(struct sock *sk, struct sk_buff *skb, int flag) flag |= tcp_clean_rtx_queue(sk, &seq_rtt); if (tp->frto_counter) - frto_cwnd = tcp_process_frto(sk, prior_snd_una, flag); + frto_cwnd = tcp_process_frto(sk, flag); if (tcp_ack_is_dubious(sk, flag)) { /* Advance CWND, if state allows this. */ if ((flag & FLAG_DATA_ACKED) && !frto_cwnd && tcp_may_raise_cwnd(sk, flag)) - tcp_cong_avoid(sk, ack, seq_rtt, prior_in_flight, 0); - tcp_fastretrans_alert(sk, prior_snd_una, prior_packets, flag); + tcp_cong_avoid(sk, ack, prior_in_flight, 0); + tcp_fastretrans_alert(sk, prior_packets, flag); } else { if ((flag & FLAG_DATA_ACKED) && !frto_cwnd) - tcp_cong_avoid(sk, ack, seq_rtt, prior_in_flight, 1); + tcp_cong_avoid(sk, ack, prior_in_flight, 1); } if ((flag & FLAG_FORWARD_PROGRESS) || !(flag&FLAG_NOT_DUP)) diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index fa36e1aede3e..e089a978e128 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -833,8 +833,7 @@ static struct tcp_md5sig_key * return NULL; for (i = 0; i < tp->md5sig_info->entries4; i++) { if (tp->md5sig_info->keys4[i].addr == addr) - return (struct tcp_md5sig_key *) - &tp->md5sig_info->keys4[i]; + return &tp->md5sig_info->keys4[i].base; } return NULL; } @@ -865,9 +864,9 @@ int tcp_v4_md5_do_add(struct sock *sk, __be32 addr, key = (struct tcp4_md5sig_key *)tcp_v4_md5_do_lookup(sk, addr); if (key) { /* Pre-existing entry - just update that one. */ - kfree(key->key); - key->key = newkey; - key->keylen = newkeylen; + kfree(key->base.key); + key->base.key = newkey; + key->base.keylen = newkeylen; } else { struct tcp_md5sig_info *md5sig; @@ -906,9 +905,9 @@ int tcp_v4_md5_do_add(struct sock *sk, __be32 addr, md5sig->alloced4++; } md5sig->entries4++; - md5sig->keys4[md5sig->entries4 - 1].addr = addr; - md5sig->keys4[md5sig->entries4 - 1].key = newkey; - md5sig->keys4[md5sig->entries4 - 1].keylen = newkeylen; + md5sig->keys4[md5sig->entries4 - 1].addr = addr; + md5sig->keys4[md5sig->entries4 - 1].base.key = newkey; + md5sig->keys4[md5sig->entries4 - 1].base.keylen = newkeylen; } return 0; } @@ -930,7 +929,7 @@ int tcp_v4_md5_do_del(struct sock *sk, __be32 addr) for (i = 0; i < tp->md5sig_info->entries4; i++) { if (tp->md5sig_info->keys4[i].addr == addr) { /* Free the key */ - kfree(tp->md5sig_info->keys4[i].key); + kfree(tp->md5sig_info->keys4[i].base.key); tp->md5sig_info->entries4--; if (tp->md5sig_info->entries4 == 0) { @@ -964,7 +963,7 @@ static void tcp_v4_clear_md5_list(struct sock *sk) if (tp->md5sig_info->entries4) { int i; for (i = 0; i < tp->md5sig_info->entries4; i++) - kfree(tp->md5sig_info->keys4[i].key); + kfree(tp->md5sig_info->keys4[i].base.key); tp->md5sig_info->entries4 = 0; tcp_free_md5sig_pool(); } @@ -2045,10 +2044,7 @@ static void *established_get_first(struct seq_file *seq) struct hlist_node *node; struct inet_timewait_sock *tw; - /* We can reschedule _before_ having picked the target: */ - cond_resched_softirq(); - - read_lock(&tcp_hashinfo.ehash[st->bucket].lock); + read_lock_bh(&tcp_hashinfo.ehash[st->bucket].lock); sk_for_each(sk, node, &tcp_hashinfo.ehash[st->bucket].chain) { if (sk->sk_family != st->family) { continue; @@ -2065,7 +2061,7 @@ static void *established_get_first(struct seq_file *seq) rc = tw; goto out; } - read_unlock(&tcp_hashinfo.ehash[st->bucket].lock); + read_unlock_bh(&tcp_hashinfo.ehash[st->bucket].lock); st->state = TCP_SEQ_STATE_ESTABLISHED; } out: @@ -2092,14 +2088,11 @@ get_tw: cur = tw; goto out; } - read_unlock(&tcp_hashinfo.ehash[st->bucket].lock); + read_unlock_bh(&tcp_hashinfo.ehash[st->bucket].lock); st->state = TCP_SEQ_STATE_ESTABLISHED; - /* We can reschedule between buckets: */ - cond_resched_softirq(); - if (++st->bucket < tcp_hashinfo.ehash_size) { - read_lock(&tcp_hashinfo.ehash[st->bucket].lock); + read_lock_bh(&tcp_hashinfo.ehash[st->bucket].lock); sk = sk_head(&tcp_hashinfo.ehash[st->bucket].chain); } else { cur = NULL; @@ -2144,7 +2137,6 @@ static void *tcp_get_idx(struct seq_file *seq, loff_t pos) if (!rc) { inet_listen_unlock(&tcp_hashinfo); - local_bh_disable(); st->state = TCP_SEQ_STATE_ESTABLISHED; rc = established_get_idx(seq, pos); } @@ -2177,7 +2169,6 @@ static void *tcp_seq_next(struct seq_file *seq, void *v, loff_t *pos) rc = listening_get_next(seq, v); if (!rc) { inet_listen_unlock(&tcp_hashinfo); - local_bh_disable(); st->state = TCP_SEQ_STATE_ESTABLISHED; rc = established_get_first(seq); } @@ -2209,8 +2200,7 @@ static void tcp_seq_stop(struct seq_file *seq, void *v) case TCP_SEQ_STATE_TIME_WAIT: case TCP_SEQ_STATE_ESTABLISHED: if (v) - read_unlock(&tcp_hashinfo.ehash[st->bucket].lock); - local_bh_enable(); + read_unlock_bh(&tcp_hashinfo.ehash[st->bucket].lock); break; } } diff --git a/net/ipv4/tcp_lp.c b/net/ipv4/tcp_lp.c index e49836ce012e..e7f5ef92cbd8 100644 --- a/net/ipv4/tcp_lp.c +++ b/net/ipv4/tcp_lp.c @@ -115,13 +115,12 @@ static void tcp_lp_init(struct sock *sk) * Will only call newReno CA when away from inference. * From TCP-LP's paper, this will be handled in additive increasement. */ -static void tcp_lp_cong_avoid(struct sock *sk, u32 ack, u32 rtt, u32 in_flight, - int flag) +static void tcp_lp_cong_avoid(struct sock *sk, u32 ack, u32 in_flight, int flag) { struct lp *lp = inet_csk_ca(sk); if (!(lp->flag & LP_WITHIN_INF)) - tcp_reno_cong_avoid(sk, ack, rtt, in_flight, flag); + tcp_reno_cong_avoid(sk, ack, in_flight, flag); } /** @@ -261,13 +260,13 @@ static void tcp_lp_rtt_sample(struct sock *sk, u32 rtt) * newReno in increase case. * We work it out by following the idea from TCP-LP's paper directly */ -static void tcp_lp_pkts_acked(struct sock *sk, u32 num_acked, ktime_t last) +static void tcp_lp_pkts_acked(struct sock *sk, u32 num_acked, s32 rtt_us) { struct tcp_sock *tp = tcp_sk(sk); struct lp *lp = inet_csk_ca(sk); - if (!ktime_equal(last, net_invalid_timestamp())) - tcp_lp_rtt_sample(sk, ktime_to_us(net_timedelta(last))); + if (rtt_us > 0) + tcp_lp_rtt_sample(sk, rtt_us); /* calc inference */ if (tcp_time_stamp > tp->rx_opt.rcv_tsecr) diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c index 53232dd6fb48..666d8a58d14a 100644 --- a/net/ipv4/tcp_output.c +++ b/net/ipv4/tcp_output.c @@ -699,6 +699,14 @@ int tcp_fragment(struct sock *sk, struct sk_buff *skb, u32 len, unsigned int mss tp->fackets_out -= diff; if ((int)tp->fackets_out < 0) tp->fackets_out = 0; + /* SACK fastpath might overwrite it unless dealt with */ + if (tp->fastpath_skb_hint != NULL && + after(TCP_SKB_CB(tp->fastpath_skb_hint)->seq, + TCP_SKB_CB(skb)->seq)) { + tp->fastpath_cnt_hint -= diff; + if ((int)tp->fastpath_cnt_hint < 0) + tp->fastpath_cnt_hint = 0; + } } } @@ -1607,7 +1615,7 @@ u32 __tcp_select_window(struct sock *sk) if (window <= free_space - mss || window > free_space) window = (free_space/mss)*mss; else if (mss == full_space && - free_space > window + full_space/2) + free_space > window + full_space/2) window = free_space; } diff --git a/net/ipv4/tcp_probe.c b/net/ipv4/tcp_probe.c index d9323dfff826..b76398d1b454 100644 --- a/net/ipv4/tcp_probe.c +++ b/net/ipv4/tcp_probe.c @@ -6,8 +6,7 @@ * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. + * the Free Software Foundation; either version 2 of the License. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of @@ -25,23 +24,22 @@ #include <linux/tcp.h> #include <linux/proc_fs.h> #include <linux/module.h> -#include <linux/kfifo.h> #include <linux/ktime.h> #include <linux/time.h> -#include <linux/vmalloc.h> #include <net/tcp.h> MODULE_AUTHOR("Stephen Hemminger <shemminger@linux-foundation.org>"); MODULE_DESCRIPTION("TCP cwnd snooper"); MODULE_LICENSE("GPL"); +MODULE_VERSION("1.1"); static int port __read_mostly = 0; MODULE_PARM_DESC(port, "Port to match (0=all)"); module_param(port, int, 0); -static int bufsize __read_mostly = 64*1024; -MODULE_PARM_DESC(bufsize, "Log buffer size (default 64k)"); +static int bufsize __read_mostly = 4096; +MODULE_PARM_DESC(bufsize, "Log buffer size in packets (4096)"); module_param(bufsize, int, 0); static int full __read_mostly; @@ -50,39 +48,38 @@ module_param(full, int, 0); static const char procname[] = "tcpprobe"; -struct { - struct kfifo *fifo; +struct tcp_log { + ktime_t tstamp; + __be32 saddr, daddr; + __be16 sport, dport; + u16 length; + u32 snd_nxt; + u32 snd_una; + u32 snd_wnd; + u32 snd_cwnd; + u32 ssthresh; + u32 srtt; +}; + +static struct { spinlock_t lock; wait_queue_head_t wait; ktime_t start; u32 lastcwnd; -} tcpw; -/* - * Print to log with timestamps. - * FIXME: causes an extra copy - */ -static void printl(const char *fmt, ...) - __attribute__ ((format (printf, 1, 2))); + unsigned long head, tail; + struct tcp_log *log; +} tcp_probe; + -static void printl(const char *fmt, ...) +static inline int tcp_probe_used(void) { - va_list args; - int len; - struct timespec tv; - char tbuf[256]; - - va_start(args, fmt); - /* want monotonic time since start of tcp_probe */ - tv = ktime_to_timespec(ktime_sub(ktime_get(), tcpw.start)); - - len = sprintf(tbuf, "%lu.%09lu ", - (unsigned long) tv.tv_sec, (unsigned long) tv.tv_nsec); - len += vscnprintf(tbuf+len, sizeof(tbuf)-len, fmt, args); - va_end(args); - - kfifo_put(tcpw.fifo, tbuf, len); - wake_up(&tcpw.wait); + return (tcp_probe.head - tcp_probe.tail) % bufsize; +} + +static inline int tcp_probe_avail(void) +{ + return bufsize - tcp_probe_used(); } /* @@ -97,63 +94,118 @@ static int jtcp_rcv_established(struct sock *sk, struct sk_buff *skb, /* Only update if port matches */ if ((port == 0 || ntohs(inet->dport) == port || ntohs(inet->sport) == port) - && (full || tp->snd_cwnd != tcpw.lastcwnd)) { - printl("%d.%d.%d.%d:%u %d.%d.%d.%d:%u %d %#x %#x %u %u %u %u\n", - NIPQUAD(inet->saddr), ntohs(inet->sport), - NIPQUAD(inet->daddr), ntohs(inet->dport), - skb->len, tp->snd_nxt, tp->snd_una, - tp->snd_cwnd, tcp_current_ssthresh(sk), - tp->snd_wnd, tp->srtt >> 3); - tcpw.lastcwnd = tp->snd_cwnd; + && (full || tp->snd_cwnd != tcp_probe.lastcwnd)) { + + spin_lock(&tcp_probe.lock); + /* If log fills, just silently drop */ + if (tcp_probe_avail() > 1) { + struct tcp_log *p = tcp_probe.log + tcp_probe.head; + + p->tstamp = ktime_get(); + p->saddr = inet->saddr; + p->sport = inet->sport; + p->daddr = inet->daddr; + p->dport = inet->dport; + p->length = skb->len; + p->snd_nxt = tp->snd_nxt; + p->snd_una = tp->snd_una; + p->snd_cwnd = tp->snd_cwnd; + p->snd_wnd = tp->snd_wnd; + p->ssthresh = tcp_current_ssthresh(sk); + p->srtt = tp->srtt >> 3; + + tcp_probe.head = (tcp_probe.head + 1) % bufsize; + } + tcp_probe.lastcwnd = tp->snd_cwnd; + spin_unlock(&tcp_probe.lock); + + wake_up(&tcp_probe.wait); } jprobe_return(); return 0; } -static struct jprobe tcp_probe = { +static struct jprobe tcp_jprobe = { .kp = { .symbol_name = "tcp_rcv_established", }, - .entry = JPROBE_ENTRY(jtcp_rcv_established), + .entry = jtcp_rcv_established, }; - static int tcpprobe_open(struct inode * inode, struct file * file) { - kfifo_reset(tcpw.fifo); - tcpw.start = ktime_get(); + /* Reset (empty) log */ + spin_lock_bh(&tcp_probe.lock); + tcp_probe.head = tcp_probe.tail = 0; + tcp_probe.start = ktime_get(); + spin_unlock_bh(&tcp_probe.lock); + return 0; } +static int tcpprobe_sprint(char *tbuf, int n) +{ + const struct tcp_log *p + = tcp_probe.log + tcp_probe.tail % bufsize; + struct timespec tv + = ktime_to_timespec(ktime_sub(p->tstamp, tcp_probe.start)); + + return snprintf(tbuf, n, + "%lu.%09lu %d.%d.%d.%d:%u %d.%d.%d.%d:%u" + " %d %#x %#x %u %u %u %u\n", + (unsigned long) tv.tv_sec, + (unsigned long) tv.tv_nsec, + NIPQUAD(p->saddr), ntohs(p->sport), + NIPQUAD(p->daddr), ntohs(p->dport), + p->length, p->snd_nxt, p->snd_una, + p->snd_cwnd, p->ssthresh, p->snd_wnd, p->srtt); +} + static ssize_t tcpprobe_read(struct file *file, char __user *buf, size_t len, loff_t *ppos) { int error = 0, cnt = 0; - unsigned char *tbuf; if (!buf || len < 0) return -EINVAL; - if (len == 0) - return 0; + while (cnt < len) { + char tbuf[128]; + int width; + + /* Wait for data in buffer */ + error = wait_event_interruptible(tcp_probe.wait, + tcp_probe_used() > 0); + if (error) + break; - tbuf = vmalloc(len); - if (!tbuf) - return -ENOMEM; + spin_lock_bh(&tcp_probe.lock); + if (tcp_probe.head == tcp_probe.tail) { + /* multiple readers race? */ + spin_unlock_bh(&tcp_probe.lock); + continue; + } - error = wait_event_interruptible(tcpw.wait, - __kfifo_len(tcpw.fifo) != 0); - if (error) - goto out_free; + width = tcpprobe_sprint(tbuf, sizeof(tbuf)); - cnt = kfifo_get(tcpw.fifo, tbuf, len); - error = copy_to_user(buf, tbuf, cnt); + if (width < len) + tcp_probe.tail = (tcp_probe.tail + 1) % bufsize; -out_free: - vfree(tbuf); + spin_unlock_bh(&tcp_probe.lock); + + /* if record greater than space available + return partial buffer (so far) */ + if (width >= len) + break; + + error = copy_to_user(buf + cnt, tbuf, width); + if (error) + break; + cnt += width; + } - return error ? error : cnt; + return cnt == 0 ? error : cnt; } static const struct file_operations tcpprobe_fops = { @@ -166,34 +218,37 @@ static __init int tcpprobe_init(void) { int ret = -ENOMEM; - init_waitqueue_head(&tcpw.wait); - spin_lock_init(&tcpw.lock); - tcpw.fifo = kfifo_alloc(bufsize, GFP_KERNEL, &tcpw.lock); - if (IS_ERR(tcpw.fifo)) - return PTR_ERR(tcpw.fifo); + init_waitqueue_head(&tcp_probe.wait); + spin_lock_init(&tcp_probe.lock); + + if (bufsize < 0) + return -EINVAL; + + tcp_probe.log = kcalloc(sizeof(struct tcp_log), bufsize, GFP_KERNEL); + if (!tcp_probe.log) + goto err0; if (!proc_net_fops_create(procname, S_IRUSR, &tcpprobe_fops)) goto err0; - ret = register_jprobe(&tcp_probe); + ret = register_jprobe(&tcp_jprobe); if (ret) goto err1; - pr_info("TCP watch registered (port=%d)\n", port); + pr_info("TCP probe registered (port=%d)\n", port); return 0; err1: proc_net_remove(procname); err0: - kfifo_free(tcpw.fifo); + kfree(tcp_probe.log); return ret; } module_init(tcpprobe_init); static __exit void tcpprobe_exit(void) { - kfifo_free(tcpw.fifo); proc_net_remove(procname); - unregister_jprobe(&tcp_probe); - + unregister_jprobe(&tcp_jprobe); + kfree(tcp_probe.log); } module_exit(tcpprobe_exit); diff --git a/net/ipv4/tcp_scalable.c b/net/ipv4/tcp_scalable.c index 4624501e9680..be27a33a1c68 100644 --- a/net/ipv4/tcp_scalable.c +++ b/net/ipv4/tcp_scalable.c @@ -15,7 +15,7 @@ #define TCP_SCALABLE_AI_CNT 50U #define TCP_SCALABLE_MD_SCALE 3 -static void tcp_scalable_cong_avoid(struct sock *sk, u32 ack, u32 rtt, +static void tcp_scalable_cong_avoid(struct sock *sk, u32 ack, u32 in_flight, int flag) { struct tcp_sock *tp = tcp_sk(sk); diff --git a/net/ipv4/tcp_vegas.c b/net/ipv4/tcp_vegas.c index e218a51ceced..b49dedcda52d 100644 --- a/net/ipv4/tcp_vegas.c +++ b/net/ipv4/tcp_vegas.c @@ -112,16 +112,16 @@ EXPORT_SYMBOL_GPL(tcp_vegas_init); * o min-filter RTT samples from a much longer window (forever for now) * to find the propagation delay (baseRTT) */ -void tcp_vegas_pkts_acked(struct sock *sk, u32 cnt, ktime_t last) +void tcp_vegas_pkts_acked(struct sock *sk, u32 cnt, s32 rtt_us) { struct vegas *vegas = inet_csk_ca(sk); u32 vrtt; - if (ktime_equal(last, net_invalid_timestamp())) + if (rtt_us < 0) return; /* Never allow zero rtt or baseRTT */ - vrtt = ktime_to_us(net_timedelta(last)) + 1; + vrtt = rtt_us + 1; /* Filter to find propagation delay: */ if (vrtt < vegas->baseRTT) @@ -163,13 +163,13 @@ void tcp_vegas_cwnd_event(struct sock *sk, enum tcp_ca_event event) EXPORT_SYMBOL_GPL(tcp_vegas_cwnd_event); static void tcp_vegas_cong_avoid(struct sock *sk, u32 ack, - u32 seq_rtt, u32 in_flight, int flag) + u32 in_flight, int flag) { struct tcp_sock *tp = tcp_sk(sk); struct vegas *vegas = inet_csk_ca(sk); if (!vegas->doing_vegas_now) - return tcp_reno_cong_avoid(sk, ack, seq_rtt, in_flight, flag); + return tcp_reno_cong_avoid(sk, ack, in_flight, flag); /* The key players are v_beg_snd_una and v_beg_snd_nxt. * @@ -228,7 +228,7 @@ static void tcp_vegas_cong_avoid(struct sock *sk, u32 ack, /* We don't have enough RTT samples to do the Vegas * calculation, so we'll behave like Reno. */ - tcp_reno_cong_avoid(sk, ack, seq_rtt, in_flight, flag); + tcp_reno_cong_avoid(sk, ack, in_flight, flag); } else { u32 rtt, target_cwnd, diff; diff --git a/net/ipv4/tcp_vegas.h b/net/ipv4/tcp_vegas.h index 502fa8183634..6c0eea2f8249 100644 --- a/net/ipv4/tcp_vegas.h +++ b/net/ipv4/tcp_vegas.h @@ -17,7 +17,7 @@ struct vegas { extern void tcp_vegas_init(struct sock *sk); extern void tcp_vegas_state(struct sock *sk, u8 ca_state); -extern void tcp_vegas_pkts_acked(struct sock *sk, u32 cnt, ktime_t last); +extern void tcp_vegas_pkts_acked(struct sock *sk, u32 cnt, s32 rtt_us); extern void tcp_vegas_cwnd_event(struct sock *sk, enum tcp_ca_event event); extern void tcp_vegas_get_info(struct sock *sk, u32 ext, struct sk_buff *skb); diff --git a/net/ipv4/tcp_veno.c b/net/ipv4/tcp_veno.c index ec854cc5fad5..8fb2aee0b1a4 100644 --- a/net/ipv4/tcp_veno.c +++ b/net/ipv4/tcp_veno.c @@ -69,16 +69,16 @@ static void tcp_veno_init(struct sock *sk) } /* Do rtt sampling needed for Veno. */ -static void tcp_veno_pkts_acked(struct sock *sk, u32 cnt, ktime_t last) +static void tcp_veno_pkts_acked(struct sock *sk, u32 cnt, s32 rtt_us) { struct veno *veno = inet_csk_ca(sk); u32 vrtt; - if (ktime_equal(last, net_invalid_timestamp())) + if (rtt_us < 0) return; /* Never allow zero rtt or baseRTT */ - vrtt = ktime_to_us(net_timedelta(last)) + 1; + vrtt = rtt_us + 1; /* Filter to find propagation delay: */ if (vrtt < veno->basertt) @@ -115,13 +115,13 @@ static void tcp_veno_cwnd_event(struct sock *sk, enum tcp_ca_event event) } static void tcp_veno_cong_avoid(struct sock *sk, u32 ack, - u32 seq_rtt, u32 in_flight, int flag) + u32 in_flight, int flag) { struct tcp_sock *tp = tcp_sk(sk); struct veno *veno = inet_csk_ca(sk); if (!veno->doing_veno_now) - return tcp_reno_cong_avoid(sk, ack, seq_rtt, in_flight, flag); + return tcp_reno_cong_avoid(sk, ack, in_flight, flag); /* limited by applications */ if (!tcp_is_cwnd_limited(sk, in_flight)) @@ -132,7 +132,7 @@ static void tcp_veno_cong_avoid(struct sock *sk, u32 ack, /* We don't have enough rtt samples to do the Veno * calculation, so we'll behave like Reno. */ - tcp_reno_cong_avoid(sk, ack, seq_rtt, in_flight, flag); + tcp_reno_cong_avoid(sk, ack, in_flight, flag); } else { u32 rtt, target_cwnd; diff --git a/net/ipv4/tcp_westwood.c b/net/ipv4/tcp_westwood.c index e61e09dd513e..20151d6a6241 100644 --- a/net/ipv4/tcp_westwood.c +++ b/net/ipv4/tcp_westwood.c @@ -100,11 +100,12 @@ static void westwood_filter(struct westwood *w, u32 delta) * Called after processing group of packets. * but all westwood needs is the last sample of srtt. */ -static void tcp_westwood_pkts_acked(struct sock *sk, u32 cnt, ktime_t last) +static void tcp_westwood_pkts_acked(struct sock *sk, u32 cnt, s32 rtt) { struct westwood *w = inet_csk_ca(sk); - if (cnt > 0) - w->rtt = tcp_sk(sk)->srtt >> 3; + + if (rtt > 0) + w->rtt = usecs_to_jiffies(rtt); } /* diff --git a/net/ipv4/tcp_yeah.c b/net/ipv4/tcp_yeah.c index 545ed237ab53..c107fba7430e 100644 --- a/net/ipv4/tcp_yeah.c +++ b/net/ipv4/tcp_yeah.c @@ -58,7 +58,7 @@ static void tcp_yeah_init(struct sock *sk) } -static void tcp_yeah_pkts_acked(struct sock *sk, u32 pkts_acked, ktime_t last) +static void tcp_yeah_pkts_acked(struct sock *sk, u32 pkts_acked, s32 rtt_us) { const struct inet_connection_sock *icsk = inet_csk(sk); struct yeah *yeah = inet_csk_ca(sk); @@ -66,11 +66,11 @@ static void tcp_yeah_pkts_acked(struct sock *sk, u32 pkts_acked, ktime_t last) if (icsk->icsk_ca_state == TCP_CA_Open) yeah->pkts_acked = pkts_acked; - tcp_vegas_pkts_acked(sk, pkts_acked, last); + tcp_vegas_pkts_acked(sk, pkts_acked, rtt_us); } static void tcp_yeah_cong_avoid(struct sock *sk, u32 ack, - u32 seq_rtt, u32 in_flight, int flag) + u32 in_flight, int flag) { struct tcp_sock *tp = tcp_sk(sk); struct yeah *yeah = inet_csk_ca(sk); diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c index facb7e29304e..69d4bd10f9c6 100644 --- a/net/ipv4/udp.c +++ b/net/ipv4/udp.c @@ -70,6 +70,7 @@ * Alexey Kuznetsov: allow both IPv4 and IPv6 sockets to bind * a single port at the same time. * Derek Atkins <derek@ihtfp.com>: Add Encapulation Support + * James Chapman : Add L2TP encapsulation type. * * * This program is free software; you can redistribute it and/or @@ -504,6 +505,8 @@ send: out: up->len = 0; up->pending = 0; + if (!err) + UDP_INC_STATS_USER(UDP_MIB_OUTDATAGRAMS, up->pcflag); return err; } @@ -692,10 +695,8 @@ out: ip_rt_put(rt); if (free) kfree(ipc.opt); - if (!err) { - UDP_INC_STATS_USER(UDP_MIB_OUTDATAGRAMS, is_udplite); + if (!err) return len; - } /* * ENOBUFS = no kernel mem, SOCK_NOSPACE = no sndbuf space. Reporting * ENOBUFS might not be good (it's not tunable per se), but otherwise @@ -919,104 +920,6 @@ int udp_disconnect(struct sock *sk, int flags) return 0; } -/* return: - * 1 if the UDP system should process it - * 0 if we should drop this packet - * -1 if it should get processed by xfrm4_rcv_encap - */ -static int udp_encap_rcv(struct sock * sk, struct sk_buff *skb) -{ -#ifndef CONFIG_XFRM - return 1; -#else - struct udp_sock *up = udp_sk(sk); - struct udphdr *uh; - struct iphdr *iph; - int iphlen, len; - - __u8 *udpdata; - __be32 *udpdata32; - __u16 encap_type = up->encap_type; - - /* if we're overly short, let UDP handle it */ - len = skb->len - sizeof(struct udphdr); - if (len <= 0) - return 1; - - /* if this is not encapsulated socket, then just return now */ - if (!encap_type) - return 1; - - /* If this is a paged skb, make sure we pull up - * whatever data we need to look at. */ - if (!pskb_may_pull(skb, sizeof(struct udphdr) + min(len, 8))) - return 1; - - /* Now we can get the pointers */ - uh = udp_hdr(skb); - udpdata = (__u8 *)uh + sizeof(struct udphdr); - udpdata32 = (__be32 *)udpdata; - - switch (encap_type) { - default: - case UDP_ENCAP_ESPINUDP: - /* Check if this is a keepalive packet. If so, eat it. */ - if (len == 1 && udpdata[0] == 0xff) { - return 0; - } else if (len > sizeof(struct ip_esp_hdr) && udpdata32[0] != 0) { - /* ESP Packet without Non-ESP header */ - len = sizeof(struct udphdr); - } else - /* Must be an IKE packet.. pass it through */ - return 1; - break; - case UDP_ENCAP_ESPINUDP_NON_IKE: - /* Check if this is a keepalive packet. If so, eat it. */ - if (len == 1 && udpdata[0] == 0xff) { - return 0; - } else if (len > 2 * sizeof(u32) + sizeof(struct ip_esp_hdr) && - udpdata32[0] == 0 && udpdata32[1] == 0) { - - /* ESP Packet with Non-IKE marker */ - len = sizeof(struct udphdr) + 2 * sizeof(u32); - } else - /* Must be an IKE packet.. pass it through */ - return 1; - break; - } - - /* At this point we are sure that this is an ESPinUDP packet, - * so we need to remove 'len' bytes from the packet (the UDP - * header and optional ESP marker bytes) and then modify the - * protocol to ESP, and then call into the transform receiver. - */ - if (skb_cloned(skb) && pskb_expand_head(skb, 0, 0, GFP_ATOMIC)) - return 0; - - /* Now we can update and verify the packet length... */ - iph = ip_hdr(skb); - iphlen = iph->ihl << 2; - iph->tot_len = htons(ntohs(iph->tot_len) - len); - if (skb->len < iphlen + len) { - /* packet is too small!?! */ - return 0; - } - - /* pull the data buffer up to the ESP header and set the - * transport header to point to ESP. Keep UDP on the stack - * for later. - */ - __skb_pull(skb, len); - skb_reset_transport_header(skb); - - /* modify the protocol (it's ESP!) */ - iph->protocol = IPPROTO_ESP; - - /* and let the caller know to send this into the ESP processor... */ - return -1; -#endif -} - /* returns: * -1: error * 0: success @@ -1039,28 +942,28 @@ int udp_queue_rcv_skb(struct sock * sk, struct sk_buff *skb) if (up->encap_type) { /* - * This is an encapsulation socket, so let's see if this is - * an encapsulated packet. - * If it's a keepalive packet, then just eat it. - * If it's an encapsulateed packet, then pass it to the - * IPsec xfrm input and return the response - * appropriately. Otherwise, just fall through and - * pass this up the UDP socket. + * This is an encapsulation socket so pass the skb to + * the socket's udp_encap_rcv() hook. Otherwise, just + * fall through and pass this up the UDP socket. + * up->encap_rcv() returns the following value: + * =0 if skb was successfully passed to the encap + * handler or was discarded by it. + * >0 if skb should be passed on to UDP. + * <0 if skb should be resubmitted as proto -N */ - int ret; - ret = udp_encap_rcv(sk, skb); - if (ret == 0) { - /* Eat the packet .. */ - kfree_skb(skb); - return 0; - } - if (ret < 0) { - /* process the ESP packet */ - ret = xfrm4_rcv_encap(skb, up->encap_type); - UDP_INC_STATS_BH(UDP_MIB_INDATAGRAMS, up->pcflag); - return -ret; + /* if we're overly short, let UDP handle it */ + if (skb->len > sizeof(struct udphdr) && + up->encap_rcv != NULL) { + int ret; + + ret = (*up->encap_rcv)(sk, skb); + if (ret <= 0) { + UDP_INC_STATS_BH(UDP_MIB_INDATAGRAMS, up->pcflag); + return -ret; + } } + /* FALLTHROUGH -- it's a UDP Packet */ } @@ -1349,6 +1252,9 @@ int udp_lib_setsockopt(struct sock *sk, int level, int optname, case 0: case UDP_ENCAP_ESPINUDP: case UDP_ENCAP_ESPINUDP_NON_IKE: + up->encap_rcv = xfrm4_udp_encap_rcv; + /* FALLTHROUGH */ + case UDP_ENCAP_L2TPINUDP: up->encap_type = val; break; default: diff --git a/net/ipv4/xfrm4_input.c b/net/ipv4/xfrm4_input.c index fa1902dc81b8..2fa108245413 100644 --- a/net/ipv4/xfrm4_input.c +++ b/net/ipv4/xfrm4_input.c @@ -16,13 +16,6 @@ #include <net/ip.h> #include <net/xfrm.h> -int xfrm4_rcv(struct sk_buff *skb) -{ - return xfrm4_rcv_encap(skb, 0); -} - -EXPORT_SYMBOL(xfrm4_rcv); - static int xfrm4_parse_spi(struct sk_buff *skb, u8 nexthdr, __be32 *spi, __be32 *seq) { switch (nexthdr) { @@ -53,7 +46,7 @@ drop: } #endif -int xfrm4_rcv_encap(struct sk_buff *skb, __u16 encap_type) +static int xfrm4_rcv_encap(struct sk_buff *skb, __u16 encap_type) { __be32 spi, seq; struct xfrm_state *xfrm_vec[XFRM_MAX_DEPTH]; @@ -167,3 +160,108 @@ drop: kfree_skb(skb); return 0; } + +/* If it's a keepalive packet, then just eat it. + * If it's an encapsulated packet, then pass it to the + * IPsec xfrm input. + * Returns 0 if skb passed to xfrm or was dropped. + * Returns >0 if skb should be passed to UDP. + * Returns <0 if skb should be resubmitted (-ret is protocol) + */ +int xfrm4_udp_encap_rcv(struct sock *sk, struct sk_buff *skb) +{ + struct udp_sock *up = udp_sk(sk); + struct udphdr *uh; + struct iphdr *iph; + int iphlen, len; + int ret; + + __u8 *udpdata; + __be32 *udpdata32; + __u16 encap_type = up->encap_type; + + /* if this is not encapsulated socket, then just return now */ + if (!encap_type) + return 1; + + /* If this is a paged skb, make sure we pull up + * whatever data we need to look at. */ + len = skb->len - sizeof(struct udphdr); + if (!pskb_may_pull(skb, sizeof(struct udphdr) + min(len, 8))) + return 1; + + /* Now we can get the pointers */ + uh = udp_hdr(skb); + udpdata = (__u8 *)uh + sizeof(struct udphdr); + udpdata32 = (__be32 *)udpdata; + + switch (encap_type) { + default: + case UDP_ENCAP_ESPINUDP: + /* Check if this is a keepalive packet. If so, eat it. */ + if (len == 1 && udpdata[0] == 0xff) { + goto drop; + } else if (len > sizeof(struct ip_esp_hdr) && udpdata32[0] != 0) { + /* ESP Packet without Non-ESP header */ + len = sizeof(struct udphdr); + } else + /* Must be an IKE packet.. pass it through */ + return 1; + break; + case UDP_ENCAP_ESPINUDP_NON_IKE: + /* Check if this is a keepalive packet. If so, eat it. */ + if (len == 1 && udpdata[0] == 0xff) { + goto drop; + } else if (len > 2 * sizeof(u32) + sizeof(struct ip_esp_hdr) && + udpdata32[0] == 0 && udpdata32[1] == 0) { + + /* ESP Packet with Non-IKE marker */ + len = sizeof(struct udphdr) + 2 * sizeof(u32); + } else + /* Must be an IKE packet.. pass it through */ + return 1; + break; + } + + /* At this point we are sure that this is an ESPinUDP packet, + * so we need to remove 'len' bytes from the packet (the UDP + * header and optional ESP marker bytes) and then modify the + * protocol to ESP, and then call into the transform receiver. + */ + if (skb_cloned(skb) && pskb_expand_head(skb, 0, 0, GFP_ATOMIC)) + goto drop; + + /* Now we can update and verify the packet length... */ + iph = ip_hdr(skb); + iphlen = iph->ihl << 2; + iph->tot_len = htons(ntohs(iph->tot_len) - len); + if (skb->len < iphlen + len) { + /* packet is too small!?! */ + goto drop; + } + + /* pull the data buffer up to the ESP header and set the + * transport header to point to ESP. Keep UDP on the stack + * for later. + */ + __skb_pull(skb, len); + skb_reset_transport_header(skb); + + /* modify the protocol (it's ESP!) */ + iph->protocol = IPPROTO_ESP; + + /* process ESP */ + ret = xfrm4_rcv_encap(skb, encap_type); + return ret; + +drop: + kfree_skb(skb); + return 0; +} + +int xfrm4_rcv(struct sk_buff *skb) +{ + return xfrm4_rcv_encap(skb, 0); +} + +EXPORT_SYMBOL(xfrm4_rcv); diff --git a/net/ipv4/xfrm4_tunnel.c b/net/ipv4/xfrm4_tunnel.c index 568510304553..9275c79119b6 100644 --- a/net/ipv4/xfrm4_tunnel.c +++ b/net/ipv4/xfrm4_tunnel.c @@ -109,3 +109,4 @@ static void __exit ipip_fini(void) module_init(ipip_init); module_exit(ipip_fini); MODULE_LICENSE("GPL"); +MODULE_ALIAS_XFRM_TYPE(AF_INET, XFRM_PROTO_IPIP); diff --git a/net/ipv6/Kconfig b/net/ipv6/Kconfig index 8e5d54f23b49..eb0b8085949b 100644 --- a/net/ipv6/Kconfig +++ b/net/ipv6/Kconfig @@ -109,7 +109,7 @@ config INET6_IPCOMP If unsure, say Y. config IPV6_MIP6 - bool "IPv6: Mobility (EXPERIMENTAL)" + tristate "IPv6: Mobility (EXPERIMENTAL)" depends on IPV6 && EXPERIMENTAL select XFRM ---help--- diff --git a/net/ipv6/Makefile b/net/ipv6/Makefile index bb33309044c9..87c23a73d284 100644 --- a/net/ipv6/Makefile +++ b/net/ipv6/Makefile @@ -14,7 +14,6 @@ ipv6-$(CONFIG_XFRM) += xfrm6_policy.o xfrm6_state.o xfrm6_input.o \ xfrm6_output.o ipv6-$(CONFIG_NETFILTER) += netfilter.o ipv6-$(CONFIG_IPV6_MULTIPLE_TABLES) += fib6_rules.o -ipv6-$(CONFIG_IPV6_MIP6) += mip6.o ipv6-$(CONFIG_PROC_FS) += proc.o ipv6-objs += $(ipv6-y) @@ -28,6 +27,7 @@ obj-$(CONFIG_INET6_XFRM_MODE_TRANSPORT) += xfrm6_mode_transport.o obj-$(CONFIG_INET6_XFRM_MODE_TUNNEL) += xfrm6_mode_tunnel.o obj-$(CONFIG_INET6_XFRM_MODE_ROUTEOPTIMIZATION) += xfrm6_mode_ro.o obj-$(CONFIG_INET6_XFRM_MODE_BEET) += xfrm6_mode_beet.o +obj-$(CONFIG_IPV6_MIP6) += mip6.o obj-$(CONFIG_NETFILTER) += netfilter/ obj-$(CONFIG_IPV6_SIT) += sit.o diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index 2cc3728f6a2d..45b4c82148a0 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -1021,7 +1021,7 @@ int ipv6_dev_get_saddr(struct net_device *daddr_dev, hiscore.rule++; } if (ipv6_saddr_preferred(score.addr_type) || - (((ifa_result->flags & + (((ifa->flags & (IFA_F_DEPRECATED|IFA_F_OPTIMISTIC)) == 0))) { score.attrs |= IPV6_SADDR_SCORE_PREFERRED; if (!(hiscore.attrs & IPV6_SADDR_SCORE_PREFERRED)) { @@ -1034,7 +1034,7 @@ int ipv6_dev_get_saddr(struct net_device *daddr_dev, } /* Rule 4: Prefer home address */ -#ifdef CONFIG_IPV6_MIP6 +#if defined(CONFIG_IPV6_MIP6) || defined(CONFIG_IPV6_MIP6_MODULE) if (hiscore.rule < 4) { if (ifa_result->flags & IFA_F_HOMEADDRESS) hiscore.attrs |= IPV6_SADDR_SCORE_HOA; @@ -2256,18 +2256,21 @@ static int addrconf_notify(struct notifier_block *this, unsigned long event, struct net_device *dev = (struct net_device *) data; struct inet6_dev *idev = __in6_dev_get(dev); int run_pending = 0; + int err; switch(event) { case NETDEV_REGISTER: if (!idev && dev->mtu >= IPV6_MIN_MTU) { idev = ipv6_add_dev(dev); if (!idev) - printk(KERN_WARNING "IPv6: add_dev failed for %s\n", - dev->name); + return notifier_from_errno(-ENOMEM); } break; case NETDEV_UP: case NETDEV_CHANGE: + if (dev->flags & IFF_SLAVE) + break; + if (event == NETDEV_UP) { if (!netif_carrier_ok(dev)) { /* device is not ready yet. */ @@ -2370,7 +2373,9 @@ static int addrconf_notify(struct notifier_block *this, unsigned long event, NULL); addrconf_sysctl_register(idev, &idev->cnf); #endif - snmp6_register_dev(idev); + err = snmp6_register_dev(idev); + if (err) + return notifier_from_errno(err); } break; } @@ -2783,7 +2788,7 @@ static int if6_seq_show(struct seq_file *seq, void *v) return 0; } -static struct seq_operations if6_seq_ops = { +static const struct seq_operations if6_seq_ops = { .start = if6_seq_start, .next = if6_seq_next, .show = if6_seq_show, @@ -2833,7 +2838,7 @@ void if6_proc_exit(void) } #endif /* CONFIG_PROC_FS */ -#ifdef CONFIG_IPV6_MIP6 +#if defined(CONFIG_IPV6_MIP6) || defined(CONFIG_IPV6_MIP6_MODULE) /* Check if address is a home address configured on any interface. */ int ipv6_chk_home_addr(struct in6_addr *addr) { @@ -4241,7 +4246,6 @@ errout: void __exit addrconf_cleanup(void) { struct net_device *dev; - struct inet6_dev *idev; struct inet6_ifaddr *ifa; int i; @@ -4259,7 +4263,7 @@ void __exit addrconf_cleanup(void) */ for_each_netdev(dev) { - if ((idev = __in6_dev_get(dev)) == NULL) + if (__in6_dev_get(dev) == NULL) continue; addrconf_ifdown(dev, 1); } diff --git a/net/ipv6/addrconf_core.c b/net/ipv6/addrconf_core.c index faaefb692298..3f82e9542eda 100644 --- a/net/ipv6/addrconf_core.c +++ b/net/ipv6/addrconf_core.c @@ -50,6 +50,9 @@ int __ipv6_addr_type(const struct in6_addr *addr) if ((st & htonl(0xFFC00000)) == htonl(0xFEC00000)) return (IPV6_ADDR_SITELOCAL | IPV6_ADDR_UNICAST | IPV6_ADDR_SCOPE_TYPE(IPV6_ADDR_SCOPE_SITELOCAL)); /* addr-select 3.1 */ + if ((st & htonl(0xFE000000)) == htonl(0xFC000000)) + return (IPV6_ADDR_UNICAST | + IPV6_ADDR_SCOPE_TYPE(IPV6_ADDR_SCOPE_GLOBAL)); /* RFC 4193 */ if ((addr->s6_addr32[0] | addr->s6_addr32[1]) == 0) { if (addr->s6_addr32[2] == 0) { diff --git a/net/ipv6/af_inet6.c b/net/ipv6/af_inet6.c index b1a7755162b6..b5f96372ad73 100644 --- a/net/ipv6/af_inet6.c +++ b/net/ipv6/af_inet6.c @@ -58,9 +58,6 @@ #ifdef CONFIG_IPV6_TUNNEL #include <net/ip6_tunnel.h> #endif -#ifdef CONFIG_IPV6_MIP6 -#include <net/mip6.h> -#endif #include <asm/uaccess.h> #include <asm/system.h> @@ -853,9 +850,6 @@ static int __init inet6_init(void) ipv6_frag_init(); ipv6_nodata_init(); ipv6_destopt_init(); -#ifdef CONFIG_IPV6_MIP6 - mip6_init(); -#endif /* Init v6 transport protocols. */ udpv6_init(); @@ -921,9 +915,7 @@ static void __exit inet6_exit(void) /* Cleanup code parts. */ ipv6_packet_cleanup(); -#ifdef CONFIG_IPV6_MIP6 - mip6_fini(); -#endif + addrconf_cleanup(); ip6_flowlabel_cleanup(); ip6_route_cleanup(); diff --git a/net/ipv6/ah6.c b/net/ipv6/ah6.c index 128f94c79c64..53f46ab6af70 100644 --- a/net/ipv6/ah6.c +++ b/net/ipv6/ah6.c @@ -74,7 +74,7 @@ bad: return 0; } -#ifdef CONFIG_IPV6_MIP6 +#if defined(CONFIG_IPV6_MIP6) || defined(CONFIG_IPV6_MIP6_MODULE) /** * ipv6_rearrange_destopt - rearrange IPv6 destination options header * @iph: IPv6 header @@ -132,6 +132,8 @@ static void ipv6_rearrange_destopt(struct ipv6hdr *iph, struct ipv6_opt_hdr *des bad: return; } +#else +static void ipv6_rearrange_destopt(struct ipv6hdr *iph, struct ipv6_opt_hdr *destopt) {} #endif /** @@ -189,10 +191,8 @@ static int ipv6_clear_mutable_options(struct ipv6hdr *iph, int len, int dir) while (exthdr.raw < end) { switch (nexthdr) { case NEXTHDR_DEST: -#ifdef CONFIG_IPV6_MIP6 if (dir == XFRM_POLICY_OUT) ipv6_rearrange_destopt(iph, exthdr.opth); -#endif case NEXTHDR_HOP: if (!zero_out_mutable_opts(exthdr.opth)) { LIMIT_NETDEBUG( @@ -228,7 +228,7 @@ static int ah6_output(struct xfrm_state *x, struct sk_buff *skb) u8 nexthdr; char tmp_base[8]; struct { -#ifdef CONFIG_IPV6_MIP6 +#if defined(CONFIG_IPV6_MIP6) || defined(CONFIG_IPV6_MIP6_MODULE) struct in6_addr saddr; #endif struct in6_addr daddr; @@ -255,7 +255,7 @@ static int ah6_output(struct xfrm_state *x, struct sk_buff *skb) err = -ENOMEM; goto error; } -#ifdef CONFIG_IPV6_MIP6 +#if defined(CONFIG_IPV6_MIP6) || defined(CONFIG_IPV6_MIP6_MODULE) memcpy(tmp_ext, &top_iph->saddr, extlen); #else memcpy(tmp_ext, &top_iph->daddr, extlen); @@ -294,7 +294,7 @@ static int ah6_output(struct xfrm_state *x, struct sk_buff *skb) memcpy(top_iph, tmp_base, sizeof(tmp_base)); if (tmp_ext) { -#ifdef CONFIG_IPV6_MIP6 +#if defined(CONFIG_IPV6_MIP6) || defined(CONFIG_IPV6_MIP6_MODULE) memcpy(&top_iph->saddr, tmp_ext, extlen); #else memcpy(&top_iph->daddr, tmp_ext, extlen); @@ -554,3 +554,4 @@ module_init(ah6_init); module_exit(ah6_fini); MODULE_LICENSE("GPL"); +MODULE_ALIAS_XFRM_TYPE(AF_INET6, XFRM_PROTO_AH); diff --git a/net/ipv6/anycast.c b/net/ipv6/anycast.c index 2f49578e52a1..b8c533fbdb63 100644 --- a/net/ipv6/anycast.c +++ b/net/ipv6/anycast.c @@ -66,7 +66,6 @@ ip6_onlink(struct in6_addr *addr, struct net_device *dev) break; } read_unlock_bh(&idev->lock); - in6_dev_put(idev); } rcu_read_unlock(); return onlink; @@ -540,7 +539,7 @@ static int ac6_seq_show(struct seq_file *seq, void *v) return 0; } -static struct seq_operations ac6_seq_ops = { +static const struct seq_operations ac6_seq_ops = { .start = ac6_seq_start, .next = ac6_seq_next, .stop = ac6_seq_stop, diff --git a/net/ipv6/datagram.c b/net/ipv6/datagram.c index b1fe7ac5dc90..fe0f49024a0a 100644 --- a/net/ipv6/datagram.c +++ b/net/ipv6/datagram.c @@ -657,11 +657,10 @@ int datagram_send_ctl(struct msghdr *msg, struct flowi *fl, rthdr = (struct ipv6_rt_hdr *)CMSG_DATA(cmsg); switch (rthdr->type) { - case IPV6_SRCRT_TYPE_0: -#ifdef CONFIG_IPV6_MIP6 +#if defined(CONFIG_IPV6_MIP6) || defined(CONFIG_IPV6_MIP6_MODULE) case IPV6_SRCRT_TYPE_2: -#endif break; +#endif default: err = -EINVAL; goto exit_f; diff --git a/net/ipv6/esp6.c b/net/ipv6/esp6.c index 7107bb7e2e62..2db31ce3c7e6 100644 --- a/net/ipv6/esp6.c +++ b/net/ipv6/esp6.c @@ -421,3 +421,4 @@ module_init(esp6_init); module_exit(esp6_fini); MODULE_LICENSE("GPL"); +MODULE_ALIAS_XFRM_TYPE(AF_INET6, XFRM_PROTO_ESP); diff --git a/net/ipv6/exthdrs.c b/net/ipv6/exthdrs.c index 14be0b9b77a5..c82d4d49f71f 100644 --- a/net/ipv6/exthdrs.c +++ b/net/ipv6/exthdrs.c @@ -42,7 +42,7 @@ #include <net/ndisc.h> #include <net/ip6_route.h> #include <net/addrconf.h> -#ifdef CONFIG_IPV6_MIP6 +#if defined(CONFIG_IPV6_MIP6) || defined(CONFIG_IPV6_MIP6_MODULE) #include <net/xfrm.h> #endif @@ -90,6 +90,7 @@ int ipv6_find_tlv(struct sk_buff *skb, int offset, int type) bad: return -1; } +EXPORT_SYMBOL_GPL(ipv6_find_tlv); /* * Parsing tlv encoded headers. @@ -196,7 +197,7 @@ bad: Destination options header. *****************************/ -#ifdef CONFIG_IPV6_MIP6 +#if defined(CONFIG_IPV6_MIP6) || defined(CONFIG_IPV6_MIP6_MODULE) static int ipv6_dest_hao(struct sk_buff **skbp, int optoff) { struct sk_buff *skb = *skbp; @@ -270,7 +271,7 @@ static int ipv6_dest_hao(struct sk_buff **skbp, int optoff) #endif static struct tlvtype_proc tlvprocdestopt_lst[] = { -#ifdef CONFIG_IPV6_MIP6 +#if defined(CONFIG_IPV6_MIP6) || defined(CONFIG_IPV6_MIP6_MODULE) { .type = IPV6_TLV_HAO, .func = ipv6_dest_hao, @@ -283,7 +284,7 @@ static int ipv6_destopt_rcv(struct sk_buff **skbp) { struct sk_buff *skb = *skbp; struct inet6_skb_parm *opt = IP6CB(skb); -#ifdef CONFIG_IPV6_MIP6 +#if defined(CONFIG_IPV6_MIP6) || defined(CONFIG_IPV6_MIP6_MODULE) __u16 dstbuf; #endif struct dst_entry *dst; @@ -298,7 +299,7 @@ static int ipv6_destopt_rcv(struct sk_buff **skbp) } opt->lastopt = opt->dst1 = skb_network_header_len(skb); -#ifdef CONFIG_IPV6_MIP6 +#if defined(CONFIG_IPV6_MIP6) || defined(CONFIG_IPV6_MIP6_MODULE) dstbuf = opt->dst1; #endif @@ -308,7 +309,7 @@ static int ipv6_destopt_rcv(struct sk_buff **skbp) skb = *skbp; skb->transport_header += (skb_transport_header(skb)[1] + 1) << 3; opt = IP6CB(skb); -#ifdef CONFIG_IPV6_MIP6 +#if defined(CONFIG_IPV6_MIP6) || defined(CONFIG_IPV6_MIP6_MODULE) opt->nhoff = dstbuf; #else opt->nhoff = opt->dst1; @@ -371,22 +372,13 @@ static int ipv6_rthdr_rcv(struct sk_buff **skbp) struct rt0_hdr *rthdr; int accept_source_route = ipv6_devconf.accept_source_route; - if (accept_source_route < 0 || - ((idev = in6_dev_get(skb->dev)) == NULL)) { - kfree_skb(skb); - return -1; - } - if (idev->cnf.accept_source_route < 0) { + idev = in6_dev_get(skb->dev); + if (idev) { + if (accept_source_route > idev->cnf.accept_source_route) + accept_source_route = idev->cnf.accept_source_route; in6_dev_put(idev); - kfree_skb(skb); - return -1; } - if (accept_source_route > idev->cnf.accept_source_route) - accept_source_route = idev->cnf.accept_source_route; - - in6_dev_put(idev); - if (!pskb_may_pull(skb, skb_transport_offset(skb) + 8) || !pskb_may_pull(skb, (skb_transport_offset(skb) + ((skb_transport_header(skb)[1] + 1) << 3)))) { @@ -398,24 +390,6 @@ static int ipv6_rthdr_rcv(struct sk_buff **skbp) hdr = (struct ipv6_rt_hdr *)skb_transport_header(skb); - switch (hdr->type) { -#ifdef CONFIG_IPV6_MIP6 - case IPV6_SRCRT_TYPE_2: - break; -#endif - case IPV6_SRCRT_TYPE_0: - if (accept_source_route > 0) - break; - kfree_skb(skb); - return -1; - default: - IP6_INC_STATS_BH(ip6_dst_idev(skb->dst), - IPSTATS_MIB_INHDRERRORS); - icmpv6_param_prob(skb, ICMPV6_HDR_FIELD, - (&hdr->type) - skb_network_header(skb)); - return -1; - } - if (ipv6_addr_is_multicast(&ipv6_hdr(skb)->daddr) || skb->pkt_type != PACKET_HOST) { IP6_INC_STATS_BH(ip6_dst_idev(skb->dst), @@ -427,7 +401,7 @@ static int ipv6_rthdr_rcv(struct sk_buff **skbp) looped_back: if (hdr->segments_left == 0) { switch (hdr->type) { -#ifdef CONFIG_IPV6_MIP6 +#if defined(CONFIG_IPV6_MIP6) || defined(CONFIG_IPV6_MIP6_MODULE) case IPV6_SRCRT_TYPE_2: /* Silently discard type 2 header unless it was * processed by own @@ -453,18 +427,10 @@ looped_back: } switch (hdr->type) { - case IPV6_SRCRT_TYPE_0: - if (hdr->hdrlen & 0x01) { - IP6_INC_STATS_BH(ip6_dst_idev(skb->dst), - IPSTATS_MIB_INHDRERRORS); - icmpv6_param_prob(skb, ICMPV6_HDR_FIELD, - ((&hdr->hdrlen) - - skb_network_header(skb))); - return -1; - } - break; -#ifdef CONFIG_IPV6_MIP6 +#if defined(CONFIG_IPV6_MIP6) || defined(CONFIG_IPV6_MIP6_MODULE) case IPV6_SRCRT_TYPE_2: + if (accept_source_route < 0) + goto unknown_rh; /* Silently discard invalid RTH type 2 */ if (hdr->hdrlen != 2 || hdr->segments_left != 1) { IP6_INC_STATS_BH(ip6_dst_idev(skb->dst), @@ -474,6 +440,8 @@ looped_back: } break; #endif + default: + goto unknown_rh; } /* @@ -520,7 +488,7 @@ looped_back: addr += i - 1; switch (hdr->type) { -#ifdef CONFIG_IPV6_MIP6 +#if defined(CONFIG_IPV6_MIP6) || defined(CONFIG_IPV6_MIP6_MODULE) case IPV6_SRCRT_TYPE_2: if (xfrm6_input_addr(skb, (xfrm_address_t *)addr, (xfrm_address_t *)&ipv6_hdr(skb)->saddr, @@ -577,6 +545,12 @@ looped_back: skb_push(skb, skb->data - skb_network_header(skb)); dst_input(skb); return -1; + +unknown_rh: + IP6_INC_STATS_BH(ip6_dst_idev(skb->dst), IPSTATS_MIB_INHDRERRORS); + icmpv6_param_prob(skb, ICMPV6_HDR_FIELD, + (&hdr->type) - skb_network_header(skb)); + return -1; } static struct inet6_protocol rthdr_protocol = { @@ -590,72 +564,6 @@ void __init ipv6_rthdr_init(void) printk(KERN_ERR "ipv6_rthdr_init: Could not register protocol\n"); }; -/* - This function inverts received rthdr. - NOTE: specs allow to make it automatically only if - packet authenticated. - - I will not discuss it here (though, I am really pissed off at - this stupid requirement making rthdr idea useless) - - Actually, it creates severe problems for us. - Embryonic requests has no associated sockets, - so that user have no control over it and - cannot not only to set reply options, but - even to know, that someone wants to connect - without success. :-( - - For now we need to test the engine, so that I created - temporary (or permanent) backdoor. - If listening socket set IPV6_RTHDR to 2, then we invert header. - --ANK (980729) - */ - -struct ipv6_txoptions * -ipv6_invert_rthdr(struct sock *sk, struct ipv6_rt_hdr *hdr) -{ - /* Received rthdr: - - [ H1 -> H2 -> ... H_prev ] daddr=ME - - Inverted result: - [ H_prev -> ... -> H1 ] daddr =sender - - Note, that IP output engine will rewrite this rthdr - by rotating it left by one addr. - */ - - int n, i; - struct rt0_hdr *rthdr = (struct rt0_hdr*)hdr; - struct rt0_hdr *irthdr; - struct ipv6_txoptions *opt; - int hdrlen = ipv6_optlen(hdr); - - if (hdr->segments_left || - hdr->type != IPV6_SRCRT_TYPE_0 || - hdr->hdrlen & 0x01) - return NULL; - - n = hdr->hdrlen >> 1; - opt = sock_kmalloc(sk, sizeof(*opt) + hdrlen, GFP_ATOMIC); - if (opt == NULL) - return NULL; - memset(opt, 0, sizeof(*opt)); - opt->tot_len = sizeof(*opt) + hdrlen; - opt->srcrt = (void*)(opt+1); - opt->opt_nflen = hdrlen; - - memcpy(opt->srcrt, hdr, sizeof(*hdr)); - irthdr = (struct rt0_hdr*)opt->srcrt; - irthdr->reserved = 0; - opt->srcrt->segments_left = n; - for (i=0; i<n; i++) - memcpy(irthdr->addr+i, rthdr->addr+(n-1-i), 16); - return opt; -} - -EXPORT_SYMBOL_GPL(ipv6_invert_rthdr); - /********************************** Hop-by-hop options. **********************************/ diff --git a/net/ipv6/icmp.c b/net/ipv6/icmp.c index c956037c8066..6a6714d154ed 100644 --- a/net/ipv6/icmp.c +++ b/net/ipv6/icmp.c @@ -272,7 +272,7 @@ static int icmpv6_getfrag(void *from, char *to, int offset, int len, int odd, st return 0; } -#ifdef CONFIG_IPV6_MIP6 +#if defined(CONFIG_IPV6_MIP6) || defined(CONFIG_IPV6_MIP6_MODULE) static void mip6_addr_swap(struct sk_buff *skb) { struct ipv6hdr *iph = ipv6_hdr(skb); diff --git a/net/ipv6/ip6_fib.c b/net/ipv6/ip6_fib.c index 662a7d9681fd..6a612a701eaa 100644 --- a/net/ipv6/ip6_fib.c +++ b/net/ipv6/ip6_fib.c @@ -1474,7 +1474,7 @@ void __init fib6_init(void) fib6_node_kmem = kmem_cache_create("fib6_nodes", sizeof(struct fib6_node), 0, SLAB_HWCACHE_ALIGN|SLAB_PANIC, - NULL, NULL); + NULL); fib6_tables_init(); diff --git a/net/ipv6/ip6_flowlabel.c b/net/ipv6/ip6_flowlabel.c index c206a152ed9d..413a4ebb195c 100644 --- a/net/ipv6/ip6_flowlabel.c +++ b/net/ipv6/ip6_flowlabel.c @@ -648,7 +648,7 @@ static int ip6fl_seq_show(struct seq_file *seq, void *v) return 0; } -static struct seq_operations ip6fl_seq_ops = { +static const struct seq_operations ip6fl_seq_ops = { .start = ip6fl_seq_start, .next = ip6fl_seq_next, .stop = ip6fl_seq_stop, diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c index b9f9e930972f..26de3c0ea31e 100644 --- a/net/ipv6/ip6_output.c +++ b/net/ipv6/ip6_output.c @@ -521,6 +521,10 @@ static void ip6_copy_metadata(struct sk_buff *to, struct sk_buff *from) to->tc_index = from->tc_index; #endif nf_copy(to, from); +#if defined(CONFIG_NETFILTER_XT_TARGET_TRACE) || \ + defined(CONFIG_NETFILTER_XT_TARGET_TRACE_MODULE) + to->nf_trace = from->nf_trace; +#endif skb_copy_secmark(to, from); } @@ -543,7 +547,7 @@ int ip6_find_1stfragopt(struct sk_buff *skb, u8 **nexthdr) found_rhdr = 1; break; case NEXTHDR_DEST: -#ifdef CONFIG_IPV6_MIP6 +#if defined(CONFIG_IPV6_MIP6) || defined(CONFIG_IPV6_MIP6_MODULE) if (ipv6_find_tlv(skb, offset, IPV6_TLV_HAO) >= 0) break; #endif @@ -1423,8 +1427,9 @@ void ip6_flush_pending_frames(struct sock *sk) struct sk_buff *skb; while ((skb = __skb_dequeue_tail(&sk->sk_write_queue)) != NULL) { - IP6_INC_STATS(ip6_dst_idev(skb->dst), - IPSTATS_MIB_OUTDISCARDS); + if (skb->dst) + IP6_INC_STATS(ip6_dst_idev(skb->dst), + IPSTATS_MIB_OUTDISCARDS); kfree_skb(skb); } diff --git a/net/ipv6/ip6_tunnel.c b/net/ipv6/ip6_tunnel.c index 31f92525799b..ca774d8e3be3 100644 --- a/net/ipv6/ip6_tunnel.c +++ b/net/ipv6/ip6_tunnel.c @@ -385,7 +385,7 @@ parse_tlv_tnl_enc_lim(struct sk_buff *skb, __u8 * raw) static int ip6_tnl_err(struct sk_buff *skb, __u8 ipproto, struct inet6_skb_parm *opt, - int *type, int *code, int *msg, __be32 *info, int offset) + int *type, int *code, int *msg, __u32 *info, int offset) { struct ipv6hdr *ipv6h = (struct ipv6hdr *) skb->data; struct ip6_tnl *t; @@ -435,7 +435,7 @@ ip6_tnl_err(struct sk_buff *skb, __u8 ipproto, struct inet6_skb_parm *opt, if ((*code) == ICMPV6_HDR_FIELD) teli = parse_tlv_tnl_enc_lim(skb, skb->data); - if (teli && teli == ntohl(*info) - 2) { + if (teli && teli == *info - 2) { tel = (struct ipv6_tlv_tnl_enc_lim *) &skb->data[teli]; if (tel->encap_limit == 0) { if (net_ratelimit()) @@ -452,7 +452,7 @@ ip6_tnl_err(struct sk_buff *skb, __u8 ipproto, struct inet6_skb_parm *opt, } break; case ICMPV6_PKT_TOOBIG: - mtu = ntohl(*info) - offset; + mtu = *info - offset; if (mtu < IPV6_MIN_MTU) mtu = IPV6_MIN_MTU; t->dev->mtu = mtu; @@ -478,12 +478,12 @@ out: static int ip4ip6_err(struct sk_buff *skb, struct inet6_skb_parm *opt, - int type, int code, int offset, __u32 info) + int type, int code, int offset, __be32 info) { int rel_msg = 0; int rel_type = type; int rel_code = code; - __u32 rel_info = info; + __u32 rel_info = ntohl(info); int err; struct sk_buff *skb2; struct iphdr *eiph; @@ -564,10 +564,9 @@ ip4ip6_err(struct sk_buff *skb, struct inet6_skb_parm *opt, goto out; skb2->dst->ops->update_pmtu(skb2->dst, rel_info); - rel_info = htonl(rel_info); } - icmp_send(skb2, rel_type, rel_code, rel_info); + icmp_send(skb2, rel_type, rel_code, htonl(rel_info)); out: kfree_skb(skb2); @@ -576,12 +575,12 @@ out: static int ip6ip6_err(struct sk_buff *skb, struct inet6_skb_parm *opt, - int type, int code, int offset, __u32 info) + int type, int code, int offset, __be32 info) { int rel_msg = 0; int rel_type = type; int rel_code = code; - __u32 rel_info = info; + __u32 rel_info = ntohl(info); int err; err = ip6_tnl_err(skb, IPPROTO_IPV6, opt, &rel_type, &rel_code, @@ -883,8 +882,8 @@ static int ip6_tnl_xmit2(struct sk_buff *skb, */ max_headroom += LL_RESERVED_SPACE(tdev); - if (skb_headroom(skb) < max_headroom || - skb_cloned(skb) || skb_shared(skb)) { + if (skb_headroom(skb) < max_headroom || skb_shared(skb) || + (skb_cloned(skb) && !skb_clone_writable(skb, 0))) { struct sk_buff *new_skb; if (!(new_skb = skb_realloc_headroom(skb, max_headroom))) diff --git a/net/ipv6/ipcomp6.c b/net/ipv6/ipcomp6.c index 1ee50b5782e1..473f165310ea 100644 --- a/net/ipv6/ipcomp6.c +++ b/net/ipv6/ipcomp6.c @@ -500,4 +500,4 @@ MODULE_LICENSE("GPL"); MODULE_DESCRIPTION("IP Payload Compression Protocol (IPComp) for IPv6 - RFC3173"); MODULE_AUTHOR("Mitsuru KANDA <mk@linux-ipv6.org>"); - +MODULE_ALIAS_XFRM_TYPE(AF_INET6, XFRM_PROTO_COMP); diff --git a/net/ipv6/ipv6_sockglue.c b/net/ipv6/ipv6_sockglue.c index f329029cfbe6..6b038aa72e88 100644 --- a/net/ipv6/ipv6_sockglue.c +++ b/net/ipv6/ipv6_sockglue.c @@ -123,7 +123,7 @@ static struct sk_buff *ipv6_gso_segment(struct sk_buff *skb, int features) struct ipv6hdr *ipv6h; struct inet6_protocol *ops; - if (!(features & NETIF_F_HW_CSUM)) + if (!(features & NETIF_F_V6_CSUM)) features &= ~NETIF_F_SG; if (unlikely(skb_shinfo(skb)->gso_type & @@ -336,16 +336,12 @@ static int do_ipv6_setsockopt(struct sock *sk, int level, int optname, break; case IPV6_RECVRTHDR: - if (val < 0 || val > 2) - goto e_inval; - np->rxopt.bits.srcrt = val; + np->rxopt.bits.srcrt = valbool; retv = 0; break; case IPV6_2292RTHDR: - if (val < 0 || val > 2) - goto e_inval; - np->rxopt.bits.osrcrt = val; + np->rxopt.bits.osrcrt = valbool; retv = 0; break; @@ -416,11 +412,10 @@ static int do_ipv6_setsockopt(struct sock *sk, int level, int optname, if (optname == IPV6_RTHDR && opt && opt->srcrt) { struct ipv6_rt_hdr *rthdr = opt->srcrt; switch (rthdr->type) { - case IPV6_SRCRT_TYPE_0: -#ifdef CONFIG_IPV6_MIP6 +#if defined(CONFIG_IPV6_MIP6) || defined(CONFIG_IPV6_MIP6_MODULE) case IPV6_SRCRT_TYPE_2: -#endif break; +#endif default: goto sticky_done; } @@ -559,6 +554,10 @@ done: { struct ipv6_mreq mreq; + retv = -EPROTO; + if (inet_sk(sk)->is_icsk) + break; + retv = -EFAULT; if (copy_from_user(&mreq, optval, sizeof(struct ipv6_mreq))) break; diff --git a/net/ipv6/mcast.c b/net/ipv6/mcast.c index 3e308fb41b49..ae9881832a7e 100644 --- a/net/ipv6/mcast.c +++ b/net/ipv6/mcast.c @@ -2423,7 +2423,7 @@ static int igmp6_mc_seq_show(struct seq_file *seq, void *v) return 0; } -static struct seq_operations igmp6_mc_seq_ops = { +static const struct seq_operations igmp6_mc_seq_ops = { .start = igmp6_mc_seq_start, .next = igmp6_mc_seq_next, .stop = igmp6_mc_seq_stop, @@ -2597,7 +2597,7 @@ static int igmp6_mcf_seq_show(struct seq_file *seq, void *v) return 0; } -static struct seq_operations igmp6_mcf_seq_ops = { +static const struct seq_operations igmp6_mcf_seq_ops = { .start = igmp6_mcf_seq_start, .next = igmp6_mcf_seq_next, .stop = igmp6_mcf_seq_stop, diff --git a/net/ipv6/mip6.c b/net/ipv6/mip6.c index 13b7160fb892..8a1399ce38ce 100644 --- a/net/ipv6/mip6.c +++ b/net/ipv6/mip6.c @@ -30,6 +30,7 @@ #include <net/sock.h> #include <net/ipv6.h> #include <net/ip6_checksum.h> +#include <net/rawv6.h> #include <net/xfrm.h> #include <net/mip6.h> @@ -86,7 +87,7 @@ static int mip6_mh_len(int type) return len; } -int mip6_mh_filter(struct sock *sk, struct sk_buff *skb) +static int mip6_mh_filter(struct sock *sk, struct sk_buff *skb) { struct ip6_mh *mh; @@ -471,7 +472,7 @@ static struct xfrm_type mip6_rthdr_type = .remote_addr = mip6_xfrm_addr, }; -int __init mip6_init(void) +static int __init mip6_init(void) { printk(KERN_INFO "Mobile IPv6\n"); @@ -483,18 +484,35 @@ int __init mip6_init(void) printk(KERN_INFO "%s: can't add xfrm type(rthdr)\n", __FUNCTION__); goto mip6_rthdr_xfrm_fail; } + if (rawv6_mh_filter_register(mip6_mh_filter) < 0) { + printk(KERN_INFO "%s: can't add rawv6 mh filter\n", __FUNCTION__); + goto mip6_rawv6_mh_fail; + } + + return 0; + mip6_rawv6_mh_fail: + xfrm_unregister_type(&mip6_rthdr_type, AF_INET6); mip6_rthdr_xfrm_fail: xfrm_unregister_type(&mip6_destopt_type, AF_INET6); mip6_destopt_xfrm_fail: return -EAGAIN; } -void __exit mip6_fini(void) +static void __exit mip6_fini(void) { + if (rawv6_mh_filter_unregister(mip6_mh_filter) < 0) + printk(KERN_INFO "%s: can't remove rawv6 mh filter\n", __FUNCTION__); if (xfrm_unregister_type(&mip6_rthdr_type, AF_INET6) < 0) printk(KERN_INFO "%s: can't remove xfrm type(rthdr)\n", __FUNCTION__); if (xfrm_unregister_type(&mip6_destopt_type, AF_INET6) < 0) printk(KERN_INFO "%s: can't remove xfrm type(destopt)\n", __FUNCTION__); } + +module_init(mip6_init); +module_exit(mip6_fini); + +MODULE_LICENSE("GPL"); +MODULE_ALIAS_XFRM_TYPE(AF_INET6, XFRM_PROTO_DSTOPTS); +MODULE_ALIAS_XFRM_TYPE(AF_INET6, XFRM_PROTO_ROUTING); diff --git a/net/ipv6/ndisc.c b/net/ipv6/ndisc.c index 0358e6066a4e..5b596659177c 100644 --- a/net/ipv6/ndisc.c +++ b/net/ipv6/ndisc.c @@ -736,7 +736,7 @@ static void ndisc_recv_ns(struct sk_buff *skb) * so fail our DAD process */ addrconf_dad_failure(ifp); - goto out; + return; } else { /* * This is not a dad solicitation. @@ -1268,9 +1268,10 @@ static void ndisc_redirect_rcv(struct sk_buff *skb) if (ipv6_addr_equal(dest, target)) { on_link = 1; - } else if (!(ipv6_addr_type(target) & IPV6_ADDR_LINKLOCAL)) { + } else if (ipv6_addr_type(target) != + (IPV6_ADDR_UNICAST|IPV6_ADDR_LINKLOCAL)) { ND_PRINTK2(KERN_WARNING - "ICMPv6 Redirect: target address is not link-local.\n"); + "ICMPv6 Redirect: target address is not link-local unicast.\n"); return; } @@ -1344,9 +1345,9 @@ void ndisc_send_redirect(struct sk_buff *skb, struct neighbour *neigh, } if (!ipv6_addr_equal(&ipv6_hdr(skb)->daddr, target) && - !(ipv6_addr_type(target) & IPV6_ADDR_LINKLOCAL)) { + ipv6_addr_type(target) != (IPV6_ADDR_UNICAST|IPV6_ADDR_LINKLOCAL)) { ND_PRINTK2(KERN_WARNING - "ICMPv6 Redirect: target address is not link-local.\n"); + "ICMPv6 Redirect: target address is not link-local unicast.\n"); return; } diff --git a/net/ipv6/netfilter/ip6_tables.c b/net/ipv6/netfilter/ip6_tables.c index 9aa624026688..cd9df02bb85c 100644 --- a/net/ipv6/netfilter/ip6_tables.c +++ b/net/ipv6/netfilter/ip6_tables.c @@ -96,13 +96,13 @@ ip6t_ext_hdr(u8 nexthdr) } /* Returns whether matches rule or not. */ -static inline int +static inline bool ip6_packet_match(const struct sk_buff *skb, const char *indev, const char *outdev, const struct ip6t_ip6 *ip6info, unsigned int *protoff, - int *fragoff, int *hotdrop) + int *fragoff, bool *hotdrop) { size_t i; unsigned long ret; @@ -122,7 +122,7 @@ ip6_packet_match(const struct sk_buff *skb, dprintf("DST: %u. Mask: %u. Target: %u.%s\n", ip->daddr, ipinfo->dmsk.s_addr, ipinfo->dst.s_addr, ipinfo->invflags & IP6T_INV_DSTIP ? " (INV)" : "");*/ - return 0; + return false; } /* Look for ifname matches; this should unroll nicely. */ @@ -136,7 +136,7 @@ ip6_packet_match(const struct sk_buff *skb, dprintf("VIA in mismatch (%s vs %s).%s\n", indev, ip6info->iniface, ip6info->invflags&IP6T_INV_VIA_IN ?" (INV)":""); - return 0; + return false; } for (i = 0, ret = 0; i < IFNAMSIZ/sizeof(unsigned long); i++) { @@ -149,7 +149,7 @@ ip6_packet_match(const struct sk_buff *skb, dprintf("VIA out mismatch (%s vs %s).%s\n", outdev, ip6info->outiface, ip6info->invflags&IP6T_INV_VIA_OUT ?" (INV)":""); - return 0; + return false; } /* ... might want to do something with class and flowlabel here ... */ @@ -162,8 +162,8 @@ ip6_packet_match(const struct sk_buff *skb, protohdr = ipv6_find_hdr(skb, protoff, -1, &_frag_off); if (protohdr < 0) { if (_frag_off == 0) - *hotdrop = 1; - return 0; + *hotdrop = true; + return false; } *fragoff = _frag_off; @@ -174,34 +174,34 @@ ip6_packet_match(const struct sk_buff *skb, if (ip6info->proto == protohdr) { if(ip6info->invflags & IP6T_INV_PROTO) { - return 0; + return false; } - return 1; + return true; } /* We need match for the '-p all', too! */ if ((ip6info->proto != 0) && !(ip6info->invflags & IP6T_INV_PROTO)) - return 0; + return false; } - return 1; + return true; } /* should be ip6 safe */ -static inline int +static inline bool ip6_checkentry(const struct ip6t_ip6 *ipv6) { if (ipv6->flags & ~IP6T_F_MASK) { duprintf("Unknown flag bits set: %08X\n", ipv6->flags & ~IP6T_F_MASK); - return 0; + return false; } if (ipv6->invflags & ~IP6T_INV_MASK) { duprintf("Unknown invflag bits set: %08X\n", ipv6->invflags & ~IP6T_INV_MASK); - return 0; + return false; } - return 1; + return true; } static unsigned int @@ -219,20 +219,20 @@ ip6t_error(struct sk_buff **pskb, } static inline -int do_match(struct ip6t_entry_match *m, - const struct sk_buff *skb, - const struct net_device *in, - const struct net_device *out, - int offset, - unsigned int protoff, - int *hotdrop) +bool do_match(struct ip6t_entry_match *m, + const struct sk_buff *skb, + const struct net_device *in, + const struct net_device *out, + int offset, + unsigned int protoff, + bool *hotdrop) { /* Stop iteration if it doesn't match */ if (!m->u.kernel.match->match(skb, in, out, m->u.kernel.match, m->data, offset, protoff, hotdrop)) - return 1; + return true; else - return 0; + return false; } static inline struct ip6t_entry * @@ -241,6 +241,113 @@ get_entry(void *base, unsigned int offset) return (struct ip6t_entry *)(base + offset); } +/* All zeroes == unconditional rule. */ +static inline int +unconditional(const struct ip6t_ip6 *ipv6) +{ + unsigned int i; + + for (i = 0; i < sizeof(*ipv6); i++) + if (((char *)ipv6)[i]) + break; + + return (i == sizeof(*ipv6)); +} + +#if defined(CONFIG_NETFILTER_XT_TARGET_TRACE) || \ + defined(CONFIG_NETFILTER_XT_TARGET_TRACE_MODULE) +/* This cries for unification! */ +static const char *hooknames[] = { + [NF_IP6_PRE_ROUTING] = "PREROUTING", + [NF_IP6_LOCAL_IN] = "INPUT", + [NF_IP6_FORWARD] = "FORWARD", + [NF_IP6_LOCAL_OUT] = "OUTPUT", + [NF_IP6_POST_ROUTING] = "POSTROUTING", +}; + +enum nf_ip_trace_comments { + NF_IP6_TRACE_COMMENT_RULE, + NF_IP6_TRACE_COMMENT_RETURN, + NF_IP6_TRACE_COMMENT_POLICY, +}; + +static const char *comments[] = { + [NF_IP6_TRACE_COMMENT_RULE] = "rule", + [NF_IP6_TRACE_COMMENT_RETURN] = "return", + [NF_IP6_TRACE_COMMENT_POLICY] = "policy", +}; + +static struct nf_loginfo trace_loginfo = { + .type = NF_LOG_TYPE_LOG, + .u = { + .log = { + .level = 4, + .logflags = NF_LOG_MASK, + }, + }, +}; + +static inline int +get_chainname_rulenum(struct ip6t_entry *s, struct ip6t_entry *e, + char *hookname, char **chainname, + char **comment, unsigned int *rulenum) +{ + struct ip6t_standard_target *t = (void *)ip6t_get_target(s); + + if (strcmp(t->target.u.kernel.target->name, IP6T_ERROR_TARGET) == 0) { + /* Head of user chain: ERROR target with chainname */ + *chainname = t->target.data; + (*rulenum) = 0; + } else if (s == e) { + (*rulenum)++; + + if (s->target_offset == sizeof(struct ip6t_entry) + && strcmp(t->target.u.kernel.target->name, + IP6T_STANDARD_TARGET) == 0 + && t->verdict < 0 + && unconditional(&s->ipv6)) { + /* Tail of chains: STANDARD target (return/policy) */ + *comment = *chainname == hookname + ? (char *)comments[NF_IP6_TRACE_COMMENT_POLICY] + : (char *)comments[NF_IP6_TRACE_COMMENT_RETURN]; + } + return 1; + } else + (*rulenum)++; + + return 0; +} + +static void trace_packet(struct sk_buff *skb, + unsigned int hook, + const struct net_device *in, + const struct net_device *out, + char *tablename, + struct xt_table_info *private, + struct ip6t_entry *e) +{ + void *table_base; + struct ip6t_entry *root; + char *hookname, *chainname, *comment; + unsigned int rulenum = 0; + + table_base = (void *)private->entries[smp_processor_id()]; + root = get_entry(table_base, private->hook_entry[hook]); + + hookname = chainname = (char *)hooknames[hook]; + comment = (char *)comments[NF_IP6_TRACE_COMMENT_RULE]; + + IP6T_ENTRY_ITERATE(root, + private->size - private->hook_entry[hook], + get_chainname_rulenum, + e, hookname, &chainname, &comment, &rulenum); + + nf_log_packet(AF_INET6, hook, skb, in, out, &trace_loginfo, + "TRACE: %s:%s:%s:%u ", + tablename, chainname, comment, rulenum); +} +#endif + /* Returns one of the generic firewall policies, like NF_ACCEPT. */ unsigned int ip6t_do_table(struct sk_buff **pskb, @@ -252,7 +359,7 @@ ip6t_do_table(struct sk_buff **pskb, static const char nulldevname[IFNAMSIZ] __attribute__((aligned(sizeof(long)))); int offset = 0; unsigned int protoff = 0; - int hotdrop = 0; + bool hotdrop = false; /* Initializing verdict to NF_DROP keeps gcc happy. */ unsigned int verdict = NF_DROP; const char *indev, *outdev; @@ -298,6 +405,14 @@ ip6t_do_table(struct sk_buff **pskb, t = ip6t_get_target(e); IP_NF_ASSERT(t->u.kernel.target); + +#if defined(CONFIG_NETFILTER_XT_TARGET_TRACE) || \ + defined(CONFIG_NETFILTER_XT_TARGET_TRACE_MODULE) + /* The packet is traced: log it */ + if (unlikely((*pskb)->nf_trace)) + trace_packet(*pskb, hook, in, out, + table->name, private, e); +#endif /* Standard target? */ if (!t->u.kernel.target->target) { int v; @@ -377,19 +492,6 @@ ip6t_do_table(struct sk_buff **pskb, #endif } -/* All zeroes == unconditional rule. */ -static inline int -unconditional(const struct ip6t_ip6 *ipv6) -{ - unsigned int i; - - for (i = 0; i < sizeof(*ipv6); i++) - if (((char *)ipv6)[i]) - break; - - return (i == sizeof(*ipv6)); -} - /* Figures out from what hook each rule can be called: returns 0 if there are loops. Puts hook bitmask in comefrom. */ static int @@ -1282,16 +1384,16 @@ void ip6t_unregister_table(struct xt_table *table) } /* Returns 1 if the type and code is matched by the range, 0 otherwise */ -static inline int +static inline bool icmp6_type_code_match(u_int8_t test_type, u_int8_t min_code, u_int8_t max_code, u_int8_t type, u_int8_t code, - int invert) + bool invert) { return (type == test_type && code >= min_code && code <= max_code) ^ invert; } -static int +static bool icmp6_match(const struct sk_buff *skb, const struct net_device *in, const struct net_device *out, @@ -1299,22 +1401,22 @@ icmp6_match(const struct sk_buff *skb, const void *matchinfo, int offset, unsigned int protoff, - int *hotdrop) + bool *hotdrop) { struct icmp6hdr _icmp, *ic; const struct ip6t_icmp *icmpinfo = matchinfo; /* Must not be a fragment. */ if (offset) - return 0; + return false; ic = skb_header_pointer(skb, protoff, sizeof(_icmp), &_icmp); if (ic == NULL) { /* We've been asked to examine this packet, and we can't. Hence, no choice but to drop. */ duprintf("Dropping evil ICMP tinygram.\n"); - *hotdrop = 1; - return 0; + *hotdrop = true; + return false; } return icmp6_type_code_match(icmpinfo->type, @@ -1325,7 +1427,7 @@ icmp6_match(const struct sk_buff *skb, } /* Called when user tries to insert an entry of this type. */ -static int +static bool icmp6_checkentry(const char *tablename, const void *entry, const struct xt_match *match, @@ -1339,13 +1441,13 @@ icmp6_checkentry(const char *tablename, } /* The built-in targets: standard (NULL) and error. */ -static struct xt_target ip6t_standard_target = { +static struct xt_target ip6t_standard_target __read_mostly = { .name = IP6T_STANDARD_TARGET, .targetsize = sizeof(int), .family = AF_INET6, }; -static struct xt_target ip6t_error_target = { +static struct xt_target ip6t_error_target __read_mostly = { .name = IP6T_ERROR_TARGET, .target = ip6t_error, .targetsize = IP6T_FUNCTION_MAXNAMELEN, @@ -1360,9 +1462,10 @@ static struct nf_sockopt_ops ip6t_sockopts = { .get_optmin = IP6T_BASE_CTL, .get_optmax = IP6T_SO_GET_MAX+1, .get = do_ip6t_get_ctl, + .owner = THIS_MODULE, }; -static struct xt_match icmp6_matchstruct = { +static struct xt_match icmp6_matchstruct __read_mostly = { .name = "icmp6", .match = &icmp6_match, .matchsize = sizeof(struct ip6t_icmp), @@ -1395,7 +1498,7 @@ static int __init ip6_tables_init(void) if (ret < 0) goto err5; - printk("ip6_tables: (C) 2000-2006 Netfilter Core Team\n"); + printk(KERN_INFO "ip6_tables: (C) 2000-2006 Netfilter Core Team\n"); return 0; err5: diff --git a/net/ipv6/netfilter/ip6t_HL.c b/net/ipv6/netfilter/ip6t_HL.c index 4115a576ba25..ad4d94310b87 100644 --- a/net/ipv6/netfilter/ip6t_HL.c +++ b/net/ipv6/netfilter/ip6t_HL.c @@ -58,28 +58,28 @@ static unsigned int ip6t_hl_target(struct sk_buff **pskb, return XT_CONTINUE; } -static int ip6t_hl_checkentry(const char *tablename, +static bool ip6t_hl_checkentry(const char *tablename, const void *entry, const struct xt_target *target, void *targinfo, unsigned int hook_mask) { - struct ip6t_HL_info *info = targinfo; + const struct ip6t_HL_info *info = targinfo; if (info->mode > IP6T_HL_MAXMODE) { printk(KERN_WARNING "ip6t_HL: invalid or unknown Mode %u\n", info->mode); - return 0; + return false; } - if ((info->mode != IP6T_HL_SET) && (info->hop_limit == 0)) { + if (info->mode != IP6T_HL_SET && info->hop_limit == 0) { printk(KERN_WARNING "ip6t_HL: increment/decrement doesn't " "make sense with value 0\n"); - return 0; + return false; } - return 1; + return true; } -static struct xt_target ip6t_HL = { +static struct xt_target ip6t_HL __read_mostly = { .name = "HL", .family = AF_INET6, .target = ip6t_hl_target, diff --git a/net/ipv6/netfilter/ip6t_LOG.c b/net/ipv6/netfilter/ip6t_LOG.c index a7a25177dc0a..6ab99001dccc 100644 --- a/net/ipv6/netfilter/ip6t_LOG.c +++ b/net/ipv6/netfilter/ip6t_LOG.c @@ -32,12 +32,6 @@ struct in_device; #include <net/route.h> #include <linux/netfilter_ipv6/ip6t_LOG.h> -#if 0 -#define DEBUGP printk -#else -#define DEBUGP(format, args...) -#endif - /* Use lock to serialize, so printks don't overlap */ static DEFINE_SPINLOCK(log_lock); @@ -48,7 +42,8 @@ static void dump_packet(const struct nf_loginfo *info, { u_int8_t currenthdr; int fragment; - struct ipv6hdr _ip6h, *ih; + struct ipv6hdr _ip6h; + const struct ipv6hdr *ih; unsigned int ptr; unsigned int hdrlen = 0; unsigned int logflags; @@ -78,7 +73,8 @@ static void dump_packet(const struct nf_loginfo *info, ptr = ip6hoff + sizeof(struct ipv6hdr); currenthdr = ih->nexthdr; while (currenthdr != NEXTHDR_NONE && ip6t_ext_hdr(currenthdr)) { - struct ipv6_opt_hdr _hdr, *hp; + struct ipv6_opt_hdr _hdr; + const struct ipv6_opt_hdr *hp; hp = skb_header_pointer(skb, ptr, sizeof(_hdr), &_hdr); if (hp == NULL) { @@ -92,7 +88,8 @@ static void dump_packet(const struct nf_loginfo *info, switch (currenthdr) { case IPPROTO_FRAGMENT: { - struct frag_hdr _fhdr, *fh; + struct frag_hdr _fhdr; + const struct frag_hdr *fh; printk("FRAG:"); fh = skb_header_pointer(skb, ptr, sizeof(_fhdr), @@ -131,7 +128,8 @@ static void dump_packet(const struct nf_loginfo *info, /* Max Length */ case IPPROTO_AH: if (logflags & IP6T_LOG_IPOPT) { - struct ip_auth_hdr _ahdr, *ah; + struct ip_auth_hdr _ahdr; + const struct ip_auth_hdr *ah; /* Max length: 3 "AH " */ printk("AH "); @@ -162,7 +160,8 @@ static void dump_packet(const struct nf_loginfo *info, break; case IPPROTO_ESP: if (logflags & IP6T_LOG_IPOPT) { - struct ip_esp_hdr _esph, *eh; + struct ip_esp_hdr _esph; + const struct ip_esp_hdr *eh; /* Max length: 4 "ESP " */ printk("ESP "); @@ -202,7 +201,8 @@ static void dump_packet(const struct nf_loginfo *info, switch (currenthdr) { case IPPROTO_TCP: { - struct tcphdr _tcph, *th; + struct tcphdr _tcph; + const struct tcphdr *th; /* Max length: 10 "PROTO=TCP " */ printk("PROTO=TCP "); @@ -250,7 +250,8 @@ static void dump_packet(const struct nf_loginfo *info, if ((logflags & IP6T_LOG_TCPOPT) && th->doff * 4 > sizeof(struct tcphdr)) { - u_int8_t _opt[60 - sizeof(struct tcphdr)], *op; + u_int8_t _opt[60 - sizeof(struct tcphdr)]; + const u_int8_t *op; unsigned int i; unsigned int optsize = th->doff * 4 - sizeof(struct tcphdr); @@ -273,7 +274,8 @@ static void dump_packet(const struct nf_loginfo *info, } case IPPROTO_UDP: case IPPROTO_UDPLITE: { - struct udphdr _udph, *uh; + struct udphdr _udph; + const struct udphdr *uh; if (currenthdr == IPPROTO_UDP) /* Max length: 10 "PROTO=UDP " */ @@ -298,7 +300,8 @@ static void dump_packet(const struct nf_loginfo *info, break; } case IPPROTO_ICMPV6: { - struct icmp6hdr _icmp6h, *ic; + struct icmp6hdr _icmp6h; + const struct icmp6hdr *ic; /* Max length: 13 "PROTO=ICMPv6 " */ printk("PROTO=ICMPv6 "); @@ -448,27 +451,27 @@ ip6t_log_target(struct sk_buff **pskb, } -static int ip6t_log_checkentry(const char *tablename, - const void *entry, - const struct xt_target *target, - void *targinfo, - unsigned int hook_mask) +static bool ip6t_log_checkentry(const char *tablename, + const void *entry, + const struct xt_target *target, + void *targinfo, + unsigned int hook_mask) { const struct ip6t_log_info *loginfo = targinfo; if (loginfo->level >= 8) { - DEBUGP("LOG: level %u >= 8\n", loginfo->level); - return 0; + pr_debug("LOG: level %u >= 8\n", loginfo->level); + return false; } if (loginfo->prefix[sizeof(loginfo->prefix)-1] != '\0') { - DEBUGP("LOG: prefix term %i\n", - loginfo->prefix[sizeof(loginfo->prefix)-1]); - return 0; + pr_debug("LOG: prefix term %i\n", + loginfo->prefix[sizeof(loginfo->prefix)-1]); + return false; } - return 1; + return true; } -static struct xt_target ip6t_log_reg = { +static struct xt_target ip6t_log_reg __read_mostly = { .name = "LOG", .family = AF_INET6, .target = ip6t_log_target, diff --git a/net/ipv6/netfilter/ip6t_REJECT.c b/net/ipv6/netfilter/ip6t_REJECT.c index cb3d2415a064..2f487cda3b6b 100644 --- a/net/ipv6/netfilter/ip6t_REJECT.c +++ b/net/ipv6/netfilter/ip6t_REJECT.c @@ -34,12 +34,6 @@ MODULE_AUTHOR("Yasuyuki KOZAKAI <yasuyuki.kozakai@toshiba.co.jp>"); MODULE_DESCRIPTION("IP6 tables REJECT target module"); MODULE_LICENSE("GPL"); -#if 0 -#define DEBUGP printk -#else -#define DEBUGP(format, args...) -#endif - /* Send RST reply */ static void send_reset(struct sk_buff *oldskb) { @@ -54,7 +48,7 @@ static void send_reset(struct sk_buff *oldskb) if ((!(ipv6_addr_type(&oip6h->saddr) & IPV6_ADDR_UNICAST)) || (!(ipv6_addr_type(&oip6h->daddr) & IPV6_ADDR_UNICAST))) { - DEBUGP("ip6t_REJECT: addr is not unicast.\n"); + pr_debug("ip6t_REJECT: addr is not unicast.\n"); return; } @@ -62,16 +56,17 @@ static void send_reset(struct sk_buff *oldskb) tcphoff = ipv6_skip_exthdr(oldskb, ((u8*)(oip6h+1) - oldskb->data), &proto); if ((tcphoff < 0) || (tcphoff > oldskb->len)) { - DEBUGP("ip6t_REJECT: Can't get TCP header.\n"); + pr_debug("ip6t_REJECT: Can't get TCP header.\n"); return; } otcplen = oldskb->len - tcphoff; /* IP header checks: fragment, too short. */ - if ((proto != IPPROTO_TCP) || (otcplen < sizeof(struct tcphdr))) { - DEBUGP("ip6t_REJECT: proto(%d) != IPPROTO_TCP, or too short. otcplen = %d\n", - proto, otcplen); + if (proto != IPPROTO_TCP || otcplen < sizeof(struct tcphdr)) { + pr_debug("ip6t_REJECT: proto(%d) != IPPROTO_TCP, " + "or too short. otcplen = %d\n", + proto, otcplen); return; } @@ -80,14 +75,14 @@ static void send_reset(struct sk_buff *oldskb) /* No RST for RST. */ if (otcph.rst) { - DEBUGP("ip6t_REJECT: RST is set\n"); + pr_debug("ip6t_REJECT: RST is set\n"); return; } /* Check checksum. */ if (csum_ipv6_magic(&oip6h->saddr, &oip6h->daddr, otcplen, IPPROTO_TCP, skb_checksum(oldskb, tcphoff, otcplen, 0))) { - DEBUGP("ip6t_REJECT: TCP checksum is invalid\n"); + pr_debug("ip6t_REJECT: TCP checksum is invalid\n"); return; } @@ -159,7 +154,7 @@ static void send_reset(struct sk_buff *oldskb) tcph->check = csum_ipv6_magic(&ipv6_hdr(nskb)->saddr, &ipv6_hdr(nskb)->daddr, sizeof(struct tcphdr), IPPROTO_TCP, - csum_partial((char *)tcph, + csum_partial(tcph, sizeof(struct tcphdr), 0)); nf_ct_attach(nskb, oldskb); @@ -186,7 +181,7 @@ static unsigned int reject6_target(struct sk_buff **pskb, { const struct ip6t_reject_info *reject = targinfo; - DEBUGP(KERN_DEBUG "%s: medium point\n", __FUNCTION__); + pr_debug("%s: medium point\n", __FUNCTION__); /* WARNING: This code causes reentry within ip6tables. This means that the ip6tables jump stack is now crap. We must return an absolute verdict. --RR */ @@ -221,30 +216,30 @@ static unsigned int reject6_target(struct sk_buff **pskb, return NF_DROP; } -static int check(const char *tablename, - const void *entry, - const struct xt_target *target, - void *targinfo, - unsigned int hook_mask) +static bool check(const char *tablename, + const void *entry, + const struct xt_target *target, + void *targinfo, + unsigned int hook_mask) { const struct ip6t_reject_info *rejinfo = targinfo; const struct ip6t_entry *e = entry; if (rejinfo->with == IP6T_ICMP6_ECHOREPLY) { printk("ip6t_REJECT: ECHOREPLY is not supported.\n"); - return 0; + return false; } else if (rejinfo->with == IP6T_TCP_RESET) { /* Must specify that it's a TCP packet */ if (e->ipv6.proto != IPPROTO_TCP || (e->ipv6.invflags & XT_INV_PROTO)) { - DEBUGP("ip6t_REJECT: TCP_RESET illegal for non-tcp\n"); - return 0; + printk("ip6t_REJECT: TCP_RESET illegal for non-tcp\n"); + return false; } } - return 1; + return true; } -static struct xt_target ip6t_reject_reg = { +static struct xt_target ip6t_reject_reg __read_mostly = { .name = "REJECT", .family = AF_INET6, .target = reject6_target, diff --git a/net/ipv6/netfilter/ip6t_ah.c b/net/ipv6/netfilter/ip6t_ah.c index d3c154371b41..2a25fe25e0e0 100644 --- a/net/ipv6/netfilter/ip6t_ah.c +++ b/net/ipv6/netfilter/ip6t_ah.c @@ -23,25 +23,20 @@ MODULE_LICENSE("GPL"); MODULE_DESCRIPTION("IPv6 AH match"); MODULE_AUTHOR("Andras Kis-Szabo <kisza@sch.bme.hu>"); -#if 0 -#define DEBUGP printk -#else -#define DEBUGP(format, args...) -#endif - /* Returns 1 if the spi is matched by the range, 0 otherwise */ -static inline int -spi_match(u_int32_t min, u_int32_t max, u_int32_t spi, int invert) +static inline bool +spi_match(u_int32_t min, u_int32_t max, u_int32_t spi, bool invert) { - int r=0; - DEBUGP("ah spi_match:%c 0x%x <= 0x%x <= 0x%x",invert? '!':' ', - min,spi,max); + bool r; + + pr_debug("ah spi_match:%c 0x%x <= 0x%x <= 0x%x", + invert ? '!' : ' ', min, spi, max); r = (spi >= min && spi <= max) ^ invert; - DEBUGP(" result %s\n",r? "PASS\n" : "FAILED\n"); + pr_debug(" result %s\n", r ? "PASS" : "FAILED"); return r; } -static int +static bool match(const struct sk_buff *skb, const struct net_device *in, const struct net_device *out, @@ -49,9 +44,10 @@ match(const struct sk_buff *skb, const void *matchinfo, int offset, unsigned int protoff, - int *hotdrop) + bool *hotdrop) { - struct ip_auth_hdr *ah, _ah; + struct ip_auth_hdr _ah; + const struct ip_auth_hdr *ah; const struct ip6t_ah *ahinfo = matchinfo; unsigned int ptr; unsigned int hdrlen = 0; @@ -60,40 +56,40 @@ match(const struct sk_buff *skb, err = ipv6_find_hdr(skb, &ptr, NEXTHDR_AUTH, NULL); if (err < 0) { if (err != -ENOENT) - *hotdrop = 1; - return 0; + *hotdrop = true; + return false; } ah = skb_header_pointer(skb, ptr, sizeof(_ah), &_ah); if (ah == NULL) { - *hotdrop = 1; - return 0; + *hotdrop = true; + return false; } hdrlen = (ah->hdrlen + 2) << 2; - DEBUGP("IPv6 AH LEN %u %u ", hdrlen, ah->hdrlen); - DEBUGP("RES %04X ", ah->reserved); - DEBUGP("SPI %u %08X\n", ntohl(ah->spi), ntohl(ah->spi)); - - DEBUGP("IPv6 AH spi %02X ", - (spi_match(ahinfo->spis[0], ahinfo->spis[1], - ntohl(ah->spi), - !!(ahinfo->invflags & IP6T_AH_INV_SPI)))); - DEBUGP("len %02X %04X %02X ", - ahinfo->hdrlen, hdrlen, - (!ahinfo->hdrlen || - (ahinfo->hdrlen == hdrlen) ^ - !!(ahinfo->invflags & IP6T_AH_INV_LEN))); - DEBUGP("res %02X %04X %02X\n", - ahinfo->hdrres, ah->reserved, - !(ahinfo->hdrres && ah->reserved)); + pr_debug("IPv6 AH LEN %u %u ", hdrlen, ah->hdrlen); + pr_debug("RES %04X ", ah->reserved); + pr_debug("SPI %u %08X\n", ntohl(ah->spi), ntohl(ah->spi)); + + pr_debug("IPv6 AH spi %02X ", + spi_match(ahinfo->spis[0], ahinfo->spis[1], + ntohl(ah->spi), + !!(ahinfo->invflags & IP6T_AH_INV_SPI))); + pr_debug("len %02X %04X %02X ", + ahinfo->hdrlen, hdrlen, + (!ahinfo->hdrlen || + (ahinfo->hdrlen == hdrlen) ^ + !!(ahinfo->invflags & IP6T_AH_INV_LEN))); + pr_debug("res %02X %04X %02X\n", + ahinfo->hdrres, ah->reserved, + !(ahinfo->hdrres && ah->reserved)); return (ah != NULL) && - (spi_match(ahinfo->spis[0], ahinfo->spis[1], - ntohl(ah->spi), - !!(ahinfo->invflags & IP6T_AH_INV_SPI))) + spi_match(ahinfo->spis[0], ahinfo->spis[1], + ntohl(ah->spi), + !!(ahinfo->invflags & IP6T_AH_INV_SPI)) && (!ahinfo->hdrlen || (ahinfo->hdrlen == hdrlen) ^ @@ -103,7 +99,7 @@ match(const struct sk_buff *skb, } /* Called when user tries to insert an entry of this type. */ -static int +static bool checkentry(const char *tablename, const void *entry, const struct xt_match *match, @@ -113,13 +109,13 @@ checkentry(const char *tablename, const struct ip6t_ah *ahinfo = matchinfo; if (ahinfo->invflags & ~IP6T_AH_INV_MASK) { - DEBUGP("ip6t_ah: unknown flags %X\n", ahinfo->invflags); - return 0; + pr_debug("ip6t_ah: unknown flags %X\n", ahinfo->invflags); + return false; } - return 1; + return true; } -static struct xt_match ah_match = { +static struct xt_match ah_match __read_mostly = { .name = "ah", .family = AF_INET6, .match = match, diff --git a/net/ipv6/netfilter/ip6t_eui64.c b/net/ipv6/netfilter/ip6t_eui64.c index 0f3dd932f0a6..34ba150bfe5d 100644 --- a/net/ipv6/netfilter/ip6t_eui64.c +++ b/net/ipv6/netfilter/ip6t_eui64.c @@ -19,7 +19,7 @@ MODULE_DESCRIPTION("IPv6 EUI64 address checking match"); MODULE_LICENSE("GPL"); MODULE_AUTHOR("Andras Kis-Szabo <kisza@sch.bme.hu>"); -static int +static bool match(const struct sk_buff *skb, const struct net_device *in, const struct net_device *out, @@ -27,16 +27,16 @@ match(const struct sk_buff *skb, const void *matchinfo, int offset, unsigned int protoff, - int *hotdrop) + bool *hotdrop) { unsigned char eui64[8]; int i = 0; if (!(skb_mac_header(skb) >= skb->head && - (skb_mac_header(skb) + ETH_HLEN) <= skb->data) && + skb_mac_header(skb) + ETH_HLEN <= skb->data) && offset != 0) { - *hotdrop = 1; - return 0; + *hotdrop = true; + return false; } memset(eui64, 0, sizeof(eui64)); @@ -50,19 +50,19 @@ match(const struct sk_buff *skb, eui64[0] |= 0x02; i = 0; - while ((ipv6_hdr(skb)->saddr.s6_addr[8 + i] == eui64[i]) - && (i < 8)) + while (ipv6_hdr(skb)->saddr.s6_addr[8 + i] == eui64[i] + && i < 8) i++; if (i == 8) - return 1; + return true; } } - return 0; + return false; } -static struct xt_match eui64_match = { +static struct xt_match eui64_match __read_mostly = { .name = "eui64", .family = AF_INET6, .match = match, diff --git a/net/ipv6/netfilter/ip6t_frag.c b/net/ipv6/netfilter/ip6t_frag.c index 5a5da71321b6..968aeba02073 100644 --- a/net/ipv6/netfilter/ip6t_frag.c +++ b/net/ipv6/netfilter/ip6t_frag.c @@ -22,25 +22,19 @@ MODULE_LICENSE("GPL"); MODULE_DESCRIPTION("IPv6 FRAG match"); MODULE_AUTHOR("Andras Kis-Szabo <kisza@sch.bme.hu>"); -#if 0 -#define DEBUGP printk -#else -#define DEBUGP(format, args...) -#endif - /* Returns 1 if the id is matched by the range, 0 otherwise */ -static inline int -id_match(u_int32_t min, u_int32_t max, u_int32_t id, int invert) +static inline bool +id_match(u_int32_t min, u_int32_t max, u_int32_t id, bool invert) { - int r = 0; - DEBUGP("frag id_match:%c 0x%x <= 0x%x <= 0x%x", invert ? '!' : ' ', - min, id, max); + bool r; + pr_debug("frag id_match:%c 0x%x <= 0x%x <= 0x%x", invert ? '!' : ' ', + min, id, max); r = (id >= min && id <= max) ^ invert; - DEBUGP(" result %s\n", r ? "PASS" : "FAILED"); + pr_debug(" result %s\n", r ? "PASS" : "FAILED"); return r; } -static int +static bool match(const struct sk_buff *skb, const struct net_device *in, const struct net_device *out, @@ -48,9 +42,10 @@ match(const struct sk_buff *skb, const void *matchinfo, int offset, unsigned int protoff, - int *hotdrop) + bool *hotdrop) { - struct frag_hdr _frag, *fh; + struct frag_hdr _frag; + const struct frag_hdr *fh; const struct ip6t_frag *fraginfo = matchinfo; unsigned int ptr; int err; @@ -58,53 +53,53 @@ match(const struct sk_buff *skb, err = ipv6_find_hdr(skb, &ptr, NEXTHDR_FRAGMENT, NULL); if (err < 0) { if (err != -ENOENT) - *hotdrop = 1; - return 0; + *hotdrop = true; + return false; } fh = skb_header_pointer(skb, ptr, sizeof(_frag), &_frag); if (fh == NULL) { - *hotdrop = 1; - return 0; + *hotdrop = true; + return false; } - DEBUGP("INFO %04X ", fh->frag_off); - DEBUGP("OFFSET %04X ", ntohs(fh->frag_off) & ~0x7); - DEBUGP("RES %02X %04X", fh->reserved, ntohs(fh->frag_off) & 0x6); - DEBUGP("MF %04X ", fh->frag_off & htons(IP6_MF)); - DEBUGP("ID %u %08X\n", ntohl(fh->identification), - ntohl(fh->identification)); - - DEBUGP("IPv6 FRAG id %02X ", - (id_match(fraginfo->ids[0], fraginfo->ids[1], - ntohl(fh->identification), - !!(fraginfo->invflags & IP6T_FRAG_INV_IDS)))); - DEBUGP("res %02X %02X%04X %02X ", - (fraginfo->flags & IP6T_FRAG_RES), fh->reserved, - ntohs(fh->frag_off) & 0x6, - !((fraginfo->flags & IP6T_FRAG_RES) - && (fh->reserved || (ntohs(fh->frag_off) & 0x06)))); - DEBUGP("first %02X %02X %02X ", - (fraginfo->flags & IP6T_FRAG_FST), - ntohs(fh->frag_off) & ~0x7, - !((fraginfo->flags & IP6T_FRAG_FST) - && (ntohs(fh->frag_off) & ~0x7))); - DEBUGP("mf %02X %02X %02X ", - (fraginfo->flags & IP6T_FRAG_MF), - ntohs(fh->frag_off) & IP6_MF, - !((fraginfo->flags & IP6T_FRAG_MF) - && !((ntohs(fh->frag_off) & IP6_MF)))); - DEBUGP("last %02X %02X %02X\n", - (fraginfo->flags & IP6T_FRAG_NMF), - ntohs(fh->frag_off) & IP6_MF, - !((fraginfo->flags & IP6T_FRAG_NMF) - && (ntohs(fh->frag_off) & IP6_MF))); + pr_debug("INFO %04X ", fh->frag_off); + pr_debug("OFFSET %04X ", ntohs(fh->frag_off) & ~0x7); + pr_debug("RES %02X %04X", fh->reserved, ntohs(fh->frag_off) & 0x6); + pr_debug("MF %04X ", fh->frag_off & htons(IP6_MF)); + pr_debug("ID %u %08X\n", ntohl(fh->identification), + ntohl(fh->identification)); + + pr_debug("IPv6 FRAG id %02X ", + id_match(fraginfo->ids[0], fraginfo->ids[1], + ntohl(fh->identification), + !!(fraginfo->invflags & IP6T_FRAG_INV_IDS))); + pr_debug("res %02X %02X%04X %02X ", + fraginfo->flags & IP6T_FRAG_RES, fh->reserved, + ntohs(fh->frag_off) & 0x6, + !((fraginfo->flags & IP6T_FRAG_RES) + && (fh->reserved || (ntohs(fh->frag_off) & 0x06)))); + pr_debug("first %02X %02X %02X ", + fraginfo->flags & IP6T_FRAG_FST, + ntohs(fh->frag_off) & ~0x7, + !((fraginfo->flags & IP6T_FRAG_FST) + && (ntohs(fh->frag_off) & ~0x7))); + pr_debug("mf %02X %02X %02X ", + fraginfo->flags & IP6T_FRAG_MF, + ntohs(fh->frag_off) & IP6_MF, + !((fraginfo->flags & IP6T_FRAG_MF) + && !((ntohs(fh->frag_off) & IP6_MF)))); + pr_debug("last %02X %02X %02X\n", + fraginfo->flags & IP6T_FRAG_NMF, + ntohs(fh->frag_off) & IP6_MF, + !((fraginfo->flags & IP6T_FRAG_NMF) + && (ntohs(fh->frag_off) & IP6_MF))); return (fh != NULL) && - (id_match(fraginfo->ids[0], fraginfo->ids[1], - ntohl(fh->identification), - !!(fraginfo->invflags & IP6T_FRAG_INV_IDS))) + id_match(fraginfo->ids[0], fraginfo->ids[1], + ntohl(fh->identification), + !!(fraginfo->invflags & IP6T_FRAG_INV_IDS)) && !((fraginfo->flags & IP6T_FRAG_RES) && (fh->reserved || (ntohs(fh->frag_off) & 0x6))) @@ -120,7 +115,7 @@ match(const struct sk_buff *skb, } /* Called when user tries to insert an entry of this type. */ -static int +static bool checkentry(const char *tablename, const void *ip, const struct xt_match *match, @@ -130,13 +125,13 @@ checkentry(const char *tablename, const struct ip6t_frag *fraginfo = matchinfo; if (fraginfo->invflags & ~IP6T_FRAG_INV_MASK) { - DEBUGP("ip6t_frag: unknown flags %X\n", fraginfo->invflags); - return 0; + pr_debug("ip6t_frag: unknown flags %X\n", fraginfo->invflags); + return false; } - return 1; + return true; } -static struct xt_match frag_match = { +static struct xt_match frag_match __read_mostly = { .name = "frag", .family = AF_INET6, .match = match, diff --git a/net/ipv6/netfilter/ip6t_hbh.c b/net/ipv6/netfilter/ip6t_hbh.c index d2373c7cd354..e6ca6018b1ea 100644 --- a/net/ipv6/netfilter/ip6t_hbh.c +++ b/net/ipv6/netfilter/ip6t_hbh.c @@ -25,12 +25,6 @@ MODULE_DESCRIPTION("IPv6 opts match"); MODULE_AUTHOR("Andras Kis-Szabo <kisza@sch.bme.hu>"); MODULE_ALIAS("ip6t_dst"); -#if 0 -#define DEBUGP printk -#else -#define DEBUGP(format, args...) -#endif - /* * (Type & 0xC0) >> 6 * 0 -> ignorable @@ -47,7 +41,7 @@ MODULE_ALIAS("ip6t_dst"); * 5 -> RTALERT 2 x x */ -static int +static bool match(const struct sk_buff *skb, const struct net_device *in, const struct net_device *out, @@ -55,45 +49,48 @@ match(const struct sk_buff *skb, const void *matchinfo, int offset, unsigned int protoff, - int *hotdrop) + bool *hotdrop) { - struct ipv6_opt_hdr _optsh, *oh; + struct ipv6_opt_hdr _optsh; + const struct ipv6_opt_hdr *oh; const struct ip6t_opts *optinfo = matchinfo; unsigned int temp; unsigned int ptr; unsigned int hdrlen = 0; - unsigned int ret = 0; - u8 _opttype, *tp = NULL; - u8 _optlen, *lp = NULL; + bool ret = false; + u8 _opttype; + u8 _optlen; + const u_int8_t *tp = NULL; + const u_int8_t *lp = NULL; unsigned int optlen; int err; err = ipv6_find_hdr(skb, &ptr, match->data, NULL); if (err < 0) { if (err != -ENOENT) - *hotdrop = 1; - return 0; + *hotdrop = true; + return false; } oh = skb_header_pointer(skb, ptr, sizeof(_optsh), &_optsh); if (oh == NULL) { - *hotdrop = 1; - return 0; + *hotdrop = true; + return false; } hdrlen = ipv6_optlen(oh); if (skb->len - ptr < hdrlen) { /* Packet smaller than it's length field */ - return 0; + return false; } - DEBUGP("IPv6 OPTS LEN %u %u ", hdrlen, oh->hdrlen); + pr_debug("IPv6 OPTS LEN %u %u ", hdrlen, oh->hdrlen); - DEBUGP("len %02X %04X %02X ", - optinfo->hdrlen, hdrlen, - (!(optinfo->flags & IP6T_OPTS_LEN) || - ((optinfo->hdrlen == hdrlen) ^ - !!(optinfo->invflags & IP6T_OPTS_INV_LEN)))); + pr_debug("len %02X %04X %02X ", + optinfo->hdrlen, hdrlen, + (!(optinfo->flags & IP6T_OPTS_LEN) || + ((optinfo->hdrlen == hdrlen) ^ + !!(optinfo->invflags & IP6T_OPTS_INV_LEN)))); ret = (oh != NULL) && (!(optinfo->flags & IP6T_OPTS_LEN) || @@ -105,10 +102,10 @@ match(const struct sk_buff *skb, if (!(optinfo->flags & IP6T_OPTS_OPTS)) { return ret; } else if (optinfo->flags & IP6T_OPTS_NSTRICT) { - DEBUGP("Not strict - not implemented"); + pr_debug("Not strict - not implemented"); } else { - DEBUGP("Strict "); - DEBUGP("#%d ", optinfo->optsnr); + pr_debug("Strict "); + pr_debug("#%d ", optinfo->optsnr); for (temp = 0; temp < optinfo->optsnr; temp++) { /* type field exists ? */ if (hdrlen < 1) @@ -120,12 +117,11 @@ match(const struct sk_buff *skb, /* Type check */ if (*tp != (optinfo->opts[temp] & 0xFF00) >> 8) { - DEBUGP("Tbad %02X %02X\n", - *tp, - (optinfo->opts[temp] & 0xFF00) >> 8); - return 0; + pr_debug("Tbad %02X %02X\n", *tp, + (optinfo->opts[temp] & 0xFF00) >> 8); + return false; } else { - DEBUGP("Tok "); + pr_debug("Tok "); } /* Length check */ if (*tp) { @@ -142,23 +138,23 @@ match(const struct sk_buff *skb, spec_len = optinfo->opts[temp] & 0x00FF; if (spec_len != 0x00FF && spec_len != *lp) { - DEBUGP("Lbad %02X %04X\n", *lp, - spec_len); - return 0; + pr_debug("Lbad %02X %04X\n", *lp, + spec_len); + return false; } - DEBUGP("Lok "); + pr_debug("Lok "); optlen = *lp + 2; } else { - DEBUGP("Pad1\n"); + pr_debug("Pad1\n"); optlen = 1; } /* Step to the next */ - DEBUGP("len%04X \n", optlen); + pr_debug("len%04X \n", optlen); if ((ptr > skb->len - optlen || hdrlen < optlen) && - (temp < optinfo->optsnr - 1)) { - DEBUGP("new pointer is too large! \n"); + temp < optinfo->optsnr - 1) { + pr_debug("new pointer is too large! \n"); break; } ptr += optlen; @@ -167,14 +163,14 @@ match(const struct sk_buff *skb, if (temp == optinfo->optsnr) return ret; else - return 0; + return false; } - return 0; + return false; } /* Called when user tries to insert an entry of this type. */ -static int +static bool checkentry(const char *tablename, const void *entry, const struct xt_match *match, @@ -184,13 +180,13 @@ checkentry(const char *tablename, const struct ip6t_opts *optsinfo = matchinfo; if (optsinfo->invflags & ~IP6T_OPTS_INV_MASK) { - DEBUGP("ip6t_opts: unknown flags %X\n", optsinfo->invflags); - return 0; + pr_debug("ip6t_opts: unknown flags %X\n", optsinfo->invflags); + return false; } - return 1; + return true; } -static struct xt_match opts_match[] = { +static struct xt_match opts_match[] __read_mostly = { { .name = "hbh", .family = AF_INET6, diff --git a/net/ipv6/netfilter/ip6t_hl.c b/net/ipv6/netfilter/ip6t_hl.c index d606c0e6d6fd..ca29ec00dc18 100644 --- a/net/ipv6/netfilter/ip6t_hl.c +++ b/net/ipv6/netfilter/ip6t_hl.c @@ -19,37 +19,37 @@ MODULE_AUTHOR("Maciej Soltysiak <solt@dns.toxicfilms.tv>"); MODULE_DESCRIPTION("IP tables Hop Limit matching module"); MODULE_LICENSE("GPL"); -static int match(const struct sk_buff *skb, - const struct net_device *in, const struct net_device *out, - const struct xt_match *match, const void *matchinfo, - int offset, unsigned int protoff, int *hotdrop) +static bool match(const struct sk_buff *skb, + const struct net_device *in, const struct net_device *out, + const struct xt_match *match, const void *matchinfo, + int offset, unsigned int protoff, bool *hotdrop) { const struct ip6t_hl_info *info = matchinfo; const struct ipv6hdr *ip6h = ipv6_hdr(skb); switch (info->mode) { case IP6T_HL_EQ: - return (ip6h->hop_limit == info->hop_limit); + return ip6h->hop_limit == info->hop_limit; break; case IP6T_HL_NE: - return (!(ip6h->hop_limit == info->hop_limit)); + return ip6h->hop_limit != info->hop_limit; break; case IP6T_HL_LT: - return (ip6h->hop_limit < info->hop_limit); + return ip6h->hop_limit < info->hop_limit; break; case IP6T_HL_GT: - return (ip6h->hop_limit > info->hop_limit); + return ip6h->hop_limit > info->hop_limit; break; default: printk(KERN_WARNING "ip6t_hl: unknown mode %d\n", info->mode); - return 0; + return false; } - return 0; + return false; } -static struct xt_match hl_match = { +static struct xt_match hl_match __read_mostly = { .name = "hl", .family = AF_INET6, .match = match, diff --git a/net/ipv6/netfilter/ip6t_ipv6header.c b/net/ipv6/netfilter/ip6t_ipv6header.c index fd6a0869099b..2c65c2f9a4ab 100644 --- a/net/ipv6/netfilter/ip6t_ipv6header.c +++ b/net/ipv6/netfilter/ip6t_ipv6header.c @@ -26,7 +26,7 @@ MODULE_LICENSE("GPL"); MODULE_DESCRIPTION("IPv6 headers match"); MODULE_AUTHOR("Andras Kis-Szabo <kisza@sch.bme.hu>"); -static int +static bool ipv6header_match(const struct sk_buff *skb, const struct net_device *in, const struct net_device *out, @@ -34,7 +34,7 @@ ipv6header_match(const struct sk_buff *skb, const void *matchinfo, int offset, unsigned int protoff, - int *hotdrop) + bool *hotdrop) { const struct ip6t_ipv6header_info *info = matchinfo; unsigned int temp; @@ -58,7 +58,7 @@ ipv6header_match(const struct sk_buff *skb, /* Is there enough space for the next ext header? */ if (len < (int)sizeof(struct ipv6_opt_hdr)) - return 0; + return false; /* No more exthdr -> evaluate */ if (nexthdr == NEXTHDR_NONE) { temp |= MASK_NONE; @@ -74,9 +74,9 @@ ipv6header_match(const struct sk_buff *skb, BUG_ON(hp == NULL); /* Calculate the header length */ - if (nexthdr == NEXTHDR_FRAGMENT) { + if (nexthdr == NEXTHDR_FRAGMENT) hdrlen = 8; - } else if (nexthdr == NEXTHDR_AUTH) + else if (nexthdr == NEXTHDR_AUTH) hdrlen = (hp->hdrlen + 2) << 2; else hdrlen = ipv6_optlen(hp); @@ -99,7 +99,7 @@ ipv6header_match(const struct sk_buff *skb, temp |= MASK_DSTOPTS; break; default: - return 0; + return false; break; } @@ -110,7 +110,7 @@ ipv6header_match(const struct sk_buff *skb, break; } - if ((nexthdr != NEXTHDR_NONE) && (nexthdr != NEXTHDR_ESP)) + if (nexthdr != NEXTHDR_NONE && nexthdr != NEXTHDR_ESP) temp |= MASK_PROTO; if (info->modeflag) @@ -124,7 +124,7 @@ ipv6header_match(const struct sk_buff *skb, } } -static int +static bool ipv6header_checkentry(const char *tablename, const void *ip, const struct xt_match *match, @@ -136,12 +136,12 @@ ipv6header_checkentry(const char *tablename, /* invflags is 0 or 0xff in hard mode */ if ((!info->modeflag) && info->invflags != 0x00 && info->invflags != 0xFF) - return 0; + return false; - return 1; + return true; } -static struct xt_match ip6t_ipv6header_match = { +static struct xt_match ip6t_ipv6header_match __read_mostly = { .name = "ipv6header", .family = AF_INET6, .match = &ipv6header_match, diff --git a/net/ipv6/netfilter/ip6t_mh.c b/net/ipv6/netfilter/ip6t_mh.c index c2a909893a64..0fa714092dc9 100644 --- a/net/ipv6/netfilter/ip6t_mh.c +++ b/net/ipv6/netfilter/ip6t_mh.c @@ -31,16 +31,13 @@ MODULE_LICENSE("GPL"); #endif /* Returns 1 if the type is matched by the range, 0 otherwise */ -static inline int -type_match(u_int8_t min, u_int8_t max, u_int8_t type, int invert) +static inline bool +type_match(u_int8_t min, u_int8_t max, u_int8_t type, bool invert) { - int ret; - - ret = (type >= min && type <= max) ^ invert; - return ret; + return (type >= min && type <= max) ^ invert; } -static int +static bool match(const struct sk_buff *skb, const struct net_device *in, const struct net_device *out, @@ -48,29 +45,30 @@ match(const struct sk_buff *skb, const void *matchinfo, int offset, unsigned int protoff, - int *hotdrop) + bool *hotdrop) { - struct ip6_mh _mh, *mh; + struct ip6_mh _mh; + const struct ip6_mh *mh; const struct ip6t_mh *mhinfo = matchinfo; /* Must not be a fragment. */ if (offset) - return 0; + return false; mh = skb_header_pointer(skb, protoff, sizeof(_mh), &_mh); if (mh == NULL) { /* We've been asked to examine this packet, and we can't. Hence, no choice but to drop. */ duprintf("Dropping evil MH tinygram.\n"); - *hotdrop = 1; - return 0; + *hotdrop = true; + return false; } if (mh->ip6mh_proto != IPPROTO_NONE) { duprintf("Dropping invalid MH Payload Proto: %u\n", mh->ip6mh_proto); - *hotdrop = 1; - return 0; + *hotdrop = true; + return false; } return type_match(mhinfo->types[0], mhinfo->types[1], mh->ip6mh_type, @@ -78,7 +76,7 @@ match(const struct sk_buff *skb, } /* Called when user tries to insert an entry of this type. */ -static int +static bool mh_checkentry(const char *tablename, const void *entry, const struct xt_match *match, @@ -91,7 +89,7 @@ mh_checkentry(const char *tablename, return !(mhinfo->invflags & ~IP6T_MH_INV_MASK); } -static struct xt_match mh_match = { +static struct xt_match mh_match __read_mostly = { .name = "mh", .family = AF_INET6, .checkentry = mh_checkentry, diff --git a/net/ipv6/netfilter/ip6t_owner.c b/net/ipv6/netfilter/ip6t_owner.c index 43738bba00b5..6036613aef36 100644 --- a/net/ipv6/netfilter/ip6t_owner.c +++ b/net/ipv6/netfilter/ip6t_owner.c @@ -23,7 +23,7 @@ MODULE_DESCRIPTION("IP6 tables owner matching module"); MODULE_LICENSE("GPL"); -static int +static bool match(const struct sk_buff *skb, const struct net_device *in, const struct net_device *out, @@ -31,29 +31,27 @@ match(const struct sk_buff *skb, const void *matchinfo, int offset, unsigned int protoff, - int *hotdrop) + bool *hotdrop) { const struct ip6t_owner_info *info = matchinfo; if (!skb->sk || !skb->sk->sk_socket || !skb->sk->sk_socket->file) - return 0; + return false; - if (info->match & IP6T_OWNER_UID) { + if (info->match & IP6T_OWNER_UID) if ((skb->sk->sk_socket->file->f_uid != info->uid) ^ !!(info->invert & IP6T_OWNER_UID)) - return 0; - } + return false; - if (info->match & IP6T_OWNER_GID) { + if (info->match & IP6T_OWNER_GID) if ((skb->sk->sk_socket->file->f_gid != info->gid) ^ !!(info->invert & IP6T_OWNER_GID)) - return 0; - } + return false; - return 1; + return true; } -static int +static bool checkentry(const char *tablename, const void *ip, const struct xt_match *match, @@ -65,12 +63,12 @@ checkentry(const char *tablename, if (info->match & (IP6T_OWNER_PID | IP6T_OWNER_SID)) { printk("ipt_owner: pid and sid matching " "not supported anymore\n"); - return 0; + return false; } - return 1; + return true; } -static struct xt_match owner_match = { +static struct xt_match owner_match __read_mostly = { .name = "owner", .family = AF_INET6, .match = match, diff --git a/net/ipv6/netfilter/ip6t_rt.c b/net/ipv6/netfilter/ip6t_rt.c index 81ab00d8c182..357cea703bd9 100644 --- a/net/ipv6/netfilter/ip6t_rt.c +++ b/net/ipv6/netfilter/ip6t_rt.c @@ -24,25 +24,19 @@ MODULE_LICENSE("GPL"); MODULE_DESCRIPTION("IPv6 RT match"); MODULE_AUTHOR("Andras Kis-Szabo <kisza@sch.bme.hu>"); -#if 0 -#define DEBUGP printk -#else -#define DEBUGP(format, args...) -#endif - /* Returns 1 if the id is matched by the range, 0 otherwise */ -static inline int -segsleft_match(u_int32_t min, u_int32_t max, u_int32_t id, int invert) +static inline bool +segsleft_match(u_int32_t min, u_int32_t max, u_int32_t id, bool invert) { - int r = 0; - DEBUGP("rt segsleft_match:%c 0x%x <= 0x%x <= 0x%x", - invert ? '!' : ' ', min, id, max); + bool r; + pr_debug("rt segsleft_match:%c 0x%x <= 0x%x <= 0x%x", + invert ? '!' : ' ', min, id, max); r = (id >= min && id <= max) ^ invert; - DEBUGP(" result %s\n", r ? "PASS" : "FAILED"); + pr_debug(" result %s\n", r ? "PASS" : "FAILED"); return r; } -static int +static bool match(const struct sk_buff *skb, const struct net_device *in, const struct net_device *out, @@ -50,59 +44,61 @@ match(const struct sk_buff *skb, const void *matchinfo, int offset, unsigned int protoff, - int *hotdrop) + bool *hotdrop) { - struct ipv6_rt_hdr _route, *rh; + struct ipv6_rt_hdr _route; + const struct ipv6_rt_hdr *rh; const struct ip6t_rt *rtinfo = matchinfo; unsigned int temp; unsigned int ptr; unsigned int hdrlen = 0; - unsigned int ret = 0; - struct in6_addr *ap, _addr; + bool ret = false; + struct in6_addr _addr; + const struct in6_addr *ap; int err; err = ipv6_find_hdr(skb, &ptr, NEXTHDR_ROUTING, NULL); if (err < 0) { if (err != -ENOENT) - *hotdrop = 1; - return 0; + *hotdrop = true; + return false; } rh = skb_header_pointer(skb, ptr, sizeof(_route), &_route); if (rh == NULL) { - *hotdrop = 1; - return 0; + *hotdrop = true; + return false; } hdrlen = ipv6_optlen(rh); if (skb->len - ptr < hdrlen) { /* Pcket smaller than its length field */ - return 0; + return false; } - DEBUGP("IPv6 RT LEN %u %u ", hdrlen, rh->hdrlen); - DEBUGP("TYPE %04X ", rh->type); - DEBUGP("SGS_LEFT %u %02X\n", rh->segments_left, rh->segments_left); - - DEBUGP("IPv6 RT segsleft %02X ", - (segsleft_match(rtinfo->segsleft[0], rtinfo->segsleft[1], - rh->segments_left, - !!(rtinfo->invflags & IP6T_RT_INV_SGS)))); - DEBUGP("type %02X %02X %02X ", - rtinfo->rt_type, rh->type, - (!(rtinfo->flags & IP6T_RT_TYP) || - ((rtinfo->rt_type == rh->type) ^ - !!(rtinfo->invflags & IP6T_RT_INV_TYP)))); - DEBUGP("len %02X %04X %02X ", - rtinfo->hdrlen, hdrlen, - (!(rtinfo->flags & IP6T_RT_LEN) || - ((rtinfo->hdrlen == hdrlen) ^ - !!(rtinfo->invflags & IP6T_RT_INV_LEN)))); - DEBUGP("res %02X %02X %02X ", - (rtinfo->flags & IP6T_RT_RES), - ((struct rt0_hdr *)rh)->reserved, - !((rtinfo->flags & IP6T_RT_RES) && - (((struct rt0_hdr *)rh)->reserved))); + pr_debug("IPv6 RT LEN %u %u ", hdrlen, rh->hdrlen); + pr_debug("TYPE %04X ", rh->type); + pr_debug("SGS_LEFT %u %02X\n", rh->segments_left, rh->segments_left); + + pr_debug("IPv6 RT segsleft %02X ", + segsleft_match(rtinfo->segsleft[0], rtinfo->segsleft[1], + rh->segments_left, + !!(rtinfo->invflags & IP6T_RT_INV_SGS))); + pr_debug("type %02X %02X %02X ", + rtinfo->rt_type, rh->type, + (!(rtinfo->flags & IP6T_RT_TYP) || + ((rtinfo->rt_type == rh->type) ^ + !!(rtinfo->invflags & IP6T_RT_INV_TYP)))); + pr_debug("len %02X %04X %02X ", + rtinfo->hdrlen, hdrlen, + !(rtinfo->flags & IP6T_RT_LEN) || + ((rtinfo->hdrlen == hdrlen) ^ + !!(rtinfo->invflags & IP6T_RT_INV_LEN))); + pr_debug("res %02X %02X %02X ", + rtinfo->flags & IP6T_RT_RES, + ((const struct rt0_hdr *)rh)->reserved, + !((rtinfo->flags & IP6T_RT_RES) && + (((const struct rt0_hdr *)rh)->reserved))); ret = (rh != NULL) && @@ -129,18 +125,18 @@ match(const struct sk_buff *skb, ret = (*rp == 0); } - DEBUGP("#%d ", rtinfo->addrnr); + pr_debug("#%d ", rtinfo->addrnr); if (!(rtinfo->flags & IP6T_RT_FST)) { return ret; } else if (rtinfo->flags & IP6T_RT_FST_NSTRICT) { - DEBUGP("Not strict "); + pr_debug("Not strict "); if (rtinfo->addrnr > (unsigned int)((hdrlen - 8) / 16)) { - DEBUGP("There isn't enough space\n"); - return 0; + pr_debug("There isn't enough space\n"); + return false; } else { unsigned int i = 0; - DEBUGP("#%d ", rtinfo->addrnr); + pr_debug("#%d ", rtinfo->addrnr); for (temp = 0; temp < (unsigned int)((hdrlen - 8) / 16); temp++) { @@ -154,25 +150,25 @@ match(const struct sk_buff *skb, BUG_ON(ap == NULL); if (ipv6_addr_equal(ap, &rtinfo->addrs[i])) { - DEBUGP("i=%d temp=%d;\n", i, temp); + pr_debug("i=%d temp=%d;\n", i, temp); i++; } if (i == rtinfo->addrnr) break; } - DEBUGP("i=%d #%d\n", i, rtinfo->addrnr); + pr_debug("i=%d #%d\n", i, rtinfo->addrnr); if (i == rtinfo->addrnr) return ret; else - return 0; + return false; } } else { - DEBUGP("Strict "); + pr_debug("Strict "); if (rtinfo->addrnr > (unsigned int)((hdrlen - 8) / 16)) { - DEBUGP("There isn't enough space\n"); - return 0; + pr_debug("There isn't enough space\n"); + return false; } else { - DEBUGP("#%d ", rtinfo->addrnr); + pr_debug("#%d ", rtinfo->addrnr); for (temp = 0; temp < rtinfo->addrnr; temp++) { ap = skb_header_pointer(skb, ptr @@ -185,20 +181,20 @@ match(const struct sk_buff *skb, if (!ipv6_addr_equal(ap, &rtinfo->addrs[temp])) break; } - DEBUGP("temp=%d #%d\n", temp, rtinfo->addrnr); - if ((temp == rtinfo->addrnr) && - (temp == (unsigned int)((hdrlen - 8) / 16))) + pr_debug("temp=%d #%d\n", temp, rtinfo->addrnr); + if (temp == rtinfo->addrnr && + temp == (unsigned int)((hdrlen - 8) / 16)) return ret; else - return 0; + return false; } } - return 0; + return false; } /* Called when user tries to insert an entry of this type. */ -static int +static bool checkentry(const char *tablename, const void *entry, const struct xt_match *match, @@ -208,21 +204,21 @@ checkentry(const char *tablename, const struct ip6t_rt *rtinfo = matchinfo; if (rtinfo->invflags & ~IP6T_RT_INV_MASK) { - DEBUGP("ip6t_rt: unknown flags %X\n", rtinfo->invflags); - return 0; + pr_debug("ip6t_rt: unknown flags %X\n", rtinfo->invflags); + return false; } if ((rtinfo->flags & (IP6T_RT_RES | IP6T_RT_FST_MASK)) && (!(rtinfo->flags & IP6T_RT_TYP) || (rtinfo->rt_type != 0) || (rtinfo->invflags & IP6T_RT_INV_TYP))) { - DEBUGP("`--rt-type 0' required before `--rt-0-*'"); - return 0; + pr_debug("`--rt-type 0' required before `--rt-0-*'"); + return false; } - return 1; + return true; } -static struct xt_match rt_match = { +static struct xt_match rt_match __read_mostly = { .name = "rt", .family = AF_INET6, .match = match, diff --git a/net/ipv6/netfilter/ip6table_mangle.c b/net/ipv6/netfilter/ip6table_mangle.c index f2d26495f413..f0a9efa67fb5 100644 --- a/net/ipv6/netfilter/ip6table_mangle.c +++ b/net/ipv6/netfilter/ip6table_mangle.c @@ -21,12 +21,6 @@ MODULE_DESCRIPTION("ip6tables mangle table"); (1 << NF_IP6_LOCAL_OUT) | \ (1 << NF_IP6_POST_ROUTING)) -#if 0 -#define DEBUGP(x, args...) printk(KERN_DEBUG x, ## args) -#else -#define DEBUGP(x, args...) -#endif - static struct { struct ip6t_replace repl; diff --git a/net/ipv6/netfilter/ip6table_raw.c b/net/ipv6/netfilter/ip6table_raw.c index 0acda45d455d..ec290e4ebdd8 100644 --- a/net/ipv6/netfilter/ip6table_raw.c +++ b/net/ipv6/netfilter/ip6table_raw.c @@ -8,12 +8,6 @@ #define RAW_VALID_HOOKS ((1 << NF_IP6_PRE_ROUTING) | (1 << NF_IP6_LOCAL_OUT)) -#if 0 -#define DEBUGP(x, args...) printk(KERN_DEBUG x, ## args) -#else -#define DEBUGP(x, args...) -#endif - static struct { struct ip6t_replace repl; diff --git a/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c b/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c index 1b1797f1f33d..3153e15e0f7c 100644 --- a/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c +++ b/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c @@ -26,12 +26,6 @@ #include <net/netfilter/nf_conntrack_l3proto.h> #include <net/netfilter/nf_conntrack_core.h> -#if 0 -#define DEBUGP printk -#else -#define DEBUGP(format, args...) -#endif - static int ipv6_pkt_to_tuple(const struct sk_buff *skb, unsigned int nhoff, struct nf_conntrack_tuple *tuple) { @@ -92,8 +86,8 @@ static int ipv6_print_conntrack(struct seq_file *s, * - Note also special handling of AUTH header. Thanks to IPsec wizards. */ -int nf_ct_ipv6_skip_exthdr(struct sk_buff *skb, int start, u8 *nexthdrp, - int len) +static int nf_ct_ipv6_skip_exthdr(const struct sk_buff *skb, int start, + u8 *nexthdrp, int len) { u8 nexthdr = *nexthdrp; @@ -123,22 +117,25 @@ int nf_ct_ipv6_skip_exthdr(struct sk_buff *skb, int start, u8 *nexthdrp, return start; } -static int -ipv6_prepare(struct sk_buff **pskb, unsigned int hooknum, unsigned int *dataoff, - u_int8_t *protonum) +static int ipv6_get_l4proto(const struct sk_buff *skb, unsigned int nhoff, + unsigned int *dataoff, u_int8_t *protonum) { - unsigned int extoff = (u8 *)(ipv6_hdr(*pskb) + 1) - (*pskb)->data; - unsigned char pnum = ipv6_hdr(*pskb)->nexthdr; - int protoff = nf_ct_ipv6_skip_exthdr(*pskb, extoff, &pnum, - (*pskb)->len - extoff); + unsigned int extoff = nhoff + sizeof(struct ipv6hdr); + unsigned char pnum; + int protoff; + + if (skb_copy_bits(skb, nhoff + offsetof(struct ipv6hdr, nexthdr), + &pnum, sizeof(pnum)) != 0) { + pr_debug("ip6_conntrack_core: can't get nexthdr\n"); + return -NF_ACCEPT; + } + protoff = nf_ct_ipv6_skip_exthdr(skb, extoff, &pnum, skb->len - extoff); /* - * (protoff == (*pskb)->len) mean that the packet doesn't have no data + * (protoff == skb->len) mean that the packet doesn't have no data * except of IPv6 & ext headers. but it's tracked anyway. - YK */ - if ((protoff < 0) || (protoff > (*pskb)->len)) { - DEBUGP("ip6_conntrack_core: can't find proto in pkt\n"); - NF_CT_STAT_INC_ATOMIC(error); - NF_CT_STAT_INC_ATOMIC(invalid); + if ((protoff < 0) || (protoff > skb->len)) { + pr_debug("ip6_conntrack_core: can't find proto in pkt\n"); return -NF_ACCEPT; } @@ -147,11 +144,6 @@ ipv6_prepare(struct sk_buff **pskb, unsigned int hooknum, unsigned int *dataoff, return NF_ACCEPT; } -static u_int32_t ipv6_get_features(const struct nf_conntrack_tuple *tuple) -{ - return NF_CT_F_BASIC; -} - static unsigned int ipv6_confirm(unsigned int hooknum, struct sk_buff **pskb, const struct net_device *in, @@ -183,7 +175,7 @@ static unsigned int ipv6_confirm(unsigned int hooknum, protoff = nf_ct_ipv6_skip_exthdr(*pskb, extoff, &pnum, (*pskb)->len - extoff); if (protoff > (*pskb)->len || pnum == NEXTHDR_FRAGMENT) { - DEBUGP("proto header not found\n"); + pr_debug("proto header not found\n"); return NF_ACCEPT; } @@ -381,14 +373,14 @@ static int ipv6_nfattr_to_tuple(struct nfattr *tb[], } #endif -struct nf_conntrack_l3proto nf_conntrack_l3proto_ipv6 = { +struct nf_conntrack_l3proto nf_conntrack_l3proto_ipv6 __read_mostly = { .l3proto = PF_INET6, .name = "ipv6", .pkt_to_tuple = ipv6_pkt_to_tuple, .invert_tuple = ipv6_invert_tuple, .print_tuple = ipv6_print_tuple, .print_conntrack = ipv6_print_conntrack, - .prepare = ipv6_prepare, + .get_l4proto = ipv6_get_l4proto, #if defined(CONFIG_NF_CT_NETLINK) || defined(CONFIG_NF_CT_NETLINK_MODULE) .tuple_to_nfattr = ipv6_tuple_to_nfattr, .nfattr_to_tuple = ipv6_nfattr_to_tuple, @@ -397,7 +389,6 @@ struct nf_conntrack_l3proto nf_conntrack_l3proto_ipv6 = { .ctl_table_path = nf_net_netfilter_sysctl_path, .ctl_table = nf_ct_ipv6_sysctl_table, #endif - .get_features = ipv6_get_features, .me = THIS_MODULE, }; diff --git a/net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c b/net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c index 8814b95b2326..ab154fb90018 100644 --- a/net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c +++ b/net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c @@ -27,12 +27,6 @@ static unsigned long nf_ct_icmpv6_timeout __read_mostly = 30*HZ; -#if 0 -#define DEBUGP printk -#else -#define DEBUGP(format, args...) -#endif - static int icmpv6_pkt_to_tuple(const struct sk_buff *skb, unsigned int dataoff, struct nf_conntrack_tuple *tuple) @@ -125,8 +119,8 @@ static int icmpv6_new(struct nf_conn *conntrack, if (type < 0 || type >= sizeof(valid_new) || !valid_new[type]) { /* Can't create a new ICMPv6 `conn' with this. */ - DEBUGP("icmpv6: can't create new conn with type %u\n", - type + 128); + pr_debug("icmpv6: can't create new conn with type %u\n", + type + 128); NF_CT_DUMP_TUPLE(&conntrack->tuplehash[0].tuple); return 0; } @@ -142,60 +136,36 @@ icmpv6_error_message(struct sk_buff *skb, { struct nf_conntrack_tuple intuple, origtuple; struct nf_conntrack_tuple_hash *h; - struct icmp6hdr _hdr, *hp; - unsigned int inip6off; struct nf_conntrack_l4proto *inproto; - u_int8_t inprotonum; - unsigned int inprotoff; NF_CT_ASSERT(skb->nfct == NULL); - hp = skb_header_pointer(skb, icmp6off, sizeof(_hdr), &_hdr); - if (hp == NULL) { - DEBUGP("icmpv6_error: Can't get ICMPv6 hdr.\n"); - return -NF_ACCEPT; - } - - inip6off = icmp6off + sizeof(_hdr); - if (skb_copy_bits(skb, inip6off+offsetof(struct ipv6hdr, nexthdr), - &inprotonum, sizeof(inprotonum)) != 0) { - DEBUGP("icmpv6_error: Can't get nexthdr in inner IPv6 header.\n"); - return -NF_ACCEPT; - } - inprotoff = nf_ct_ipv6_skip_exthdr(skb, - inip6off + sizeof(struct ipv6hdr), - &inprotonum, - skb->len - inip6off - - sizeof(struct ipv6hdr)); - - if ((inprotoff > skb->len) || (inprotonum == NEXTHDR_FRAGMENT)) { - DEBUGP("icmpv6_error: Can't get protocol header in ICMPv6 payload.\n"); + /* Are they talking about one of our connections? */ + if (!nf_ct_get_tuplepr(skb, + skb_network_offset(skb) + + sizeof(struct ipv6hdr) + + sizeof(struct icmp6hdr), + PF_INET6, &origtuple)) { + pr_debug("icmpv6_error: Can't get tuple\n"); return -NF_ACCEPT; } /* rcu_read_lock()ed by nf_hook_slow */ - inproto = __nf_ct_l4proto_find(PF_INET6, inprotonum); - - /* Are they talking about one of our connections? */ - if (!nf_ct_get_tuple(skb, inip6off, inprotoff, PF_INET6, inprotonum, - &origtuple, &nf_conntrack_l3proto_ipv6, inproto)) { - DEBUGP("icmpv6_error: Can't get tuple\n"); - return -NF_ACCEPT; - } + inproto = __nf_ct_l4proto_find(PF_INET6, origtuple.dst.protonum); /* Ordinarily, we'd expect the inverted tupleproto, but it's been preserved inside the ICMP. */ if (!nf_ct_invert_tuple(&intuple, &origtuple, &nf_conntrack_l3proto_ipv6, inproto)) { - DEBUGP("icmpv6_error: Can't invert tuple\n"); + pr_debug("icmpv6_error: Can't invert tuple\n"); return -NF_ACCEPT; } *ctinfo = IP_CT_RELATED; - h = nf_conntrack_find_get(&intuple, NULL); + h = nf_conntrack_find_get(&intuple); if (!h) { - DEBUGP("icmpv6_error: no match\n"); + pr_debug("icmpv6_error: no match\n"); return -NF_ACCEPT; } else { if (NF_CT_DIRECTION(h) == IP_CT_DIR_REPLY) @@ -306,7 +276,7 @@ static struct ctl_table icmpv6_sysctl_table[] = { }; #endif /* CONFIG_SYSCTL */ -struct nf_conntrack_l4proto nf_conntrack_l4proto_icmpv6 = +struct nf_conntrack_l4proto nf_conntrack_l4proto_icmpv6 __read_mostly = { .l3proto = PF_INET6, .l4proto = IPPROTO_ICMPV6, @@ -327,5 +297,3 @@ struct nf_conntrack_l4proto nf_conntrack_l4proto_icmpv6 = .ctl_table = icmpv6_sysctl_table, #endif }; - -EXPORT_SYMBOL(nf_conntrack_l4proto_icmpv6); diff --git a/net/ipv6/netfilter/nf_conntrack_reasm.c b/net/ipv6/netfilter/nf_conntrack_reasm.c index 347ab7608231..25442a8c1ba8 100644 --- a/net/ipv6/netfilter/nf_conntrack_reasm.c +++ b/net/ipv6/netfilter/nf_conntrack_reasm.c @@ -44,12 +44,6 @@ #include <linux/kernel.h> #include <linux/module.h> -#if 0 -#define DEBUGP printk -#else -#define DEBUGP(format, args...) -#endif - #define NF_CT_FRAG6_HIGH_THRESH 262144 /* == 256*1024 */ #define NF_CT_FRAG6_LOW_THRESH 196608 /* == 192*1024 */ #define NF_CT_FRAG6_TIMEOUT IPV6_FRAG_TIMEOUT @@ -343,7 +337,7 @@ nf_ct_frag6_create(unsigned int hash, __be32 id, struct in6_addr *src, str struct nf_ct_frag6_queue *fq; if ((fq = frag_alloc_queue()) == NULL) { - DEBUGP("Can't alloc new queue\n"); + pr_debug("Can't alloc new queue\n"); goto oom; } @@ -393,7 +387,7 @@ static int nf_ct_frag6_queue(struct nf_ct_frag6_queue *fq, struct sk_buff *skb, int offset, end; if (fq->last_in & COMPLETE) { - DEBUGP("Allready completed\n"); + pr_debug("Allready completed\n"); goto err; } @@ -402,7 +396,7 @@ static int nf_ct_frag6_queue(struct nf_ct_frag6_queue *fq, struct sk_buff *skb, ((u8 *)(fhdr + 1) - (u8 *)(ipv6_hdr(skb) + 1))); if ((unsigned int)end > IPV6_MAXPLEN) { - DEBUGP("offset is too large.\n"); + pr_debug("offset is too large.\n"); return -1; } @@ -420,7 +414,7 @@ static int nf_ct_frag6_queue(struct nf_ct_frag6_queue *fq, struct sk_buff *skb, */ if (end < fq->len || ((fq->last_in & LAST_IN) && end != fq->len)) { - DEBUGP("already received last fragment\n"); + pr_debug("already received last fragment\n"); goto err; } fq->last_in |= LAST_IN; @@ -433,13 +427,13 @@ static int nf_ct_frag6_queue(struct nf_ct_frag6_queue *fq, struct sk_buff *skb, /* RFC2460 says always send parameter problem in * this case. -DaveM */ - DEBUGP("the end of this fragment is not rounded to 8 bytes.\n"); + pr_debug("end of fragment not rounded to 8 bytes.\n"); return -1; } if (end > fq->len) { /* Some bits beyond end -> corruption. */ if (fq->last_in & LAST_IN) { - DEBUGP("last packet already reached.\n"); + pr_debug("last packet already reached.\n"); goto err; } fq->len = end; @@ -451,11 +445,11 @@ static int nf_ct_frag6_queue(struct nf_ct_frag6_queue *fq, struct sk_buff *skb, /* Point into the IP datagram 'data' part. */ if (!pskb_pull(skb, (u8 *) (fhdr + 1) - skb->data)) { - DEBUGP("queue: message is too short.\n"); + pr_debug("queue: message is too short.\n"); goto err; } if (pskb_trim_rcsum(skb, end - offset)) { - DEBUGP("Can't trim\n"); + pr_debug("Can't trim\n"); goto err; } @@ -480,11 +474,11 @@ static int nf_ct_frag6_queue(struct nf_ct_frag6_queue *fq, struct sk_buff *skb, if (i > 0) { offset += i; if (end <= offset) { - DEBUGP("overlap\n"); + pr_debug("overlap\n"); goto err; } if (!pskb_pull(skb, i)) { - DEBUGP("Can't pull\n"); + pr_debug("Can't pull\n"); goto err; } if (skb->ip_summed != CHECKSUM_UNNECESSARY) @@ -503,7 +497,7 @@ static int nf_ct_frag6_queue(struct nf_ct_frag6_queue *fq, struct sk_buff *skb, /* Eat head of the next overlapped fragment * and leave the loop. The next ones cannot overlap. */ - DEBUGP("Eat head of the overlapped parts.: %d", i); + pr_debug("Eat head of the overlapped parts.: %d", i); if (!pskb_pull(next, i)) goto err; @@ -586,13 +580,13 @@ nf_ct_frag6_reasm(struct nf_ct_frag6_queue *fq, struct net_device *dev) sizeof(struct ipv6hdr) + fq->len - sizeof(struct frag_hdr)); if (payload_len > IPV6_MAXPLEN) { - DEBUGP("payload len is too large.\n"); + pr_debug("payload len is too large.\n"); goto out_oversize; } /* Head of list must not be cloned. */ if (skb_cloned(head) && pskb_expand_head(head, 0, 0, GFP_ATOMIC)) { - DEBUGP("skb is cloned but can't expand head"); + pr_debug("skb is cloned but can't expand head"); goto out_oom; } @@ -604,7 +598,7 @@ nf_ct_frag6_reasm(struct nf_ct_frag6_queue *fq, struct net_device *dev) int i, plen = 0; if ((clone = alloc_skb(0, GFP_ATOMIC)) == NULL) { - DEBUGP("Can't alloc skb\n"); + pr_debug("Can't alloc skb\n"); goto out_oom; } clone->next = head->next; @@ -719,11 +713,11 @@ find_prev_fhdr(struct sk_buff *skb, u8 *prevhdrp, int *prevhoff, int *fhoff) return -1; } if (len < (int)sizeof(struct ipv6_opt_hdr)) { - DEBUGP("too short\n"); + pr_debug("too short\n"); return -1; } if (nexthdr == NEXTHDR_NONE) { - DEBUGP("next header is none\n"); + pr_debug("next header is none\n"); return -1; } if (skb_copy_bits(skb, start, &hdr, sizeof(hdr))) @@ -764,7 +758,7 @@ struct sk_buff *nf_ct_frag6_gather(struct sk_buff *skb) /* Jumbo payload inhibits frag. header */ if (ipv6_hdr(skb)->payload_len == 0) { - DEBUGP("payload len = 0\n"); + pr_debug("payload len = 0\n"); return skb; } @@ -773,14 +767,14 @@ struct sk_buff *nf_ct_frag6_gather(struct sk_buff *skb) clone = skb_clone(skb, GFP_ATOMIC); if (clone == NULL) { - DEBUGP("Can't clone skb\n"); + pr_debug("Can't clone skb\n"); return skb; } NFCT_FRAG6_CB(clone)->orig = skb; if (!pskb_may_pull(clone, fhoff + sizeof(*fhdr))) { - DEBUGP("message is too short.\n"); + pr_debug("message is too short.\n"); goto ret_orig; } @@ -789,7 +783,7 @@ struct sk_buff *nf_ct_frag6_gather(struct sk_buff *skb) fhdr = (struct frag_hdr *)skb_transport_header(clone); if (!(fhdr->frag_off & htons(0xFFF9))) { - DEBUGP("Invalid fragment offset\n"); + pr_debug("Invalid fragment offset\n"); /* It is not a fragmented frame */ goto ret_orig; } @@ -799,7 +793,7 @@ struct sk_buff *nf_ct_frag6_gather(struct sk_buff *skb) fq = fq_find(fhdr->identification, &hdr->saddr, &hdr->daddr); if (fq == NULL) { - DEBUGP("Can't find and can't create new queue\n"); + pr_debug("Can't find and can't create new queue\n"); goto ret_orig; } @@ -807,7 +801,7 @@ struct sk_buff *nf_ct_frag6_gather(struct sk_buff *skb) if (nf_ct_frag6_queue(fq, clone, fhdr, nhoff) < 0) { spin_unlock(&fq->lock); - DEBUGP("Can't insert skb to queue\n"); + pr_debug("Can't insert skb to queue\n"); fq_put(fq, NULL); goto ret_orig; } @@ -815,7 +809,7 @@ struct sk_buff *nf_ct_frag6_gather(struct sk_buff *skb) if (fq->last_in == (FIRST_IN|LAST_IN) && fq->meat == fq->len) { ret_skb = nf_ct_frag6_reasm(fq, dev); if (ret_skb == NULL) - DEBUGP("Can't reassemble fragmented packets\n"); + pr_debug("Can't reassemble fragmented packets\n"); } spin_unlock(&fq->lock); diff --git a/net/ipv6/raw.c b/net/ipv6/raw.c index a58459a76684..77167afa3455 100644 --- a/net/ipv6/raw.c +++ b/net/ipv6/raw.c @@ -49,7 +49,7 @@ #include <net/udp.h> #include <net/inet_common.h> #include <net/tcp_states.h> -#ifdef CONFIG_IPV6_MIP6 +#if defined(CONFIG_IPV6_MIP6) || defined(CONFIG_IPV6_MIP6_MODULE) #include <net/mip6.h> #endif @@ -137,6 +137,28 @@ static __inline__ int icmpv6_filter(struct sock *sk, struct sk_buff *skb) return 0; } +#if defined(CONFIG_IPV6_MIP6) || defined(CONFIG_IPV6_MIP6_MODULE) +static int (*mh_filter)(struct sock *sock, struct sk_buff *skb); + +int rawv6_mh_filter_register(int (*filter)(struct sock *sock, + struct sk_buff *skb)) +{ + rcu_assign_pointer(mh_filter, filter); + return 0; +} +EXPORT_SYMBOL(rawv6_mh_filter_register); + +int rawv6_mh_filter_unregister(int (*filter)(struct sock *sock, + struct sk_buff *skb)) +{ + rcu_assign_pointer(mh_filter, NULL); + synchronize_rcu(); + return 0; +} +EXPORT_SYMBOL(rawv6_mh_filter_unregister); + +#endif + /* * demultiplex raw sockets. * (should consider queueing the skb in the sock receive_queue @@ -178,16 +200,22 @@ int ipv6_raw_deliver(struct sk_buff *skb, int nexthdr) case IPPROTO_ICMPV6: filtered = icmpv6_filter(sk, skb); break; -#ifdef CONFIG_IPV6_MIP6 + +#if defined(CONFIG_IPV6_MIP6) || defined(CONFIG_IPV6_MIP6_MODULE) case IPPROTO_MH: + { /* XXX: To validate MH only once for each packet, * this is placed here. It should be after checking * xfrm policy, however it doesn't. The checking xfrm * policy is placed in rawv6_rcv() because it is * required for each socket. */ - filtered = mip6_mh_filter(sk, skb); + int (*filter)(struct sock *sock, struct sk_buff *skb); + + filter = rcu_dereference(mh_filter); + filtered = filter ? filter(sk, skb) : 0; break; + } #endif default: filtered = 0; @@ -611,9 +639,7 @@ static int rawv6_probe_proto_opt(struct flowi *fl, struct msghdr *msg) struct iovec *iov; u8 __user *type = NULL; u8 __user *code = NULL; -#ifdef CONFIG_IPV6_MIP6 u8 len = 0; -#endif int probed = 0; int i; @@ -646,7 +672,6 @@ static int rawv6_probe_proto_opt(struct flowi *fl, struct msghdr *msg) probed = 1; } break; -#ifdef CONFIG_IPV6_MIP6 case IPPROTO_MH: if (iov->iov_base && iov->iov_len < 1) break; @@ -660,7 +685,6 @@ static int rawv6_probe_proto_opt(struct flowi *fl, struct msghdr *msg) len += iov->iov_len; break; -#endif default: probed = 1; break; @@ -858,11 +882,10 @@ back_from_confirm: ip6_flush_pending_frames(sk); else if (!(msg->msg_flags & MSG_MORE)) err = rawv6_push_pending_frames(sk, &fl, rp); + release_sock(sk); } done: dst_release(dst); - if (!inet->hdrincl) - release_sock(sk); out: fl6_sock_release(flowlabel); return err<0?err:len; @@ -1256,7 +1279,7 @@ static int raw6_seq_show(struct seq_file *seq, void *v) return 0; } -static struct seq_operations raw6_seq_ops = { +static const struct seq_operations raw6_seq_ops = { .start = raw6_seq_start, .next = raw6_seq_next, .stop = raw6_seq_stop, diff --git a/net/ipv6/route.c b/net/ipv6/route.c index fe8d9837f9f8..55ea80fac601 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -1983,9 +1983,10 @@ static int rt6_mtu_change_route(struct rt6_info *rt, void *p_arg) !dst_metric_locked(&rt->u.dst, RTAX_MTU) && (dst_mtu(&rt->u.dst) > arg->mtu || (dst_mtu(&rt->u.dst) < arg->mtu && - dst_mtu(&rt->u.dst) == idev->cnf.mtu6))) + dst_mtu(&rt->u.dst) == idev->cnf.mtu6))) { rt->u.dst.metrics[RTAX_MTU-1] = arg->mtu; - rt->u.dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(arg->mtu); + rt->u.dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(arg->mtu); + } return 0; } @@ -2555,7 +2556,7 @@ void __init ip6_route_init(void) #endif ip6_dst_ops.kmem_cachep = kmem_cache_create("ip6_dst_cache", sizeof(struct rt6_info), 0, - SLAB_HWCACHE_ALIGN|SLAB_PANIC, NULL, NULL); + SLAB_HWCACHE_ALIGN|SLAB_PANIC, NULL); ip6_dst_blackhole_ops.kmem_cachep = ip6_dst_ops.kmem_cachep; fib6_init(); diff --git a/net/ipv6/sit.c b/net/ipv6/sit.c index 1efa95a99f45..eb20bb690abd 100644 --- a/net/ipv6/sit.c +++ b/net/ipv6/sit.c @@ -532,7 +532,8 @@ static int ipip6_tunnel_xmit(struct sk_buff *skb, struct net_device *dev) */ max_headroom = LL_RESERVED_SPACE(tdev)+sizeof(struct iphdr); - if (skb_headroom(skb) < max_headroom || skb_cloned(skb) || skb_shared(skb)) { + if (skb_headroom(skb) < max_headroom || skb_shared(skb) || + (skb_cloned(skb) && !skb_clone_writable(skb, 0))) { struct sk_buff *new_skb = skb_realloc_headroom(skb, max_headroom); if (!new_skb) { ip_rt_put(rt); diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c index 9b81cbce547a..3e06799b37a6 100644 --- a/net/ipv6/tcp_ipv6.c +++ b/net/ipv6/tcp_ipv6.c @@ -56,7 +56,6 @@ #include <net/inet_ecn.h> #include <net/protocol.h> #include <net/xfrm.h> -#include <net/addrconf.h> #include <net/snmp.h> #include <net/dsfield.h> #include <net/timewait_sock.h> @@ -484,17 +483,6 @@ static int tcp_v6_send_synack(struct sock *sk, struct request_sock *req, if (dst == NULL) { opt = np->opt; - if (opt == NULL && - np->rxopt.bits.osrcrt == 2 && - treq->pktopts) { - struct sk_buff *pktopts = treq->pktopts; - struct inet6_skb_parm *rxopt = IP6CB(pktopts); - if (rxopt->srcrt) - opt = ipv6_invert_rthdr(sk, - (struct ipv6_rt_hdr *)(skb_network_header(pktopts) + - rxopt->srcrt)); - } - if (opt && opt->srcrt) { struct rt0_hdr *rt0 = (struct rt0_hdr *) opt->srcrt; ipv6_addr_copy(&final, &fl.fl6_dst); @@ -551,7 +539,7 @@ static struct tcp_md5sig_key *tcp_v6_md5_do_lookup(struct sock *sk, for (i = 0; i < tp->md5sig_info->entries6; i++) { if (ipv6_addr_cmp(&tp->md5sig_info->keys6[i].addr, addr) == 0) - return (struct tcp_md5sig_key *)&tp->md5sig_info->keys6[i]; + return &tp->md5sig_info->keys6[i].base; } return NULL; } @@ -579,9 +567,9 @@ static int tcp_v6_md5_do_add(struct sock *sk, struct in6_addr *peer, key = (struct tcp6_md5sig_key*) tcp_v6_md5_do_lookup(sk, peer); if (key) { /* modify existing entry - just update that one */ - kfree(key->key); - key->key = newkey; - key->keylen = newkeylen; + kfree(key->base.key); + key->base.key = newkey; + key->base.keylen = newkeylen; } else { /* reallocate new list if current one is full. */ if (!tp->md5sig_info) { @@ -615,8 +603,8 @@ static int tcp_v6_md5_do_add(struct sock *sk, struct in6_addr *peer, ipv6_addr_copy(&tp->md5sig_info->keys6[tp->md5sig_info->entries6].addr, peer); - tp->md5sig_info->keys6[tp->md5sig_info->entries6].key = newkey; - tp->md5sig_info->keys6[tp->md5sig_info->entries6].keylen = newkeylen; + tp->md5sig_info->keys6[tp->md5sig_info->entries6].base.key = newkey; + tp->md5sig_info->keys6[tp->md5sig_info->entries6].base.keylen = newkeylen; tp->md5sig_info->entries6++; } @@ -638,7 +626,7 @@ static int tcp_v6_md5_do_del(struct sock *sk, struct in6_addr *peer) for (i = 0; i < tp->md5sig_info->entries6; i++) { if (ipv6_addr_cmp(&tp->md5sig_info->keys6[i].addr, peer) == 0) { /* Free the key */ - kfree(tp->md5sig_info->keys6[i].key); + kfree(tp->md5sig_info->keys6[i].base.key); tp->md5sig_info->entries6--; if (tp->md5sig_info->entries6 == 0) { @@ -669,7 +657,7 @@ static void tcp_v6_clear_md5_list (struct sock *sk) if (tp->md5sig_info->entries6) { for (i = 0; i < tp->md5sig_info->entries6; i++) - kfree(tp->md5sig_info->keys6[i].key); + kfree(tp->md5sig_info->keys6[i].base.key); tp->md5sig_info->entries6 = 0; tcp_free_md5sig_pool(); } @@ -680,7 +668,7 @@ static void tcp_v6_clear_md5_list (struct sock *sk) if (tp->md5sig_info->entries4) { for (i = 0; i < tp->md5sig_info->entries4; i++) - kfree(tp->md5sig_info->keys4[i].key); + kfree(tp->md5sig_info->keys4[i].base.key); tp->md5sig_info->entries4 = 0; tcp_free_md5sig_pool(); } @@ -1392,15 +1380,6 @@ static struct sock * tcp_v6_syn_recv_sock(struct sock *sk, struct sk_buff *skb, if (sk_acceptq_is_full(sk)) goto out_overflow; - if (np->rxopt.bits.osrcrt == 2 && - opt == NULL && treq->pktopts) { - struct inet6_skb_parm *rxopt = IP6CB(treq->pktopts); - if (rxopt->srcrt) - opt = ipv6_invert_rthdr(sk, - (struct ipv6_rt_hdr *)(skb_network_header(treq->pktopts) + - rxopt->srcrt)); - } - if (dst == NULL) { struct in6_addr *final_p = NULL, final; struct flowi fl; diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c index 4210951edb6e..c347f3e30e2e 100644 --- a/net/ipv6/udp.c +++ b/net/ipv6/udp.c @@ -555,6 +555,8 @@ static int udp_v6_push_pending_frames(struct sock *sk) out: up->len = 0; up->pending = 0; + if (!err) + UDP6_INC_STATS_USER(UDP_MIB_OUTDATAGRAMS, up->pcflag); return err; } @@ -823,10 +825,8 @@ do_append_data: release_sock(sk); out: fl6_sock_release(flowlabel); - if (!err) { - UDP6_INC_STATS_USER(UDP_MIB_OUTDATAGRAMS, is_udplite); + if (!err) return len; - } /* * ENOBUFS = no kernel mem, SOCK_NOSPACE = no sndbuf space. Reporting * ENOBUFS might not be good (it's not tunable per se), but otherwise diff --git a/net/ipv6/xfrm6_policy.c b/net/ipv6/xfrm6_policy.c index 1faa2ea80afc..3ec0c4770ee3 100644 --- a/net/ipv6/xfrm6_policy.c +++ b/net/ipv6/xfrm6_policy.c @@ -18,7 +18,7 @@ #include <net/ip.h> #include <net/ipv6.h> #include <net/ip6_route.h> -#ifdef CONFIG_IPV6_MIP6 +#if defined(CONFIG_IPV6_MIP6) || defined(CONFIG_IPV6_MIP6_MODULE) #include <net/mip6.h> #endif @@ -318,7 +318,7 @@ _decode_session6(struct sk_buff *skb, struct flowi *fl) fl->proto = nexthdr; return; -#ifdef CONFIG_IPV6_MIP6 +#if defined(CONFIG_IPV6_MIP6) || defined(CONFIG_IPV6_MIP6_MODULE) case IPPROTO_MH: if (pskb_may_pull(skb, nh + offset + 3 - skb->data)) { struct ip6_mh *mh; diff --git a/net/ipv6/xfrm6_state.c b/net/ipv6/xfrm6_state.c index baa461b9f74e..cdadb4847469 100644 --- a/net/ipv6/xfrm6_state.c +++ b/net/ipv6/xfrm6_state.c @@ -65,7 +65,7 @@ __xfrm6_state_sort(struct xfrm_state **dst, struct xfrm_state **src, int n) goto end; /* Rule 2: select MIPv6 RO or inbound trigger */ -#ifdef CONFIG_IPV6_MIP6 +#if defined(CONFIG_IPV6_MIP6) || defined(CONFIG_IPV6_MIP6_MODULE) for (i = 0; i < n; i++) { if (src[i] && (src[i]->props.mode == XFRM_MODE_ROUTEOPTIMIZATION || @@ -130,7 +130,7 @@ __xfrm6_tmpl_sort(struct xfrm_tmpl **dst, struct xfrm_tmpl **src, int n) goto end; /* Rule 2: select MIPv6 RO or inbound trigger */ -#ifdef CONFIG_IPV6_MIP6 +#if defined(CONFIG_IPV6_MIP6) || defined(CONFIG_IPV6_MIP6_MODULE) for (i = 0; i < n; i++) { if (src[i] && (src[i]->mode == XFRM_MODE_ROUTEOPTIMIZATION || diff --git a/net/ipv6/xfrm6_tunnel.c b/net/ipv6/xfrm6_tunnel.c index 5502cc948dfb..30f3236c402a 100644 --- a/net/ipv6/xfrm6_tunnel.c +++ b/net/ipv6/xfrm6_tunnel.c @@ -84,7 +84,7 @@ static int xfrm6_tunnel_spi_init(void) xfrm6_tunnel_spi_kmem = kmem_cache_create("xfrm6_tunnel_spi", sizeof(struct xfrm6_tunnel_spi), 0, SLAB_HWCACHE_ALIGN, - NULL, NULL); + NULL); if (!xfrm6_tunnel_spi_kmem) return -ENOMEM; @@ -379,3 +379,4 @@ static void __exit xfrm6_tunnel_fini(void) module_init(xfrm6_tunnel_init); module_exit(xfrm6_tunnel_fini); MODULE_LICENSE("GPL"); +MODULE_ALIAS_XFRM_TYPE(AF_INET6, XFRM_PROTO_IPV6); diff --git a/net/ipx/ipx_proc.c b/net/ipx/ipx_proc.c index db32ac8e79bd..4226e71ae1e3 100644 --- a/net/ipx/ipx_proc.c +++ b/net/ipx/ipx_proc.c @@ -286,21 +286,21 @@ out: return 0; } -static struct seq_operations ipx_seq_interface_ops = { +static const struct seq_operations ipx_seq_interface_ops = { .start = ipx_seq_interface_start, .next = ipx_seq_interface_next, .stop = ipx_seq_interface_stop, .show = ipx_seq_interface_show, }; -static struct seq_operations ipx_seq_route_ops = { +static const struct seq_operations ipx_seq_route_ops = { .start = ipx_seq_route_start, .next = ipx_seq_route_next, .stop = ipx_seq_route_stop, .show = ipx_seq_route_show, }; -static struct seq_operations ipx_seq_socket_ops = { +static const struct seq_operations ipx_seq_socket_ops = { .start = ipx_seq_socket_start, .next = ipx_seq_socket_next, .stop = ipx_seq_interface_stop, diff --git a/net/irda/Makefile b/net/irda/Makefile index d1366c2a39cb..187f6c563a4b 100644 --- a/net/irda/Makefile +++ b/net/irda/Makefile @@ -10,6 +10,6 @@ obj-$(CONFIG_IRCOMM) += ircomm/ irda-y := iriap.o iriap_event.o irlmp.o irlmp_event.o irlmp_frame.o \ irlap.o irlap_event.o irlap_frame.o timer.o qos.o irqueue.o \ irttp.o irda_device.o irias_object.o wrapper.o af_irda.o \ - discovery.o parameters.o irmod.o + discovery.o parameters.o irnetlink.o irmod.o irda-$(CONFIG_PROC_FS) += irproc.o irda-$(CONFIG_SYSCTL) += irsysctl.o diff --git a/net/irda/af_irda.c b/net/irda/af_irda.c index dcd7e325b283..4c670cf6aefa 100644 --- a/net/irda/af_irda.c +++ b/net/irda/af_irda.c @@ -2567,7 +2567,7 @@ int __init irsock_init(void) * Remove IrDA protocol * */ -void __exit irsock_cleanup(void) +void irsock_cleanup(void) { sock_unregister(PF_IRDA); proto_unregister(&irda_proto); diff --git a/net/irda/discovery.c b/net/irda/discovery.c index f09734128674..af0cea721d2a 100644 --- a/net/irda/discovery.c +++ b/net/irda/discovery.c @@ -395,7 +395,7 @@ static int discovery_seq_show(struct seq_file *seq, void *v) return 0; } -static struct seq_operations discovery_seq_ops = { +static const struct seq_operations discovery_seq_ops = { .start = discovery_seq_start, .next = discovery_seq_next, .stop = discovery_seq_stop, diff --git a/net/irda/ircomm/ircomm_core.c b/net/irda/ircomm/ircomm_core.c index 4749f8f55391..2d63fa8e1556 100644 --- a/net/irda/ircomm/ircomm_core.c +++ b/net/irda/ircomm/ircomm_core.c @@ -562,7 +562,7 @@ static int ircomm_seq_show(struct seq_file *seq, void *v) return 0; } -static struct seq_operations ircomm_seq_ops = { +static const struct seq_operations ircomm_seq_ops = { .start = ircomm_seq_start, .next = ircomm_seq_next, .stop = ircomm_seq_stop, diff --git a/net/irda/irda_device.c b/net/irda/irda_device.c index 7b5def1ea633..435b563d29a6 100644 --- a/net/irda/irda_device.c +++ b/net/irda/irda_device.c @@ -95,14 +95,14 @@ int __init irda_device_init( void) return 0; } -static void __exit leftover_dongle(void *arg) +static void leftover_dongle(void *arg) { struct dongle_reg *reg = arg; IRDA_WARNING("IrDA: Dongle type %x not unregistered\n", reg->type); } -void __exit irda_device_cleanup(void) +void irda_device_cleanup(void) { IRDA_DEBUG(4, "%s()\n", __FUNCTION__); diff --git a/net/irda/iriap.c b/net/irda/iriap.c index 915d9384f36a..ee3889fa49ab 100644 --- a/net/irda/iriap.c +++ b/net/irda/iriap.c @@ -153,7 +153,7 @@ int __init iriap_init(void) * Initializes the IrIAP layer, called by the module cleanup code in * irmod.c */ -void __exit iriap_cleanup(void) +void iriap_cleanup(void) { irlmp_unregister_service(service_handle); @@ -1066,7 +1066,7 @@ static int irias_seq_show(struct seq_file *seq, void *v) return 0; } -static struct seq_operations irias_seq_ops = { +static const struct seq_operations irias_seq_ops = { .start = irias_seq_start, .next = irias_seq_next, .stop = irias_seq_stop, diff --git a/net/irda/irias_object.c b/net/irda/irias_object.c index 4adaae242b9e..cf302457097b 100644 --- a/net/irda/irias_object.c +++ b/net/irda/irias_object.c @@ -36,39 +36,6 @@ hashbin_t *irias_objects; */ struct ias_value irias_missing = { IAS_MISSING, 0, 0, 0, {0}}; -/* - * Function strndup (str, max) - * - * My own kernel version of strndup! - * - * Faster, check boundary... Jean II - */ -static char *strndup(char *str, size_t max) -{ - char *new_str; - int len; - - /* Check string */ - if (str == NULL) - return NULL; - /* Check length, truncate */ - len = strlen(str); - if(len > max) - len = max; - - /* Allocate new string */ - new_str = kmalloc(len + 1, GFP_ATOMIC); - if (new_str == NULL) { - IRDA_WARNING("%s: Unable to kmalloc!\n", __FUNCTION__); - return NULL; - } - - /* Copy and truncate */ - memcpy(new_str, str, len); - new_str[len] = '\0'; - - return new_str; -} /* * Function ias_new_object (name, id) @@ -90,7 +57,7 @@ struct ias_object *irias_new_object( char *name, int id) } obj->magic = IAS_OBJECT_MAGIC; - obj->name = strndup(name, IAS_MAX_CLASSNAME); + obj->name = kstrndup(name, IAS_MAX_CLASSNAME, GFP_ATOMIC); if (!obj->name) { IRDA_WARNING("%s(), Unable to allocate name!\n", __FUNCTION__); @@ -360,7 +327,7 @@ void irias_add_integer_attrib(struct ias_object *obj, char *name, int value, } attrib->magic = IAS_ATTRIB_MAGIC; - attrib->name = strndup(name, IAS_MAX_ATTRIBNAME); + attrib->name = kstrndup(name, IAS_MAX_ATTRIBNAME, GFP_ATOMIC); /* Insert value */ attrib->value = irias_new_integer_value(value); @@ -404,7 +371,7 @@ void irias_add_octseq_attrib(struct ias_object *obj, char *name, __u8 *octets, } attrib->magic = IAS_ATTRIB_MAGIC; - attrib->name = strndup(name, IAS_MAX_ATTRIBNAME); + attrib->name = kstrndup(name, IAS_MAX_ATTRIBNAME, GFP_ATOMIC); attrib->value = irias_new_octseq_value( octets, len); if (!attrib->name || !attrib->value) { @@ -446,7 +413,7 @@ void irias_add_string_attrib(struct ias_object *obj, char *name, char *value, } attrib->magic = IAS_ATTRIB_MAGIC; - attrib->name = strndup(name, IAS_MAX_ATTRIBNAME); + attrib->name = kstrndup(name, IAS_MAX_ATTRIBNAME, GFP_ATOMIC); attrib->value = irias_new_string_value(value); if (!attrib->name || !attrib->value) { @@ -506,7 +473,7 @@ struct ias_value *irias_new_string_value(char *string) value->type = IAS_STRING; value->charset = CS_ASCII; - value->t.string = strndup(string, IAS_MAX_STRING); + value->t.string = kstrndup(string, IAS_MAX_STRING, GFP_ATOMIC); if (!value->t.string) { IRDA_WARNING("%s: Unable to kmalloc!\n", __FUNCTION__); kfree(value); diff --git a/net/irda/irlan/irlan_common.c b/net/irda/irlan/irlan_common.c index ed69773b0f8e..f5778ef3ccc7 100644 --- a/net/irda/irlan/irlan_common.c +++ b/net/irda/irlan/irlan_common.c @@ -1217,7 +1217,7 @@ static int irlan_seq_show(struct seq_file *seq, void *v) return 0; } -static struct seq_operations irlan_seq_ops = { +static const struct seq_operations irlan_seq_ops = { .start = irlan_seq_start, .next = irlan_seq_next, .stop = irlan_seq_stop, diff --git a/net/irda/irlap.c b/net/irda/irlap.c index d93ebd11431e..3d76aafdb2e5 100644 --- a/net/irda/irlap.c +++ b/net/irda/irlap.c @@ -95,7 +95,7 @@ int __init irlap_init(void) return 0; } -void __exit irlap_cleanup(void) +void irlap_cleanup(void) { IRDA_ASSERT(irlap != NULL, return;); @@ -1210,7 +1210,7 @@ static int irlap_seq_show(struct seq_file *seq, void *v) return 0; } -static struct seq_operations irlap_seq_ops = { +static const struct seq_operations irlap_seq_ops = { .start = irlap_seq_start, .next = irlap_seq_next, .stop = irlap_seq_stop, diff --git a/net/irda/irlap_frame.c b/net/irda/irlap_frame.c index 3013c49ab975..25a3444a9234 100644 --- a/net/irda/irlap_frame.c +++ b/net/irda/irlap_frame.c @@ -101,6 +101,13 @@ void irlap_queue_xmit(struct irlap_cb *self, struct sk_buff *skb) irlap_insert_info(self, skb); + if (unlikely(self->mode & IRDA_MODE_MONITOR)) { + IRDA_DEBUG(3, "%s(): %s is in monitor mode\n", __FUNCTION__, + self->netdev->name); + dev_kfree_skb(skb); + return; + } + dev_queue_xmit(skb); } diff --git a/net/irda/irlmp.c b/net/irda/irlmp.c index 9df0461b6d18..7efa930ed684 100644 --- a/net/irda/irlmp.c +++ b/net/irda/irlmp.c @@ -116,7 +116,7 @@ int __init irlmp_init(void) * Remove IrLMP layer * */ -void __exit irlmp_cleanup(void) +void irlmp_cleanup(void) { /* Check for main structure */ IRDA_ASSERT(irlmp != NULL, return;); @@ -1994,7 +1994,7 @@ static int irlmp_seq_show(struct seq_file *seq, void *v) return 0; } -static struct seq_operations irlmp_seq_ops = { +static const struct seq_operations irlmp_seq_ops = { .start = irlmp_seq_start, .next = irlmp_seq_next, .stop = irlmp_seq_stop, diff --git a/net/irda/irmod.c b/net/irda/irmod.c index c7fad2c5b9f3..8ba703da2797 100644 --- a/net/irda/irmod.c +++ b/net/irda/irmod.c @@ -88,16 +88,23 @@ EXPORT_SYMBOL(irda_notify_init); */ static int __init irda_init(void) { + int ret = 0; + IRDA_DEBUG(0, "%s()\n", __FUNCTION__); /* Lower layer of the stack */ irlmp_init(); irlap_init(); + /* Driver/dongle support */ + irda_device_init(); + /* Higher layers of the stack */ iriap_init(); irttp_init(); - irsock_init(); + ret = irsock_init(); + if (ret < 0) + goto out_err_1; /* Add IrDA packet type (Start receiving packets) */ dev_add_pack(&irda_packet_type); @@ -107,13 +114,44 @@ static int __init irda_init(void) irda_proc_register(); #endif #ifdef CONFIG_SYSCTL - irda_sysctl_register(); + ret = irda_sysctl_register(); + if (ret < 0) + goto out_err_2; #endif - /* Driver/dongle support */ - irda_device_init(); + ret = irda_nl_register(); + if (ret < 0) + goto out_err_3; return 0; + + out_err_3: +#ifdef CONFIG_SYSCTL + irda_sysctl_unregister(); + out_err_2: +#endif +#ifdef CONFIG_PROC_FS + irda_proc_unregister(); +#endif + + /* Remove IrDA packet type (stop receiving packets) */ + dev_remove_pack(&irda_packet_type); + + /* Remove higher layers */ + irsock_cleanup(); + out_err_1: + irttp_cleanup(); + iriap_cleanup(); + + /* Remove lower layers */ + irda_device_cleanup(); + irlap_cleanup(); /* Must be done before irlmp_cleanup()! DB */ + + /* Remove middle layer */ + irlmp_cleanup(); + + + return ret; } /* @@ -125,6 +163,8 @@ static int __init irda_init(void) static void __exit irda_cleanup(void) { /* Remove External APIs */ + irda_nl_unregister(); + #ifdef CONFIG_SYSCTL irda_sysctl_unregister(); #endif diff --git a/net/irda/irproc.c b/net/irda/irproc.c index d6f9aba5b9dc..181cb51b48a8 100644 --- a/net/irda/irproc.c +++ b/net/irda/irproc.c @@ -84,7 +84,7 @@ void __init irda_proc_register(void) * Unregister irda entry in /proc file system * */ -void __exit irda_proc_unregister(void) +void irda_proc_unregister(void) { int i; diff --git a/net/irda/irsysctl.c b/net/irda/irsysctl.c index 2e968e7d8fea..957e04feb0f7 100644 --- a/net/irda/irsysctl.c +++ b/net/irda/irsysctl.c @@ -287,7 +287,7 @@ int __init irda_sysctl_register(void) * Unregister our sysctl interface * */ -void __exit irda_sysctl_unregister(void) +void irda_sysctl_unregister(void) { unregister_sysctl_table(irda_table_header); } diff --git a/net/irda/irttp.c b/net/irda/irttp.c index 7069e4a58257..3d7ab03fb131 100644 --- a/net/irda/irttp.c +++ b/net/irda/irttp.c @@ -109,7 +109,7 @@ int __init irttp_init(void) * Called by module destruction/cleanup code * */ -void __exit irttp_cleanup(void) +void irttp_cleanup(void) { /* Check for main structure */ IRDA_ASSERT(irttp->magic == TTP_MAGIC, return;); @@ -369,6 +369,20 @@ static int irttp_param_max_sdu_size(void *instance, irda_param_t *param, /* Everything is happily mixed up. Waiting for next clean up - Jean II */ /* + * Initialization, that has to be done on new tsap + * instance allocation and on duplication + */ +static void irttp_init_tsap(struct tsap_cb *tsap) +{ + spin_lock_init(&tsap->lock); + init_timer(&tsap->todo_timer); + + skb_queue_head_init(&tsap->rx_queue); + skb_queue_head_init(&tsap->tx_queue); + skb_queue_head_init(&tsap->rx_fragments); +} + +/* * Function irttp_open_tsap (stsap, notify) * * Create TSAP connection endpoint, @@ -395,10 +409,11 @@ struct tsap_cb *irttp_open_tsap(__u8 stsap_sel, int credit, notify_t *notify) IRDA_DEBUG(0, "%s(), unable to kmalloc!\n", __FUNCTION__); return NULL; } - spin_lock_init(&self->lock); + + /* Initialize internal objects */ + irttp_init_tsap(self); /* Initialise todo timer */ - init_timer(&self->todo_timer); self->todo_timer.data = (unsigned long) self; self->todo_timer.function = &irttp_todo_expired; @@ -418,9 +433,6 @@ struct tsap_cb *irttp_open_tsap(__u8 stsap_sel, int credit, notify_t *notify) self->magic = TTP_TSAP_MAGIC; self->connected = FALSE; - skb_queue_head_init(&self->rx_queue); - skb_queue_head_init(&self->tx_queue); - skb_queue_head_init(&self->rx_fragments); /* * Create LSAP at IrLMP layer */ @@ -1455,12 +1467,9 @@ struct tsap_cb *irttp_dup(struct tsap_cb *orig, void *instance) /* Not everything should be copied */ new->notify.instance = instance; - spin_lock_init(&new->lock); - init_timer(&new->todo_timer); - skb_queue_head_init(&new->rx_queue); - skb_queue_head_init(&new->tx_queue); - skb_queue_head_init(&new->rx_fragments); + /* Initialize internal objects */ + irttp_init_tsap(new); /* This is locked */ hashbin_insert(irttp->tsaps, (irda_queue_t *) new, (long) new, NULL); @@ -1866,7 +1875,7 @@ static int irttp_seq_show(struct seq_file *seq, void *v) return 0; } -static struct seq_operations irttp_seq_ops = { +static const struct seq_operations irttp_seq_ops = { .start = irttp_seq_start, .next = irttp_seq_next, .stop = irttp_seq_stop, diff --git a/net/iucv/Kconfig b/net/iucv/Kconfig index f8fcc3d10327..16ce9cd4f39e 100644 --- a/net/iucv/Kconfig +++ b/net/iucv/Kconfig @@ -1,13 +1,13 @@ config IUCV - tristate "IUCV support (VM only)" + tristate "IUCV support (S390 - z/VM only)" depends on S390 help - Select this option if you want to use inter-user communication under - VM or VIF sockets. If you run on z/VM, say "Y" to enable a fast + Select this option if you want to use inter-user communication + under VM or VIF. If you run on z/VM, say "Y" to enable a fast communication link between VM guests. config AFIUCV - tristate "AF_IUCV support (VM only)" + tristate "AF_IUCV support (S390 - z/VM only)" depends on IUCV help Select this option if you want to use inter-user communication under diff --git a/net/iucv/af_iucv.c b/net/iucv/af_iucv.c index d9e9ddb8eac5..53ae14c35f70 100644 --- a/net/iucv/af_iucv.c +++ b/net/iucv/af_iucv.c @@ -219,6 +219,7 @@ static struct sock *iucv_sock_alloc(struct socket *sock, int proto, gfp_t prio) sock_init_data(sock, sk); INIT_LIST_HEAD(&iucv_sk(sk)->accept_q); + spin_lock_init(&iucv_sk(sk)->accept_q_lock); skb_queue_head_init(&iucv_sk(sk)->send_skb_q); skb_queue_head_init(&iucv_sk(sk)->backlog_skb_q); iucv_sk(sk)->send_tag = 0; @@ -274,15 +275,25 @@ void iucv_sock_unlink(struct iucv_sock_list *l, struct sock *sk) void iucv_accept_enqueue(struct sock *parent, struct sock *sk) { + unsigned long flags; + struct iucv_sock *par = iucv_sk(parent); + sock_hold(sk); - list_add_tail(&iucv_sk(sk)->accept_q, &iucv_sk(parent)->accept_q); + spin_lock_irqsave(&par->accept_q_lock, flags); + list_add_tail(&iucv_sk(sk)->accept_q, &par->accept_q); + spin_unlock_irqrestore(&par->accept_q_lock, flags); iucv_sk(sk)->parent = parent; parent->sk_ack_backlog++; } void iucv_accept_unlink(struct sock *sk) { + unsigned long flags; + struct iucv_sock *par = iucv_sk(iucv_sk(sk)->parent); + + spin_lock_irqsave(&par->accept_q_lock, flags); list_del_init(&iucv_sk(sk)->accept_q); + spin_unlock_irqrestore(&par->accept_q_lock, flags); iucv_sk(sk)->parent->sk_ack_backlog--; iucv_sk(sk)->parent = NULL; sock_put(sk); @@ -298,8 +309,8 @@ struct sock *iucv_accept_dequeue(struct sock *parent, struct socket *newsock) lock_sock(sk); if (sk->sk_state == IUCV_CLOSED) { - release_sock(sk); iucv_accept_unlink(sk); + release_sock(sk); continue; } @@ -879,6 +890,7 @@ static int iucv_callback_connreq(struct iucv_path *path, /* Find out if this path belongs to af_iucv. */ read_lock(&iucv_sk_list.lock); iucv = NULL; + sk = NULL; sk_for_each(sk, node, &iucv_sk_list.head) if (sk->sk_state == IUCV_LISTEN && !memcmp(&iucv_sk(sk)->src_name, src_name, 8)) { diff --git a/net/iucv/iucv.c b/net/iucv/iucv.c index b7333061016d..983058d432dc 100644 --- a/net/iucv/iucv.c +++ b/net/iucv/iucv.c @@ -479,7 +479,8 @@ static void iucv_setmask_mp(void) /* Enable all cpus with a declared buffer. */ if (cpu_isset(cpu, iucv_buffer_cpumask) && !cpu_isset(cpu, iucv_irq_cpumask)) - smp_call_function_on(iucv_allow_cpu, NULL, 0, 1, cpu); + smp_call_function_single(cpu, iucv_allow_cpu, + NULL, 0, 1); preempt_enable(); } @@ -497,7 +498,7 @@ static void iucv_setmask_up(void) cpumask = iucv_irq_cpumask; cpu_clear(first_cpu(iucv_irq_cpumask), cpumask); for_each_cpu_mask(cpu, cpumask) - smp_call_function_on(iucv_block_cpu, NULL, 0, 1, cpu); + smp_call_function_single(cpu, iucv_block_cpu, NULL, 0, 1); } /** @@ -522,7 +523,7 @@ static int iucv_enable(void) rc = -EIO; preempt_disable(); for_each_online_cpu(cpu) - smp_call_function_on(iucv_declare_cpu, NULL, 0, 1, cpu); + smp_call_function_single(cpu, iucv_declare_cpu, NULL, 0, 1); preempt_enable(); if (cpus_empty(iucv_buffer_cpumask)) /* No cpu could declare an iucv buffer. */ @@ -578,7 +579,7 @@ static int __cpuinit iucv_cpu_notify(struct notifier_block *self, case CPU_ONLINE_FROZEN: case CPU_DOWN_FAILED: case CPU_DOWN_FAILED_FROZEN: - smp_call_function_on(iucv_declare_cpu, NULL, 0, 1, cpu); + smp_call_function_single(cpu, iucv_declare_cpu, NULL, 0, 1); break; case CPU_DOWN_PREPARE: case CPU_DOWN_PREPARE_FROZEN: @@ -587,10 +588,10 @@ static int __cpuinit iucv_cpu_notify(struct notifier_block *self, if (cpus_empty(cpumask)) /* Can't offline last IUCV enabled cpu. */ return NOTIFY_BAD; - smp_call_function_on(iucv_retrieve_cpu, NULL, 0, 1, cpu); + smp_call_function_single(cpu, iucv_retrieve_cpu, NULL, 0, 1); if (cpus_empty(iucv_irq_cpumask)) - smp_call_function_on(iucv_allow_cpu, NULL, 0, 1, - first_cpu(iucv_buffer_cpumask)); + smp_call_function_single(first_cpu(iucv_buffer_cpumask), + iucv_allow_cpu, NULL, 0, 1); break; } return NOTIFY_OK; @@ -1494,7 +1495,10 @@ static void iucv_tasklet_fn(unsigned long ignored) struct iucv_irq_list *p, *n; /* Serialize tasklet, iucv_path_sever and iucv_path_connect. */ - spin_lock(&iucv_table_lock); + if (!spin_trylock(&iucv_table_lock)) { + tasklet_schedule(&iucv_tasklet); + return; + } iucv_active_cpu = smp_processor_id(); spin_lock_irq(&iucv_queue_lock); diff --git a/net/key/af_key.c b/net/key/af_key.c index 0f8304b0246b..5502df115a63 100644 --- a/net/key/af_key.c +++ b/net/key/af_key.c @@ -1206,6 +1206,9 @@ static struct xfrm_state * pfkey_msg2xfrm_state(struct sadb_msg *hdr, x->sel.prefixlen_s = addr->sadb_address_prefixlen; } + if (!x->sel.family) + x->sel.family = x->props.family; + if (ext_hdrs[SADB_X_EXT_NAT_T_TYPE-1]) { struct sadb_x_nat_t_type* n_type; struct xfrm_encap_tmpl *natt; @@ -2540,7 +2543,7 @@ static int pfkey_migrate(struct sock *sk, struct sk_buff *skb, sel.proto = pfkey_proto_to_xfrm(sa->sadb_address_proto); sel.sport = ((struct sockaddr_in *)(sa + 1))->sin_port; if (sel.sport) - sel.sport_mask = ~0; + sel.sport_mask = htons(0xffff); /* set destination address info of selector */ sa = ext_hdrs[SADB_EXT_ADDRESS_DST - 1], @@ -2549,7 +2552,7 @@ static int pfkey_migrate(struct sock *sk, struct sk_buff *skb, sel.proto = pfkey_proto_to_xfrm(sa->sadb_address_proto); sel.dport = ((struct sockaddr_in *)(sa + 1))->sin_port; if (sel.dport) - sel.dport_mask = ~0; + sel.dport_mask = htons(0xffff); rq = (struct sadb_x_ipsecrequest *)(pol + 1); diff --git a/net/llc/llc_proc.c b/net/llc/llc_proc.c index 3ab9d9f8b17f..49be6c902c83 100644 --- a/net/llc/llc_proc.c +++ b/net/llc/llc_proc.c @@ -184,14 +184,14 @@ out: return 0; } -static struct seq_operations llc_seq_socket_ops = { +static const struct seq_operations llc_seq_socket_ops = { .start = llc_seq_start, .next = llc_seq_next, .stop = llc_seq_stop, .show = llc_seq_socket_show, }; -static struct seq_operations llc_seq_core_ops = { +static const struct seq_operations llc_seq_core_ops = { .start = llc_seq_start, .next = llc_seq_next, .stop = llc_seq_stop, diff --git a/net/mac80211/Makefile b/net/mac80211/Makefile index e9738dad2d7c..a9c2d0787d4a 100644 --- a/net/mac80211/Makefile +++ b/net/mac80211/Makefile @@ -13,6 +13,7 @@ mac80211-objs := \ ieee80211_iface.o \ ieee80211_rate.o \ michael.o \ + regdomain.o \ tkip.o \ aes_ccm.o \ wme.o \ diff --git a/net/mac80211/debugfs_netdev.c b/net/mac80211/debugfs_netdev.c index 9e3964638bad..095be91829ca 100644 --- a/net/mac80211/debugfs_netdev.c +++ b/net/mac80211/debugfs_netdev.c @@ -118,7 +118,7 @@ static ssize_t ieee80211_if_fmt_flags( sdata->u.sta.authenticated ? "AUTH\n" : "", sdata->u.sta.associated ? "ASSOC\n" : "", sdata->u.sta.probereq_poll ? "PROBEREQ POLL\n" : "", - sdata->u.sta.use_protection ? "CTS prot\n" : ""); + sdata->use_protection ? "CTS prot\n" : ""); } __IEEE80211_IF_FILE(flags); @@ -271,9 +271,11 @@ static void add_files(struct ieee80211_sub_if_data *sdata) } } -#define DEBUGFS_DEL(name, type)\ - debugfs_remove(sdata->debugfs.type.name);\ - sdata->debugfs.type.name = NULL; +#define DEBUGFS_DEL(name, type) \ + do { \ + debugfs_remove(sdata->debugfs.type.name); \ + sdata->debugfs.type.name = NULL; \ + } while (0) static void del_sta_files(struct ieee80211_sub_if_data *sdata) { @@ -397,6 +399,8 @@ static int netdev_notify(struct notifier_block * nb, void *ndev) { struct net_device *dev = ndev; + struct dentry *dir; + struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev); char buf[10+IFNAMSIZ]; if (state != NETDEV_CHANGENAME) @@ -408,10 +412,11 @@ static int netdev_notify(struct notifier_block * nb, if (dev->ieee80211_ptr->wiphy->privid != mac80211_wiphy_privid) return 0; - /* TODO sprintf(buf, "netdev:%s", dev->name); - debugfs_rename(IEEE80211_DEV_TO_SUB_IF(dev)->debugfsdir, buf); - */ + dir = sdata->debugfsdir; + if (!debugfs_rename(dir->d_parent, dir, dir->d_parent, buf)) + printk(KERN_ERR "mac80211: debugfs: failed to rename debugfs " + "dir to %s\n", buf); return 0; } diff --git a/net/mac80211/debugfs_sta.c b/net/mac80211/debugfs_sta.c index d41e696f3980..da34ea70276f 100644 --- a/net/mac80211/debugfs_sta.c +++ b/net/mac80211/debugfs_sta.c @@ -157,7 +157,7 @@ static ssize_t sta_last_seq_ctrl_read(struct file *file, char __user *userbuf, struct sta_info *sta = file->private_data; for (i = 0; i < NUM_RX_DATA_QUEUES; i++) p += scnprintf(p, sizeof(buf)+buf-p, "%x ", - sta->last_seq_ctrl[i]); + le16_to_cpu(sta->last_seq_ctrl[i])); p += scnprintf(p, sizeof(buf)+buf-p, "\n"); return simple_read_from_buffer(userbuf, count, ppos, buf, p - buf); } diff --git a/net/mac80211/hostapd_ioctl.h b/net/mac80211/hostapd_ioctl.h index 34fa128e9872..52da513f060a 100644 --- a/net/mac80211/hostapd_ioctl.h +++ b/net/mac80211/hostapd_ioctl.h @@ -26,24 +26,16 @@ * mess shall be deleted completely. */ enum { PRISM2_PARAM_IEEE_802_1X = 23, - PRISM2_PARAM_ANTSEL_TX = 24, - PRISM2_PARAM_ANTSEL_RX = 25, /* Instant802 additions */ PRISM2_PARAM_CTS_PROTECT_ERP_FRAMES = 1001, - PRISM2_PARAM_DROP_UNENCRYPTED = 1002, PRISM2_PARAM_PREAMBLE = 1003, PRISM2_PARAM_SHORT_SLOT_TIME = 1006, PRISM2_PARAM_NEXT_MODE = 1008, - PRISM2_PARAM_CLEAR_KEYS = 1009, PRISM2_PARAM_RADIO_ENABLED = 1010, PRISM2_PARAM_ANTENNA_MODE = 1013, PRISM2_PARAM_STAT_TIME = 1016, PRISM2_PARAM_STA_ANTENNA_SEL = 1017, - PRISM2_PARAM_FORCE_UNICAST_RATE = 1018, - PRISM2_PARAM_RATE_CTRL_NUM_UP = 1019, - PRISM2_PARAM_RATE_CTRL_NUM_DOWN = 1020, - PRISM2_PARAM_MAX_RATECTRL_RATE = 1021, PRISM2_PARAM_TX_POWER_REDUCTION = 1022, PRISM2_PARAM_KEY_TX_RX_THRESHOLD = 1024, PRISM2_PARAM_DEFAULT_WEP_ONLY = 1026, diff --git a/net/mac80211/ieee80211.c b/net/mac80211/ieee80211.c index 4e84f24fd439..ff2172ffd861 100644 --- a/net/mac80211/ieee80211.c +++ b/net/mac80211/ieee80211.c @@ -24,6 +24,7 @@ #include <linux/compiler.h> #include <linux/bitmap.h> #include <net/cfg80211.h> +#include <asm/unaligned.h> #include "ieee80211_common.h" #include "ieee80211_i.h" @@ -56,6 +57,17 @@ static const unsigned char eapol_header[] = { 0xaa, 0xaa, 0x03, 0x00, 0x00, 0x00, 0x88, 0x8e }; +/* + * For seeing transmitted packets on monitor interfaces + * we have a radiotap header too. + */ +struct ieee80211_tx_status_rtap_hdr { + struct ieee80211_radiotap_header hdr; + __le16 tx_flags; + u8 data_retries; +} __attribute__ ((packed)); + + static inline void ieee80211_include_sequence(struct ieee80211_sub_if_data *sdata, struct ieee80211_hdr *hdr) { @@ -430,7 +442,7 @@ ieee80211_tx_h_rate_ctrl(struct ieee80211_txrx_data *tx) if (!tx->u.tx.rate) return TXRX_DROP; if (tx->u.tx.mode->mode == MODE_IEEE80211G && - tx->local->cts_protect_erp_frames && tx->fragmented && + tx->sdata->use_protection && tx->fragmented && extra.nonerp) { tx->u.tx.last_frag_rate = tx->u.tx.rate; tx->u.tx.probe_last_frag = extra.probe ? 1 : 0; @@ -528,7 +540,7 @@ ieee80211_tx_h_fragment(struct ieee80211_txrx_data *tx) /* reserve enough extra head and tail room for possible * encryption */ frag = frags[i] = - dev_alloc_skb(tx->local->hw.extra_tx_headroom + + dev_alloc_skb(tx->local->tx_headroom + frag_threshold + IEEE80211_ENCRYPT_HEADROOM + IEEE80211_ENCRYPT_TAILROOM); @@ -537,8 +549,8 @@ ieee80211_tx_h_fragment(struct ieee80211_txrx_data *tx) /* Make sure that all fragments use the same priority so * that they end up using the same TX queue */ frag->priority = first->priority; - skb_reserve(frag, tx->local->hw.extra_tx_headroom + - IEEE80211_ENCRYPT_HEADROOM); + skb_reserve(frag, tx->local->tx_headroom + + IEEE80211_ENCRYPT_HEADROOM); fhdr = (struct ieee80211_hdr *) skb_put(frag, hdrlen); memcpy(fhdr, first->data, hdrlen); if (i == num_fragm - 2) @@ -856,8 +868,7 @@ ieee80211_tx_h_misc(struct ieee80211_txrx_data *tx) * for the frame. */ if (mode->mode == MODE_IEEE80211G && (tx->u.tx.rate->flags & IEEE80211_RATE_ERP) && - tx->u.tx.unicast && - tx->local->cts_protect_erp_frames && + tx->u.tx.unicast && tx->sdata->use_protection && !(control->flags & IEEE80211_TXCTL_USE_RTS_CTS)) control->flags |= IEEE80211_TXCTL_USE_CTS_PROTECT; @@ -1118,7 +1129,138 @@ ieee80211_tx_h_ps_buf(struct ieee80211_txrx_data *tx) } -static void inline +/* + * deal with packet injection down monitor interface + * with Radiotap Header -- only called for monitor mode interface + */ + +static ieee80211_txrx_result +__ieee80211_parse_tx_radiotap( + struct ieee80211_txrx_data *tx, + struct sk_buff *skb, struct ieee80211_tx_control *control) +{ + /* + * this is the moment to interpret and discard the radiotap header that + * must be at the start of the packet injected in Monitor mode + * + * Need to take some care with endian-ness since radiotap + * args are little-endian + */ + + struct ieee80211_radiotap_iterator iterator; + struct ieee80211_radiotap_header *rthdr = + (struct ieee80211_radiotap_header *) skb->data; + struct ieee80211_hw_mode *mode = tx->local->hw.conf.mode; + int ret = ieee80211_radiotap_iterator_init(&iterator, rthdr, skb->len); + + /* + * default control situation for all injected packets + * FIXME: this does not suit all usage cases, expand to allow control + */ + + control->retry_limit = 1; /* no retry */ + control->key_idx = -1; /* no encryption key */ + control->flags &= ~(IEEE80211_TXCTL_USE_RTS_CTS | + IEEE80211_TXCTL_USE_CTS_PROTECT); + control->flags |= IEEE80211_TXCTL_DO_NOT_ENCRYPT | + IEEE80211_TXCTL_NO_ACK; + control->antenna_sel_tx = 0; /* default to default antenna */ + + /* + * for every radiotap entry that is present + * (ieee80211_radiotap_iterator_next returns -ENOENT when no more + * entries present, or -EINVAL on error) + */ + + while (!ret) { + int i, target_rate; + + ret = ieee80211_radiotap_iterator_next(&iterator); + + if (ret) + continue; + + /* see if this argument is something we can use */ + switch (iterator.this_arg_index) { + /* + * You must take care when dereferencing iterator.this_arg + * for multibyte types... the pointer is not aligned. Use + * get_unaligned((type *)iterator.this_arg) to dereference + * iterator.this_arg for type "type" safely on all arches. + */ + case IEEE80211_RADIOTAP_RATE: + /* + * radiotap rate u8 is in 500kbps units eg, 0x02=1Mbps + * ieee80211 rate int is in 100kbps units eg, 0x0a=1Mbps + */ + target_rate = (*iterator.this_arg) * 5; + for (i = 0; i < mode->num_rates; i++) { + struct ieee80211_rate *r = &mode->rates[i]; + + if (r->rate > target_rate) + continue; + + control->rate = r; + + if (r->flags & IEEE80211_RATE_PREAMBLE2) + control->tx_rate = r->val2; + else + control->tx_rate = r->val; + + /* end on exact match */ + if (r->rate == target_rate) + i = mode->num_rates; + } + break; + + case IEEE80211_RADIOTAP_ANTENNA: + /* + * radiotap uses 0 for 1st ant, mac80211 is 1 for + * 1st ant + */ + control->antenna_sel_tx = (*iterator.this_arg) + 1; + break; + + case IEEE80211_RADIOTAP_DBM_TX_POWER: + control->power_level = *iterator.this_arg; + break; + + case IEEE80211_RADIOTAP_FLAGS: + if (*iterator.this_arg & IEEE80211_RADIOTAP_F_FCS) { + /* + * this indicates that the skb we have been + * handed has the 32-bit FCS CRC at the end... + * we should react to that by snipping it off + * because it will be recomputed and added + * on transmission + */ + if (skb->len < (iterator.max_length + FCS_LEN)) + return TXRX_DROP; + + skb_trim(skb, skb->len - FCS_LEN); + } + break; + + default: + break; + } + } + + if (ret != -ENOENT) /* ie, if we didn't simply run out of fields */ + return TXRX_DROP; + + /* + * remove the radiotap header + * iterator->max_length was sanity-checked against + * skb->len by iterator init + */ + skb_pull(skb, iterator.max_length); + + return TXRX_CONTINUE; +} + + +static ieee80211_txrx_result inline __ieee80211_tx_prepare(struct ieee80211_txrx_data *tx, struct sk_buff *skb, struct net_device *dev, @@ -1126,6 +1268,9 @@ __ieee80211_tx_prepare(struct ieee80211_txrx_data *tx, { struct ieee80211_local *local = wdev_priv(dev->ieee80211_ptr); struct ieee80211_hdr *hdr = (struct ieee80211_hdr *) skb->data; + struct ieee80211_sub_if_data *sdata; + ieee80211_txrx_result res = TXRX_CONTINUE; + int hdrlen; memset(tx, 0, sizeof(*tx)); @@ -1135,7 +1280,32 @@ __ieee80211_tx_prepare(struct ieee80211_txrx_data *tx, tx->sdata = IEEE80211_DEV_TO_SUB_IF(dev); tx->sta = sta_info_get(local, hdr->addr1); tx->fc = le16_to_cpu(hdr->frame_control); + + /* + * set defaults for things that can be set by + * injected radiotap headers + */ control->power_level = local->hw.conf.power_level; + control->antenna_sel_tx = local->hw.conf.antenna_sel_tx; + if (local->sta_antenna_sel != STA_ANTENNA_SEL_AUTO && tx->sta) + control->antenna_sel_tx = tx->sta->antenna_sel_tx; + + /* process and remove the injection radiotap header */ + sdata = IEEE80211_DEV_TO_SUB_IF(dev); + if (unlikely(sdata->type == IEEE80211_IF_TYPE_MNTR)) { + if (__ieee80211_parse_tx_radiotap(tx, skb, control) == + TXRX_DROP) { + return TXRX_DROP; + } + /* + * we removed the radiotap header after this point, + * we filled control with what we could use + * set to the actual ieee header now + */ + hdr = (struct ieee80211_hdr *) skb->data; + res = TXRX_QUEUED; /* indication it was monitor packet */ + } + tx->u.tx.control = control; tx->u.tx.unicast = !is_multicast_ether_addr(hdr->addr1); if (is_multicast_ether_addr(hdr->addr1)) @@ -1152,9 +1322,6 @@ __ieee80211_tx_prepare(struct ieee80211_txrx_data *tx, control->flags |= IEEE80211_TXCTL_CLEAR_DST_MASK; tx->sta->clear_dst_mask = 0; } - control->antenna_sel_tx = local->hw.conf.antenna_sel_tx; - if (local->sta_antenna_sel != STA_ANTENNA_SEL_AUTO && tx->sta) - control->antenna_sel_tx = tx->sta->antenna_sel_tx; hdrlen = ieee80211_get_hdrlen(tx->fc); if (skb->len > hdrlen + sizeof(rfc1042_header) + 2) { u8 *pos = &skb->data[hdrlen + sizeof(rfc1042_header)]; @@ -1162,6 +1329,7 @@ __ieee80211_tx_prepare(struct ieee80211_txrx_data *tx, } control->flags |= IEEE80211_TXCTL_FIRST_FRAGMENT; + return res; } static int inline is_ieee80211_device(struct net_device *dev, @@ -1274,7 +1442,7 @@ static int ieee80211_tx(struct net_device *dev, struct sk_buff *skb, struct sta_info *sta; ieee80211_tx_handler *handler; struct ieee80211_txrx_data tx; - ieee80211_txrx_result res = TXRX_DROP; + ieee80211_txrx_result res = TXRX_DROP, res_prepare; int ret, i; WARN_ON(__ieee80211_queue_pending(local, control->queue)); @@ -1284,15 +1452,26 @@ static int ieee80211_tx(struct net_device *dev, struct sk_buff *skb, return 0; } - __ieee80211_tx_prepare(&tx, skb, dev, control); + res_prepare = __ieee80211_tx_prepare(&tx, skb, dev, control); + + if (res_prepare == TXRX_DROP) { + dev_kfree_skb(skb); + return 0; + } + sta = tx.sta; tx.u.tx.mgmt_interface = mgmt; tx.u.tx.mode = local->hw.conf.mode; - for (handler = local->tx_handlers; *handler != NULL; handler++) { - res = (*handler)(&tx); - if (res != TXRX_CONTINUE) - break; + if (res_prepare == TXRX_QUEUED) { /* if it was an injected packet */ + res = TXRX_CONTINUE; + } else { + for (handler = local->tx_handlers; *handler != NULL; + handler++) { + res = (*handler)(&tx); + if (res != TXRX_CONTINUE) + break; + } } skb = tx.skb; /* handlers are allowed to change skb */ @@ -1467,11 +1646,11 @@ static int ieee80211_master_start_xmit(struct sk_buff *skb, } osdata = IEEE80211_DEV_TO_SUB_IF(odev); - headroom = osdata->local->hw.extra_tx_headroom + - IEEE80211_ENCRYPT_HEADROOM; + headroom = osdata->local->tx_headroom + IEEE80211_ENCRYPT_HEADROOM; if (skb_headroom(skb) < headroom) { if (pskb_expand_head(skb, headroom, 0, GFP_ATOMIC)) { dev_kfree_skb(skb); + dev_put(odev); return 0; } } @@ -1494,6 +1673,56 @@ static int ieee80211_master_start_xmit(struct sk_buff *skb, } +int ieee80211_monitor_start_xmit(struct sk_buff *skb, + struct net_device *dev) +{ + struct ieee80211_local *local = wdev_priv(dev->ieee80211_ptr); + struct ieee80211_tx_packet_data *pkt_data; + struct ieee80211_radiotap_header *prthdr = + (struct ieee80211_radiotap_header *)skb->data; + u16 len; + + /* + * there must be a radiotap header at the + * start in this case + */ + if (unlikely(prthdr->it_version)) { + /* only version 0 is supported */ + dev_kfree_skb(skb); + return NETDEV_TX_OK; + } + + skb->dev = local->mdev; + + pkt_data = (struct ieee80211_tx_packet_data *)skb->cb; + memset(pkt_data, 0, sizeof(*pkt_data)); + pkt_data->ifindex = dev->ifindex; + pkt_data->mgmt_iface = 0; + pkt_data->do_not_encrypt = 1; + + /* above needed because we set skb device to master */ + + /* + * fix up the pointers accounting for the radiotap + * header still being in there. We are being given + * a precooked IEEE80211 header so no need for + * normal processing + */ + len = le16_to_cpu(get_unaligned(&prthdr->it_len)); + skb_set_mac_header(skb, len); + skb_set_network_header(skb, len + sizeof(struct ieee80211_hdr)); + skb_set_transport_header(skb, len + sizeof(struct ieee80211_hdr)); + + /* + * pass the radiotap header up to + * the next stage intact + */ + dev_queue_xmit(skb); + + return NETDEV_TX_OK; +} + + /** * ieee80211_subif_start_xmit - netif start_xmit function for Ethernet-type * subinterfaces (wlan#, WDS, and VLAN interfaces) @@ -1509,8 +1738,8 @@ static int ieee80211_master_start_xmit(struct sk_buff *skb, * encapsulated packet will then be passed to master interface, wlan#.11, for * transmission (through low-level driver). */ -static int ieee80211_subif_start_xmit(struct sk_buff *skb, - struct net_device *dev) +int ieee80211_subif_start_xmit(struct sk_buff *skb, + struct net_device *dev) { struct ieee80211_local *local = wdev_priv(dev->ieee80211_ptr); struct ieee80211_tx_packet_data *pkt_data; @@ -1619,7 +1848,7 @@ static int ieee80211_subif_start_xmit(struct sk_buff *skb, * build in headroom in __dev_alloc_skb() (linux/skbuff.h) and * alloc_skb() (net/core/skbuff.c) */ - head_need = hdrlen + encaps_len + local->hw.extra_tx_headroom; + head_need = hdrlen + encaps_len + local->tx_headroom; head_need -= skb_headroom(skb); /* We are going to modify skb data, so make a copy of it if happens to @@ -1658,7 +1887,7 @@ static int ieee80211_subif_start_xmit(struct sk_buff *skb, pkt_data = (struct ieee80211_tx_packet_data *)skb->cb; memset(pkt_data, 0, sizeof(struct ieee80211_tx_packet_data)); - pkt_data->ifindex = sdata->dev->ifindex; + pkt_data->ifindex = dev->ifindex; pkt_data->mgmt_iface = (sdata->type == IEEE80211_IF_TYPE_MGMT); pkt_data->do_not_encrypt = no_encrypt; @@ -1706,9 +1935,9 @@ ieee80211_mgmt_start_xmit(struct sk_buff *skb, struct net_device *dev) return 0; } - if (skb_headroom(skb) < sdata->local->hw.extra_tx_headroom) { - if (pskb_expand_head(skb, - sdata->local->hw.extra_tx_headroom, 0, GFP_ATOMIC)) { + if (skb_headroom(skb) < sdata->local->tx_headroom) { + if (pskb_expand_head(skb, sdata->local->tx_headroom, + 0, GFP_ATOMIC)) { dev_kfree_skb(skb); return 0; } @@ -1847,12 +2076,12 @@ struct sk_buff * ieee80211_beacon_get(struct ieee80211_hw *hw, int if_id, bh_len = ap->beacon_head_len; bt_len = ap->beacon_tail_len; - skb = dev_alloc_skb(local->hw.extra_tx_headroom + + skb = dev_alloc_skb(local->tx_headroom + bh_len + bt_len + 256 /* maximum TIM len */); if (!skb) return NULL; - skb_reserve(skb, local->hw.extra_tx_headroom); + skb_reserve(skb, local->tx_headroom); memcpy(skb_put(skb, bh_len), b_head, bh_len); ieee80211_include_sequence(sdata, (struct ieee80211_hdr *)skb->data); @@ -2376,8 +2605,7 @@ static void ieee80211_start_hard_monitor(struct ieee80211_local *local) struct ieee80211_if_init_conf conf; if (local->open_count && local->open_count == local->monitors && - !(local->hw.flags & IEEE80211_HW_MONITOR_DURING_OPER) && - local->ops->add_interface) { + !(local->hw.flags & IEEE80211_HW_MONITOR_DURING_OPER)) { conf.if_id = -1; conf.type = IEEE80211_IF_TYPE_MNTR; conf.mac_addr = NULL; @@ -2420,21 +2648,14 @@ static int ieee80211_open(struct net_device *dev) } ieee80211_start_soft_monitor(local); - if (local->ops->add_interface) { - conf.if_id = dev->ifindex; - conf.type = sdata->type; - conf.mac_addr = dev->dev_addr; - res = local->ops->add_interface(local_to_hw(local), &conf); - if (res) { - if (sdata->type == IEEE80211_IF_TYPE_MNTR) - ieee80211_start_hard_monitor(local); - return res; - } - } else { - if (sdata->type != IEEE80211_IF_TYPE_STA) - return -EOPNOTSUPP; - if (local->open_count > 0) - return -ENOBUFS; + conf.if_id = dev->ifindex; + conf.type = sdata->type; + conf.mac_addr = dev->dev_addr; + res = local->ops->add_interface(local_to_hw(local), &conf); + if (res) { + if (sdata->type == IEEE80211_IF_TYPE_MNTR) + ieee80211_start_hard_monitor(local); + return res; } if (local->open_count == 0) { @@ -2941,34 +3162,6 @@ int ieee80211_radar_status(struct ieee80211_hw *hw, int channel, } EXPORT_SYMBOL(ieee80211_radar_status); -int ieee80211_set_aid_for_sta(struct ieee80211_hw *hw, u8 *peer_address, - u16 aid) -{ - struct sk_buff *skb; - struct ieee80211_msg_set_aid_for_sta *msg; - struct ieee80211_local *local = hw_to_local(hw); - - /* unlikely because if this event only happens for APs, - * which require an open ap device. */ - if (unlikely(!local->apdev)) - return 0; - - skb = dev_alloc_skb(sizeof(struct ieee80211_frame_info) + - sizeof(struct ieee80211_msg_set_aid_for_sta)); - - if (!skb) - return -ENOMEM; - skb_reserve(skb, sizeof(struct ieee80211_frame_info)); - - msg = (struct ieee80211_msg_set_aid_for_sta *) - skb_put(skb, sizeof(struct ieee80211_msg_set_aid_for_sta)); - memcpy(msg->sta_address, peer_address, ETH_ALEN); - msg->aid = aid; - - ieee80211_rx_mgmt(local, skb, NULL, ieee80211_msg_set_aid_for_sta); - return 0; -} -EXPORT_SYMBOL(ieee80211_set_aid_for_sta); static void ap_sta_ps_start(struct net_device *dev, struct sta_info *sta) { @@ -4284,6 +4477,9 @@ void ieee80211_tx_status(struct ieee80211_hw *hw, struct sk_buff *skb, struct ieee80211_local *local = hw_to_local(hw); u16 frag, type; u32 msg_type; + struct ieee80211_tx_status_rtap_hdr *rthdr; + struct ieee80211_sub_if_data *sdata; + int monitors; if (!status) { printk(KERN_ERR @@ -4395,27 +4591,99 @@ void ieee80211_tx_status(struct ieee80211_hw *hw, struct sk_buff *skb, local->dot11FailedCount++; } - if (!(status->control.flags & IEEE80211_TXCTL_REQ_TX_STATUS) - || unlikely(!local->apdev)) { + msg_type = (status->flags & IEEE80211_TX_STATUS_ACK) ? + ieee80211_msg_tx_callback_ack : ieee80211_msg_tx_callback_fail; + + /* this was a transmitted frame, but now we want to reuse it */ + skb_orphan(skb); + + if ((status->control.flags & IEEE80211_TXCTL_REQ_TX_STATUS) && + local->apdev) { + if (local->monitors) { + skb2 = skb_clone(skb, GFP_ATOMIC); + } else { + skb2 = skb; + skb = NULL; + } + + if (skb2) + /* Send frame to hostapd */ + ieee80211_rx_mgmt(local, skb2, NULL, msg_type); + + if (!skb) + return; + } + + if (!local->monitors) { dev_kfree_skb(skb); return; } - msg_type = (status->flags & IEEE80211_TX_STATUS_ACK) ? - ieee80211_msg_tx_callback_ack : ieee80211_msg_tx_callback_fail; + /* send frame to monitor interfaces now */ - /* skb was the original skb used for TX. Clone it and give the clone - * to netif_rx(). Free original skb. */ - skb2 = skb_copy(skb, GFP_ATOMIC); - if (!skb2) { + if (skb_headroom(skb) < sizeof(*rthdr)) { + printk(KERN_ERR "ieee80211_tx_status: headroom too small\n"); dev_kfree_skb(skb); return; } - dev_kfree_skb(skb); - skb = skb2; - /* Send frame to hostapd */ - ieee80211_rx_mgmt(local, skb, NULL, msg_type); + rthdr = (struct ieee80211_tx_status_rtap_hdr*) + skb_push(skb, sizeof(*rthdr)); + + memset(rthdr, 0, sizeof(*rthdr)); + rthdr->hdr.it_len = cpu_to_le16(sizeof(*rthdr)); + rthdr->hdr.it_present = + cpu_to_le32((1 << IEEE80211_RADIOTAP_TX_FLAGS) | + (1 << IEEE80211_RADIOTAP_DATA_RETRIES)); + + if (!(status->flags & IEEE80211_TX_STATUS_ACK) && + !is_multicast_ether_addr(hdr->addr1)) + rthdr->tx_flags |= cpu_to_le16(IEEE80211_RADIOTAP_F_TX_FAIL); + + if ((status->control.flags & IEEE80211_TXCTL_USE_RTS_CTS) && + (status->control.flags & IEEE80211_TXCTL_USE_CTS_PROTECT)) + rthdr->tx_flags |= cpu_to_le16(IEEE80211_RADIOTAP_F_TX_CTS); + else if (status->control.flags & IEEE80211_TXCTL_USE_RTS_CTS) + rthdr->tx_flags |= cpu_to_le16(IEEE80211_RADIOTAP_F_TX_RTS); + + rthdr->data_retries = status->retry_count; + + read_lock(&local->sub_if_lock); + monitors = local->monitors; + list_for_each_entry(sdata, &local->sub_if_list, list) { + /* + * Using the monitors counter is possibly racy, but + * if the value is wrong we simply either clone the skb + * once too much or forget sending it to one monitor iface + * The latter case isn't nice but fixing the race is much + * more complicated. + */ + if (!monitors || !skb) + goto out; + + if (sdata->type == IEEE80211_IF_TYPE_MNTR) { + if (!netif_running(sdata->dev)) + continue; + monitors--; + if (monitors) + skb2 = skb_clone(skb, GFP_KERNEL); + else + skb2 = NULL; + skb->dev = sdata->dev; + /* XXX: is this sufficient for BPF? */ + skb_set_mac_header(skb, 0); + skb->ip_summed = CHECKSUM_UNNECESSARY; + skb->pkt_type = PACKET_OTHERHOST; + skb->protocol = htons(ETH_P_802_2); + memset(skb->cb, 0, sizeof(skb->cb)); + netif_rx(skb); + skb = skb2; + } + } + out: + read_unlock(&local->sub_if_lock); + if (skb) + dev_kfree_skb(skb); } EXPORT_SYMBOL(ieee80211_tx_status); @@ -4619,6 +4887,9 @@ struct ieee80211_hw *ieee80211_alloc_hw(size_t priv_data_len, ((sizeof(struct ieee80211_local) + NETDEV_ALIGN_CONST) & ~NETDEV_ALIGN_CONST); + BUG_ON(!ops->tx); + BUG_ON(!ops->config); + BUG_ON(!ops->add_interface); local->ops = ops; /* for now, mdev needs sub_if_data :/ */ @@ -4647,8 +4918,6 @@ struct ieee80211_hw *ieee80211_alloc_hw(size_t priv_data_len, local->short_retry_limit = 7; local->long_retry_limit = 4; local->hw.conf.radio_enabled = 1; - local->rate_ctrl_num_up = RATE_CONTROL_NUM_UP; - local->rate_ctrl_num_down = RATE_CONTROL_NUM_DOWN; local->enabled_modes = (unsigned int) -1; @@ -4712,6 +4981,14 @@ int ieee80211_register_hw(struct ieee80211_hw *hw) goto fail_workqueue; } + /* + * The hardware needs headroom for sending the frame, + * and we need some headroom for passing the frame to monitor + * interfaces, but never both at the same time. + */ + local->tx_headroom = max_t(unsigned int , local->hw.extra_tx_headroom, + sizeof(struct ieee80211_tx_status_rtap_hdr)); + debugfs_hw_add(local); local->hw.conf.beacon_int = 1000; @@ -4818,7 +5095,7 @@ int ieee80211_register_hwmode(struct ieee80211_hw *hw, } if (!(hw->flags & IEEE80211_HW_DEFAULT_REG_DOMAIN_CONFIGURED)) - ieee80211_init_client(local->mdev); + ieee80211_set_default_regdomain(mode); return 0; } @@ -4969,6 +5246,7 @@ static int __init ieee80211_init(void) } ieee80211_debugfs_netdev_init(); + ieee80211_regdomain_init(); return 0; } @@ -4981,7 +5259,7 @@ static void __exit ieee80211_exit(void) } -module_init(ieee80211_init); +subsys_initcall(ieee80211_init); module_exit(ieee80211_exit); MODULE_DESCRIPTION("IEEE 802.11 subsystem"); diff --git a/net/mac80211/ieee80211_common.h b/net/mac80211/ieee80211_common.h index b9a73e7f5f75..77c6afb7f6a8 100644 --- a/net/mac80211/ieee80211_common.h +++ b/net/mac80211/ieee80211_common.h @@ -47,21 +47,16 @@ enum ieee80211_msg_type { ieee80211_msg_normal = 0, ieee80211_msg_tx_callback_ack = 1, ieee80211_msg_tx_callback_fail = 2, - ieee80211_msg_passive_scan = 3, + /* hole at 3, was ieee80211_msg_passive_scan but unused */ ieee80211_msg_wep_frame_unknown_key = 4, ieee80211_msg_michael_mic_failure = 5, /* hole at 6, was monitor but never sent to userspace */ ieee80211_msg_sta_not_assoc = 7, - ieee80211_msg_set_aid_for_sta = 8 /* used by Intersil MVC driver */, + /* 8 was ieee80211_msg_set_aid_for_sta */ ieee80211_msg_key_threshold_notification = 9, ieee80211_msg_radar = 11, }; -struct ieee80211_msg_set_aid_for_sta { - char sta_address[ETH_ALEN]; - u16 aid; -}; - struct ieee80211_msg_key_notification { int tx_rx_count; char ifname[IFNAMSIZ]; diff --git a/net/mac80211/ieee80211_i.h b/net/mac80211/ieee80211_i.h index af4d14d0b969..6f7bae7ef9c0 100644 --- a/net/mac80211/ieee80211_i.h +++ b/net/mac80211/ieee80211_i.h @@ -99,6 +99,12 @@ struct ieee80211_sta_bss { int probe_resp; unsigned long last_update; + /* during assocation, we save an ERP value from a probe response so + * that we can feed ERP info to the driver when handling the + * association completes. these fields probably won't be up-to-date + * otherwise, you probably don't want to use them. */ + int has_erp_value; + u8 erp_value; }; @@ -235,7 +241,6 @@ struct ieee80211_if_sta { unsigned int authenticated:1; unsigned int associated:1; unsigned int probereq_poll:1; - unsigned int use_protection:1; unsigned int create_ibss:1; unsigned int mixed_cell:1; unsigned int wmm_enabled:1; @@ -278,6 +283,7 @@ struct ieee80211_sub_if_data { int mc_count; unsigned int allmulti:1; unsigned int promisc:1; + unsigned int use_protection:1; /* CTS protect ERP frames */ struct net_device_stats stats; int drop_unencrypted; @@ -392,6 +398,7 @@ struct ieee80211_local { int monitors; struct iw_statistics wstats; u8 wstats_flags; + int tx_headroom; /* required headroom for hardware/radiotap */ enum { IEEE80211_DEV_UNINITIALIZED = 0, @@ -437,7 +444,6 @@ struct ieee80211_local { int *basic_rates[NUM_IEEE80211_MODES]; int rts_threshold; - int cts_protect_erp_frames; int fragmentation_threshold; int short_retry_limit; /* dot11ShortRetryLimit */ int long_retry_limit; /* dot11LongRetryLimit */ @@ -513,8 +519,6 @@ struct ieee80211_local { STA_ANTENNA_SEL_SW_CTRL_DEBUG = 2 } sta_antenna_sel; - int rate_ctrl_num_up, rate_ctrl_num_down; - #ifdef CONFIG_MAC80211_DEBUG_COUNTERS /* TX/RX handler statistics */ unsigned int tx_handlers_drop; @@ -719,6 +723,8 @@ void ieee80211_prepare_rates(struct ieee80211_local *local, struct ieee80211_hw_mode *mode); void ieee80211_tx_set_iswep(struct ieee80211_txrx_data *tx); int ieee80211_if_update_wds(struct net_device *dev, u8 *remote_addr); +int ieee80211_monitor_start_xmit(struct sk_buff *skb, struct net_device *dev); +int ieee80211_subif_start_xmit(struct sk_buff *skb, struct net_device *dev); void ieee80211_if_setup(struct net_device *dev); void ieee80211_if_mgmt_setup(struct net_device *dev); int ieee80211_init_rate_ctrl_alg(struct ieee80211_local *local, @@ -753,7 +759,6 @@ void ieee80211_update_default_wep_only(struct ieee80211_local *local); /* ieee80211_ioctl.c */ int ieee80211_set_compression(struct ieee80211_local *local, struct net_device *dev, struct sta_info *sta); -int ieee80211_init_client(struct net_device *dev); int ieee80211_set_channel(struct ieee80211_local *local, int channel, int freq); /* ieee80211_sta.c */ void ieee80211_sta_timer(unsigned long data); @@ -792,6 +797,10 @@ void ieee80211_if_sdata_init(struct ieee80211_sub_if_data *sdata); int ieee80211_if_add_mgmt(struct ieee80211_local *local); void ieee80211_if_del_mgmt(struct ieee80211_local *local); +/* regdomain.c */ +void ieee80211_regdomain_init(void); +void ieee80211_set_default_regdomain(struct ieee80211_hw_mode *mode); + /* for wiphy privid */ extern void *mac80211_wiphy_privid; diff --git a/net/mac80211/ieee80211_iface.c b/net/mac80211/ieee80211_iface.c index cf0f32e8c2a2..8532a5ccdd1e 100644 --- a/net/mac80211/ieee80211_iface.c +++ b/net/mac80211/ieee80211_iface.c @@ -157,6 +157,8 @@ void ieee80211_if_set_type(struct net_device *dev, int type) struct ieee80211_local *local = wdev_priv(dev->ieee80211_ptr); int oldtype = sdata->type; + dev->hard_start_xmit = ieee80211_subif_start_xmit; + sdata->type = type; switch (type) { case IEEE80211_IF_TYPE_WDS: @@ -196,6 +198,7 @@ void ieee80211_if_set_type(struct net_device *dev, int type) } case IEEE80211_IF_TYPE_MNTR: dev->type = ARPHRD_IEEE80211_RADIOTAP; + dev->hard_start_xmit = ieee80211_monitor_start_xmit; break; default: printk(KERN_WARNING "%s: %s: Unknown interface type 0x%x", diff --git a/net/mac80211/ieee80211_ioctl.c b/net/mac80211/ieee80211_ioctl.c index 352f03bd8a3a..e7904db55325 100644 --- a/net/mac80211/ieee80211_ioctl.c +++ b/net/mac80211/ieee80211_ioctl.c @@ -27,20 +27,6 @@ #include "aes_ccm.h" #include "debugfs_key.h" -static int ieee80211_regdom = 0x10; /* FCC */ -module_param(ieee80211_regdom, int, 0444); -MODULE_PARM_DESC(ieee80211_regdom, "IEEE 802.11 regulatory domain; 64=MKK"); - -/* - * If firmware is upgraded by the vendor, additional channels can be used based - * on the new Japanese regulatory rules. This is indicated by setting - * ieee80211_japan_5ghz module parameter to one when loading the 80211 kernel - * module. - */ -static int ieee80211_japan_5ghz /* = 0 */; -module_param(ieee80211_japan_5ghz, int, 0444); -MODULE_PARM_DESC(ieee80211_japan_5ghz, "Vendor-updated firmware for 5 GHz"); - static void ieee80211_set_hw_encryption(struct net_device *dev, struct sta_info *sta, u8 addr[ETH_ALEN], struct ieee80211_key *key) @@ -345,6 +331,8 @@ static int ieee80211_ioctl_giwrange(struct net_device *dev, { struct ieee80211_local *local = wdev_priv(dev->ieee80211_ptr); struct iw_range *range = (struct iw_range *) extra; + struct ieee80211_hw_mode *mode = NULL; + int c = 0; data->length = sizeof(struct iw_range); memset(range, 0, sizeof(struct iw_range)); @@ -378,130 +366,34 @@ static int ieee80211_ioctl_giwrange(struct net_device *dev, range->enc_capa = IW_ENC_CAPA_WPA | IW_ENC_CAPA_WPA2 | IW_ENC_CAPA_CIPHER_TKIP | IW_ENC_CAPA_CIPHER_CCMP; - IW_EVENT_CAPA_SET_KERNEL(range->event_capa); - IW_EVENT_CAPA_SET(range->event_capa, SIOCGIWTHRSPY); - IW_EVENT_CAPA_SET(range->event_capa, SIOCGIWAP); - IW_EVENT_CAPA_SET(range->event_capa, SIOCGIWSCAN); - - return 0; -} - - -struct ieee80211_channel_range { - short start_freq; - short end_freq; - unsigned char power_level; - unsigned char antenna_max; -}; - -static const struct ieee80211_channel_range ieee80211_fcc_channels[] = { - { 2412, 2462, 27, 6 } /* IEEE 802.11b/g, channels 1..11 */, - { 5180, 5240, 17, 6 } /* IEEE 802.11a, channels 36..48 */, - { 5260, 5320, 23, 6 } /* IEEE 802.11a, channels 52..64 */, - { 5745, 5825, 30, 6 } /* IEEE 802.11a, channels 149..165, outdoor */, - { 0 } -}; - -static const struct ieee80211_channel_range ieee80211_mkk_channels[] = { - { 2412, 2472, 20, 6 } /* IEEE 802.11b/g, channels 1..13 */, - { 5170, 5240, 20, 6 } /* IEEE 802.11a, channels 34..48 */, - { 5260, 5320, 20, 6 } /* IEEE 802.11a, channels 52..64 */, - { 0 } -}; - - -static const struct ieee80211_channel_range *channel_range = - ieee80211_fcc_channels; - - -static void ieee80211_unmask_channel(struct net_device *dev, int mode, - struct ieee80211_channel *chan) -{ - int i; - - chan->flag = 0; - - if (ieee80211_regdom == 64 && - (mode == MODE_ATHEROS_TURBO || mode == MODE_ATHEROS_TURBOG)) { - /* Do not allow Turbo modes in Japan. */ - return; - } - - for (i = 0; channel_range[i].start_freq; i++) { - const struct ieee80211_channel_range *r = &channel_range[i]; - if (r->start_freq <= chan->freq && r->end_freq >= chan->freq) { - if (ieee80211_regdom == 64 && !ieee80211_japan_5ghz && - chan->freq >= 5260 && chan->freq <= 5320) { - /* - * Skip new channels in Japan since the - * firmware was not marked having been upgraded - * by the vendor. - */ - continue; - } + list_for_each_entry(mode, &local->modes_list, list) { + int i = 0; - if (ieee80211_regdom == 0x10 && - (chan->freq == 5190 || chan->freq == 5210 || - chan->freq == 5230)) { - /* Skip MKK channels when in FCC domain. */ - continue; - } + if (!(local->enabled_modes & (1 << mode->mode)) || + (local->hw_modes & local->enabled_modes & + (1 << MODE_IEEE80211G) && mode->mode == MODE_IEEE80211B)) + continue; - chan->flag |= IEEE80211_CHAN_W_SCAN | - IEEE80211_CHAN_W_ACTIVE_SCAN | - IEEE80211_CHAN_W_IBSS; - chan->power_level = r->power_level; - chan->antenna_max = r->antenna_max; - - if (ieee80211_regdom == 64 && - (chan->freq == 5170 || chan->freq == 5190 || - chan->freq == 5210 || chan->freq == 5230)) { - /* - * New regulatory rules in Japan have backwards - * compatibility with old channels in 5.15-5.25 - * GHz band, but the station is not allowed to - * use active scan on these old channels. - */ - chan->flag &= ~IEEE80211_CHAN_W_ACTIVE_SCAN; - } + while (i < mode->num_channels && c < IW_MAX_FREQUENCIES) { + struct ieee80211_channel *chan = &mode->channels[i]; - if (ieee80211_regdom == 64 && - (chan->freq == 5260 || chan->freq == 5280 || - chan->freq == 5300 || chan->freq == 5320)) { - /* - * IBSS is not allowed on 5.25-5.35 GHz band - * due to radar detection requirements. - */ - chan->flag &= ~IEEE80211_CHAN_W_IBSS; + if (chan->flag & IEEE80211_CHAN_W_SCAN) { + range->freq[c].i = chan->chan; + range->freq[c].m = chan->freq * 100000; + range->freq[c].e = 1; + c++; } - - break; - } - } -} - - -static int ieee80211_unmask_channels(struct net_device *dev) -{ - struct ieee80211_local *local = wdev_priv(dev->ieee80211_ptr); - struct ieee80211_hw_mode *mode; - int c; - - list_for_each_entry(mode, &local->modes_list, list) { - for (c = 0; c < mode->num_channels; c++) { - ieee80211_unmask_channel(dev, mode->mode, - &mode->channels[c]); + i++; } } - return 0; -} + range->num_channels = c; + range->num_frequency = c; + IW_EVENT_CAPA_SET_KERNEL(range->event_capa); + IW_EVENT_CAPA_SET(range->event_capa, SIOCGIWTHRSPY); + IW_EVENT_CAPA_SET(range->event_capa, SIOCGIWAP); + IW_EVENT_CAPA_SET(range->event_capa, SIOCGIWSCAN); -int ieee80211_init_client(struct net_device *dev) -{ - if (ieee80211_regdom == 0x40) - channel_range = ieee80211_mkk_channels; - ieee80211_unmask_channels(dev); return 0; } @@ -805,17 +697,24 @@ static int ieee80211_ioctl_siwscan(struct net_device *dev, if (!netif_running(dev)) return -ENETDOWN; - if (local->scan_flags & IEEE80211_SCAN_MATCH_SSID) { - if (sdata->type == IEEE80211_IF_TYPE_STA || - sdata->type == IEEE80211_IF_TYPE_IBSS) { + switch (sdata->type) { + case IEEE80211_IF_TYPE_STA: + case IEEE80211_IF_TYPE_IBSS: + if (local->scan_flags & IEEE80211_SCAN_MATCH_SSID) { ssid = sdata->u.sta.ssid; ssid_len = sdata->u.sta.ssid_len; - } else if (sdata->type == IEEE80211_IF_TYPE_AP) { + } + break; + case IEEE80211_IF_TYPE_AP: + if (local->scan_flags & IEEE80211_SCAN_MATCH_SSID) { ssid = sdata->u.ap.ssid; ssid_len = sdata->u.ap.ssid_len; - } else - return -EINVAL; + } + break; + default: + return -EOPNOTSUPP; } + return ieee80211_sta_req_scan(dev, ssid, ssid_len); } @@ -838,6 +737,67 @@ static int ieee80211_ioctl_giwscan(struct net_device *dev, } +static int ieee80211_ioctl_siwrate(struct net_device *dev, + struct iw_request_info *info, + struct iw_param *rate, char *extra) +{ + struct ieee80211_local *local = wdev_priv(dev->ieee80211_ptr); + struct ieee80211_hw_mode *mode; + int i; + u32 target_rate = rate->value / 100000; + struct ieee80211_sub_if_data *sdata; + + sdata = IEEE80211_DEV_TO_SUB_IF(dev); + if (!sdata->bss) + return -ENODEV; + mode = local->oper_hw_mode; + /* target_rate = -1, rate->fixed = 0 means auto only, so use all rates + * target_rate = X, rate->fixed = 1 means only rate X + * target_rate = X, rate->fixed = 0 means all rates <= X */ + sdata->bss->max_ratectrl_rateidx = -1; + sdata->bss->force_unicast_rateidx = -1; + if (rate->value < 0) + return 0; + for (i=0; i< mode->num_rates; i++) { + struct ieee80211_rate *rates = &mode->rates[i]; + int this_rate = rates->rate; + + if (mode->mode == MODE_ATHEROS_TURBO || + mode->mode == MODE_ATHEROS_TURBOG) + this_rate *= 2; + if (target_rate == this_rate) { + sdata->bss->max_ratectrl_rateidx = i; + if (rate->fixed) + sdata->bss->force_unicast_rateidx = i; + break; + } + } + return 0; +} + +static int ieee80211_ioctl_giwrate(struct net_device *dev, + struct iw_request_info *info, + struct iw_param *rate, char *extra) +{ + struct ieee80211_local *local = wdev_priv(dev->ieee80211_ptr); + struct sta_info *sta; + struct ieee80211_sub_if_data *sdata; + + sdata = IEEE80211_DEV_TO_SUB_IF(dev); + if (sdata->type == IEEE80211_IF_TYPE_STA) + sta = sta_info_get(local, sdata->u.sta.bssid); + else + return -EOPNOTSUPP; + if (!sta) + return -ENODEV; + if (sta->txrate < local->oper_hw_mode->num_rates) + rate->value = local->oper_hw_mode->rates[sta->txrate].rate * 100000; + else + rate->value = 0; + sta_info_put(sta); + return 0; +} + static int ieee80211_ioctl_siwrts(struct net_device *dev, struct iw_request_info *info, struct iw_param *rts, char *extra) @@ -970,118 +930,6 @@ static int ieee80211_ioctl_giwretry(struct net_device *dev, return 0; } -static int ieee80211_ioctl_clear_keys(struct net_device *dev) -{ - struct ieee80211_local *local = wdev_priv(dev->ieee80211_ptr); - struct ieee80211_key_conf key; - int i; - u8 addr[ETH_ALEN]; - struct ieee80211_key_conf *keyconf; - struct ieee80211_sub_if_data *sdata; - struct sta_info *sta; - - memset(addr, 0xff, ETH_ALEN); - read_lock(&local->sub_if_lock); - list_for_each_entry(sdata, &local->sub_if_list, list) { - for (i = 0; i < NUM_DEFAULT_KEYS; i++) { - keyconf = NULL; - if (sdata->keys[i] && - !sdata->keys[i]->force_sw_encrypt && - local->ops->set_key && - (keyconf = ieee80211_key_data2conf(local, - sdata->keys[i]))) - local->ops->set_key(local_to_hw(local), - DISABLE_KEY, addr, - keyconf, 0); - kfree(keyconf); - ieee80211_key_free(sdata->keys[i]); - sdata->keys[i] = NULL; - } - sdata->default_key = NULL; - } - read_unlock(&local->sub_if_lock); - - spin_lock_bh(&local->sta_lock); - list_for_each_entry(sta, &local->sta_list, list) { - keyconf = NULL; - if (sta->key && !sta->key->force_sw_encrypt && - local->ops->set_key && - (keyconf = ieee80211_key_data2conf(local, sta->key))) - local->ops->set_key(local_to_hw(local), DISABLE_KEY, - sta->addr, keyconf, sta->aid); - kfree(keyconf); - ieee80211_key_free(sta->key); - sta->key = NULL; - } - spin_unlock_bh(&local->sta_lock); - - memset(&key, 0, sizeof(key)); - if (local->ops->set_key && - local->ops->set_key(local_to_hw(local), REMOVE_ALL_KEYS, - NULL, &key, 0)) - printk(KERN_DEBUG "%s: failed to remove hwaccel keys\n", - dev->name); - - return 0; -} - - -static int -ieee80211_ioctl_force_unicast_rate(struct net_device *dev, - struct ieee80211_sub_if_data *sdata, - int rate) -{ - struct ieee80211_local *local = wdev_priv(dev->ieee80211_ptr); - struct ieee80211_hw_mode *mode; - int i; - - if (sdata->type != IEEE80211_IF_TYPE_AP) - return -ENOENT; - - if (rate == 0) { - sdata->u.ap.force_unicast_rateidx = -1; - return 0; - } - - mode = local->oper_hw_mode; - for (i = 0; i < mode->num_rates; i++) { - if (mode->rates[i].rate == rate) { - sdata->u.ap.force_unicast_rateidx = i; - return 0; - } - } - return -EINVAL; -} - - -static int -ieee80211_ioctl_max_ratectrl_rate(struct net_device *dev, - struct ieee80211_sub_if_data *sdata, - int rate) -{ - struct ieee80211_local *local = wdev_priv(dev->ieee80211_ptr); - struct ieee80211_hw_mode *mode; - int i; - - if (sdata->type != IEEE80211_IF_TYPE_AP) - return -ENOENT; - - if (rate == 0) { - sdata->u.ap.max_ratectrl_rateidx = -1; - return 0; - } - - mode = local->oper_hw_mode; - for (i = 0; i < mode->num_rates; i++) { - if (mode->rates[i].rate == rate) { - sdata->u.ap.max_ratectrl_rateidx = i; - return 0; - } - } - return -EINVAL; -} - - static void ieee80211_key_enable_hwaccel(struct ieee80211_local *local, struct ieee80211_key *key) { @@ -1205,24 +1053,11 @@ static int ieee80211_ioctl_prism2_param(struct net_device *dev, sdata->ieee802_1x = value; break; - case PRISM2_PARAM_ANTSEL_TX: - local->hw.conf.antenna_sel_tx = value; - if (ieee80211_hw_config(local)) - ret = -EINVAL; - break; - - case PRISM2_PARAM_ANTSEL_RX: - local->hw.conf.antenna_sel_rx = value; - if (ieee80211_hw_config(local)) - ret = -EINVAL; - break; - case PRISM2_PARAM_CTS_PROTECT_ERP_FRAMES: - local->cts_protect_erp_frames = value; - break; - - case PRISM2_PARAM_DROP_UNENCRYPTED: - sdata->drop_unencrypted = value; + if (sdata->type != IEEE80211_IF_TYPE_AP) + ret = -ENOENT; + else + sdata->use_protection = value; break; case PRISM2_PARAM_PREAMBLE: @@ -1251,10 +1086,6 @@ static int ieee80211_ioctl_prism2_param(struct net_device *dev, local->next_mode = value; break; - case PRISM2_PARAM_CLEAR_KEYS: - ret = ieee80211_ioctl_clear_keys(dev); - break; - case PRISM2_PARAM_RADIO_ENABLED: ret = ieee80211_ioctl_set_radio_enabled(dev, value); break; @@ -1269,22 +1100,6 @@ static int ieee80211_ioctl_prism2_param(struct net_device *dev, local->sta_antenna_sel = value; break; - case PRISM2_PARAM_FORCE_UNICAST_RATE: - ret = ieee80211_ioctl_force_unicast_rate(dev, sdata, value); - break; - - case PRISM2_PARAM_MAX_RATECTRL_RATE: - ret = ieee80211_ioctl_max_ratectrl_rate(dev, sdata, value); - break; - - case PRISM2_PARAM_RATE_CTRL_NUM_UP: - local->rate_ctrl_num_up = value; - break; - - case PRISM2_PARAM_RATE_CTRL_NUM_DOWN: - local->rate_ctrl_num_down = value; - break; - case PRISM2_PARAM_TX_POWER_REDUCTION: if (value < 0) ret = -EINVAL; @@ -1364,20 +1179,8 @@ static int ieee80211_ioctl_get_prism2_param(struct net_device *dev, *param = sdata->ieee802_1x; break; - case PRISM2_PARAM_ANTSEL_TX: - *param = local->hw.conf.antenna_sel_tx; - break; - - case PRISM2_PARAM_ANTSEL_RX: - *param = local->hw.conf.antenna_sel_rx; - break; - case PRISM2_PARAM_CTS_PROTECT_ERP_FRAMES: - *param = local->cts_protect_erp_frames; - break; - - case PRISM2_PARAM_DROP_UNENCRYPTED: - *param = sdata->drop_unencrypted; + *param = sdata->use_protection; break; case PRISM2_PARAM_PREAMBLE: @@ -1403,14 +1206,6 @@ static int ieee80211_ioctl_get_prism2_param(struct net_device *dev, *param = local->sta_antenna_sel; break; - case PRISM2_PARAM_RATE_CTRL_NUM_UP: - *param = local->rate_ctrl_num_up; - break; - - case PRISM2_PARAM_RATE_CTRL_NUM_DOWN: - *param = local->rate_ctrl_num_down; - break; - case PRISM2_PARAM_TX_POWER_REDUCTION: *param = local->hw.conf.tx_power_reduction; break; @@ -1778,8 +1573,8 @@ static const iw_handler ieee80211_handler[] = (iw_handler) NULL, /* SIOCGIWNICKN */ (iw_handler) NULL, /* -- hole -- */ (iw_handler) NULL, /* -- hole -- */ - (iw_handler) NULL, /* SIOCSIWRATE */ - (iw_handler) NULL, /* SIOCGIWRATE */ + (iw_handler) ieee80211_ioctl_siwrate, /* SIOCSIWRATE */ + (iw_handler) ieee80211_ioctl_giwrate, /* SIOCGIWRATE */ (iw_handler) ieee80211_ioctl_siwrts, /* SIOCSIWRTS */ (iw_handler) ieee80211_ioctl_giwrts, /* SIOCGIWRTS */ (iw_handler) ieee80211_ioctl_siwfrag, /* SIOCSIWFRAG */ diff --git a/net/mac80211/ieee80211_rate.c b/net/mac80211/ieee80211_rate.c index 16e850864b8a..2118de04fc35 100644 --- a/net/mac80211/ieee80211_rate.c +++ b/net/mac80211/ieee80211_rate.c @@ -24,11 +24,10 @@ int ieee80211_rate_control_register(struct rate_control_ops *ops) { struct rate_control_alg *alg; - alg = kmalloc(sizeof(*alg), GFP_KERNEL); + alg = kzalloc(sizeof(*alg), GFP_KERNEL); if (alg == NULL) { return -ENOMEM; } - memset(alg, 0, sizeof(*alg)); alg->ops = ops; mutex_lock(&rate_ctrl_mutex); diff --git a/net/mac80211/ieee80211_sta.c b/net/mac80211/ieee80211_sta.c index 91b545c144c1..0d99b685df5f 100644 --- a/net/mac80211/ieee80211_sta.c +++ b/net/mac80211/ieee80211_sta.c @@ -25,7 +25,6 @@ #include <linux/wireless.h> #include <linux/random.h> #include <linux/etherdevice.h> -#include <linux/rtnetlink.h> #include <net/iw_handler.h> #include <asm/types.h> @@ -76,33 +75,36 @@ static int ieee80211_sta_config_auth(struct net_device *dev, /* Parsed Information Elements */ struct ieee802_11_elems { + /* pointers to IEs */ u8 *ssid; - u8 ssid_len; u8 *supp_rates; - u8 supp_rates_len; u8 *fh_params; - u8 fh_params_len; u8 *ds_params; - u8 ds_params_len; u8 *cf_params; - u8 cf_params_len; u8 *tim; - u8 tim_len; u8 *ibss_params; - u8 ibss_params_len; u8 *challenge; - u8 challenge_len; u8 *wpa; - u8 wpa_len; u8 *rsn; - u8 rsn_len; u8 *erp_info; - u8 erp_info_len; u8 *ext_supp_rates; - u8 ext_supp_rates_len; u8 *wmm_info; - u8 wmm_info_len; u8 *wmm_param; + + /* length of them, respectively */ + u8 ssid_len; + u8 supp_rates_len; + u8 fh_params_len; + u8 ds_params_len; + u8 cf_params_len; + u8 tim_len; + u8 ibss_params_len; + u8 challenge_len; + u8 wpa_len; + u8 rsn_len; + u8 erp_info_len; + u8 ext_supp_rates_len; + u8 wmm_info_len; u8 wmm_param_len; }; @@ -311,6 +313,25 @@ static void ieee80211_sta_wmm_params(struct net_device *dev, } +static void ieee80211_handle_erp_ie(struct net_device *dev, u8 erp_value) +{ + struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev); + struct ieee80211_if_sta *ifsta = &sdata->u.sta; + int use_protection = (erp_value & WLAN_ERP_USE_PROTECTION) != 0; + + if (use_protection != sdata->use_protection) { + if (net_ratelimit()) { + printk(KERN_DEBUG "%s: CTS protection %s (BSSID=" + MAC_FMT ")\n", + dev->name, + use_protection ? "enabled" : "disabled", + MAC_ARG(ifsta->bssid)); + } + sdata->use_protection = use_protection; + } +} + + static void ieee80211_sta_send_associnfo(struct net_device *dev, struct ieee80211_if_sta *ifsta) { @@ -366,6 +387,7 @@ static void ieee80211_set_associated(struct net_device *dev, struct ieee80211_if_sta *ifsta, int assoc) { union iwreq_data wrqu; + struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev); if (ifsta->associated == assoc) return; @@ -374,9 +396,18 @@ static void ieee80211_set_associated(struct net_device *dev, if (assoc) { struct ieee80211_sub_if_data *sdata; + struct ieee80211_sta_bss *bss; sdata = IEEE80211_DEV_TO_SUB_IF(dev); if (sdata->type != IEEE80211_IF_TYPE_STA) return; + + bss = ieee80211_rx_bss_get(dev, ifsta->bssid); + if (bss) { + if (bss->has_erp_value) + ieee80211_handle_erp_ie(dev, bss->erp_value); + ieee80211_rx_bss_put(dev, bss); + } + netif_carrier_on(dev); ifsta->prev_bssid_set = 1; memcpy(ifsta->prev_bssid, sdata->u.sta.bssid, ETH_ALEN); @@ -384,6 +415,7 @@ static void ieee80211_set_associated(struct net_device *dev, ieee80211_sta_send_associnfo(dev, ifsta); } else { netif_carrier_off(dev); + sdata->use_protection = 0; memset(wrqu.ap_addr.sa_data, 0, ETH_ALEN); } wrqu.ap_addr.sa_family = ARPHRD_ETHER; @@ -1174,6 +1206,18 @@ static void ieee80211_rx_mgmt_assoc_resp(struct net_device *dev, return; } + /* it probably doesn't, but if the frame includes an ERP value then + * update our stored copy */ + if (elems.erp_info && elems.erp_info_len >= 1) { + struct ieee80211_sta_bss *bss + = ieee80211_rx_bss_get(dev, ifsta->bssid); + if (bss) { + bss->erp_value = elems.erp_info[0]; + bss->has_erp_value = 1; + ieee80211_rx_bss_put(dev, bss); + } + } + printk(KERN_DEBUG "%s: associated\n", dev->name); ifsta->aid = aid; ifsta->ap_capab = capab_info; @@ -1282,10 +1326,9 @@ ieee80211_rx_bss_add(struct net_device *dev, u8 *bssid) struct ieee80211_local *local = wdev_priv(dev->ieee80211_ptr); struct ieee80211_sta_bss *bss; - bss = kmalloc(sizeof(*bss), GFP_ATOMIC); + bss = kzalloc(sizeof(*bss), GFP_ATOMIC); if (!bss) return NULL; - memset(bss, 0, sizeof(*bss)); atomic_inc(&bss->users); atomic_inc(&bss->users); memcpy(bss->bssid, bssid, ETH_ALEN); @@ -1496,6 +1539,12 @@ static void ieee80211_rx_bss_info(struct net_device *dev, return; } + /* save the ERP value so that it is available at association time */ + if (elems.erp_info && elems.erp_info_len >= 1) { + bss->erp_value = elems.erp_info[0]; + bss->has_erp_value = 1; + } + bss->beacon_int = le16_to_cpu(mgmt->u.beacon.beacon_int); bss->capability = le16_to_cpu(mgmt->u.beacon.capab_info); if (elems.ssid && elems.ssid_len <= IEEE80211_MAX_SSID_LEN) { @@ -1611,10 +1660,8 @@ static void ieee80211_rx_mgmt_beacon(struct net_device *dev, size_t len, struct ieee80211_rx_status *rx_status) { - struct ieee80211_local *local = wdev_priv(dev->ieee80211_ptr); struct ieee80211_sub_if_data *sdata; struct ieee80211_if_sta *ifsta; - int use_protection; size_t baselen; struct ieee802_11_elems elems; @@ -1638,23 +1685,8 @@ static void ieee80211_rx_mgmt_beacon(struct net_device *dev, &elems) == ParseFailed) return; - use_protection = 0; - if (elems.erp_info && elems.erp_info_len >= 1) { - use_protection = - (elems.erp_info[0] & ERP_INFO_USE_PROTECTION) != 0; - } - - if (use_protection != !!ifsta->use_protection) { - if (net_ratelimit()) { - printk(KERN_DEBUG "%s: CTS protection %s (BSSID=" - MAC_FMT ")\n", - dev->name, - use_protection ? "enabled" : "disabled", - MAC_ARG(ifsta->bssid)); - } - ifsta->use_protection = use_protection ? 1 : 0; - local->cts_protect_erp_frames = use_protection; - } + if (elems.erp_info && elems.erp_info_len >= 1) + ieee80211_handle_erp_ie(dev, elems.erp_info[0]); if (elems.wmm_param && ifsta->wmm_enabled) { ieee80211_sta_wmm_params(dev, ifsta, elems.wmm_param, @@ -2073,12 +2105,9 @@ static int ieee80211_sta_config_auth(struct net_device *dev, struct ieee80211_sta_bss *bss, *selected = NULL; int top_rssi = 0, freq; - rtnl_lock(); - if (!ifsta->auto_channel_sel && !ifsta->auto_bssid_sel && !ifsta->auto_ssid_sel) { ifsta->state = IEEE80211_AUTHENTICATE; - rtnl_unlock(); ieee80211_sta_reset_auth(dev, ifsta); return 0; } @@ -2121,18 +2150,20 @@ static int ieee80211_sta_config_auth(struct net_device *dev, ieee80211_sta_set_bssid(dev, selected->bssid); ieee80211_rx_bss_put(dev, selected); ifsta->state = IEEE80211_AUTHENTICATE; - rtnl_unlock(); ieee80211_sta_reset_auth(dev, ifsta); return 0; } else { if (ifsta->state != IEEE80211_AUTHENTICATE) { - ieee80211_sta_start_scan(dev, NULL, 0); + if (ifsta->auto_ssid_sel) + ieee80211_sta_start_scan(dev, NULL, 0); + else + ieee80211_sta_start_scan(dev, ifsta->ssid, + ifsta->ssid_len); ifsta->state = IEEE80211_AUTHENTICATE; set_bit(IEEE80211_STA_REQ_AUTH, &ifsta->request); } else ifsta->state = IEEE80211_DISABLED; } - rtnl_unlock(); return -1; } diff --git a/net/mac80211/rc80211_simple.c b/net/mac80211/rc80211_simple.c index 2048cfd1ca70..17b9f46bbf2b 100644 --- a/net/mac80211/rc80211_simple.c +++ b/net/mac80211/rc80211_simple.c @@ -187,9 +187,13 @@ static void rate_control_simple_tx_status(void *priv, struct net_device *dev, } #endif - if (per_failed > local->rate_ctrl_num_down) { + /* + * XXX: Make these configurable once we have an + * interface to the rate control algorithms + */ + if (per_failed > RATE_CONTROL_NUM_DOWN) { rate_control_rate_dec(local, sta); - } else if (per_failed < local->rate_ctrl_num_up) { + } else if (per_failed < RATE_CONTROL_NUM_UP) { rate_control_rate_inc(local, sta); } srctrl->tx_avg_rate_sum += status->control.rate->rate; @@ -283,14 +287,16 @@ static void rate_control_simple_rate_init(void *priv, void *priv_sta, int i; sta->txrate = 0; mode = local->oper_hw_mode; - /* TODO: what is a good starting rate for STA? About middle? Maybe not - * the lowest or the highest rate.. Could consider using RSSI from - * previous packets? Need to have IEEE 802.1X auth succeed immediately - * after assoc.. */ + /* TODO: This routine should consider using RSSI from previous packets + * as we need to have IEEE 802.1X auth succeed immediately after assoc.. + * Until that method is implemented, we will use the lowest supported rate + * as a workaround, */ for (i = 0; i < mode->num_rates; i++) { if ((sta->supp_rates & BIT(i)) && - (mode->rates[i].flags & IEEE80211_RATE_SUPPORTED)) + (mode->rates[i].flags & IEEE80211_RATE_SUPPORTED)) { sta->txrate = i; + break; + } } } @@ -425,7 +431,7 @@ static void __exit rate_control_simple_exit(void) } -module_init(rate_control_simple_init); +subsys_initcall(rate_control_simple_init); module_exit(rate_control_simple_exit); MODULE_DESCRIPTION("Simple rate control algorithm for ieee80211"); diff --git a/net/mac80211/wme.c b/net/mac80211/wme.c index 89ce81529694..7ab82b376e1b 100644 --- a/net/mac80211/wme.c +++ b/net/mac80211/wme.c @@ -424,7 +424,7 @@ static int wme_qdiscop_init(struct Qdisc *qd, struct rtattr *opt) skb_queue_head_init(&q->requeued[i]); q->queues[i] = qdisc_create_dflt(qd->dev, &pfifo_qdisc_ops, qd->handle); - if (q->queues[i] == 0) { + if (!q->queues[i]) { q->queues[i] = &noop_qdisc; printk(KERN_ERR "%s child qdisc %i creation failed", dev->name, i); } diff --git a/net/netfilter/Kconfig b/net/netfilter/Kconfig index a567dae8e5fd..3599770a2473 100644 --- a/net/netfilter/Kconfig +++ b/net/netfilter/Kconfig @@ -102,6 +102,16 @@ config NF_CT_PROTO_SCTP If you want to compile it as a module, say M here and read <file:Documentation/kbuild/modules.txt>. If unsure, say `N'. +config NF_CT_PROTO_UDPLITE + tristate 'UDP-Lite protocol connection tracking support (EXPERIMENTAL)' + depends on EXPERIMENTAL && NF_CONNTRACK + help + With this option enabled, the layer 3 independent connection + tracking code will be able to do state tracking on UDP-Lite + connections. + + To compile it as a module, choose M here. If unsure, say N. + config NF_CONNTRACK_AMANDA tristate "Amanda backup protocol support" depends on NF_CONNTRACK @@ -343,6 +353,18 @@ config NETFILTER_XT_TARGET_NOTRACK If you want to compile it as a module, say M here and read <file:Documentation/kbuild/modules.txt>. If unsure, say `N'. +config NETFILTER_XT_TARGET_TRACE + tristate '"TRACE" target support' + depends on NETFILTER_XTABLES + depends on IP_NF_RAW || IP6_NF_RAW + help + The TRACE target allows you to mark packets so that the kernel + will log every rule which match the packets as those traverse + the tables, chains, rules. + + If you want to compile it as a module, say M here and read + <file:Documentation/modules.txt>. If unsure, say `N'. + config NETFILTER_XT_TARGET_SECMARK tristate '"SECMARK" target support' depends on NETFILTER_XTABLES && NETWORK_SECMARK @@ -411,6 +433,14 @@ config NETFILTER_XT_MATCH_CONNBYTES If you want to compile it as a module, say M here and read <file:Documentation/kbuild/modules.txt>. If unsure, say `N'. +config NETFILTER_XT_MATCH_CONNLIMIT + tristate '"connlimit" match support"' + depends on NETFILTER_XTABLES + depends on NF_CONNTRACK + ---help--- + This match allows you to match against the number of parallel + connections to a server per client IP address (or address block). + config NETFILTER_XT_MATCH_CONNMARK tristate '"connmark" connection mark match support' depends on NETFILTER_XTABLES @@ -635,6 +665,19 @@ config NETFILTER_XT_MATCH_TCPMSS To compile it as a module, choose M here. If unsure, say N. +config NETFILTER_XT_MATCH_U32 + tristate '"u32" match support' + depends on NETFILTER_XTABLES + ---help--- + u32 allows you to extract quantities of up to 4 bytes from a packet, + AND them with specified masks, shift them by specified amounts and + test whether the results are in any of a set of specified ranges. + The specification of what to extract is general enough to skip over + headers with lengths stored in the packet, as in IP or TCP header + lengths. + + Details and examples are in the kernel module source. + config NETFILTER_XT_MATCH_HASHLIMIT tristate '"hashlimit" match support' depends on NETFILTER_XTABLES && (IP6_NF_IPTABLES || IP6_NF_IPTABLES=n) diff --git a/net/netfilter/Makefile b/net/netfilter/Makefile index b2b5c7566b26..0c054bf27973 100644 --- a/net/netfilter/Makefile +++ b/net/netfilter/Makefile @@ -1,6 +1,6 @@ netfilter-objs := core.o nf_log.o nf_queue.o nf_sockopt.o -nf_conntrack-y := nf_conntrack_core.o nf_conntrack_standalone.o nf_conntrack_expect.o nf_conntrack_helper.o nf_conntrack_proto.o nf_conntrack_l3proto_generic.o nf_conntrack_proto_generic.o nf_conntrack_proto_tcp.o nf_conntrack_proto_udp.o +nf_conntrack-y := nf_conntrack_core.o nf_conntrack_standalone.o nf_conntrack_expect.o nf_conntrack_helper.o nf_conntrack_proto.o nf_conntrack_l3proto_generic.o nf_conntrack_proto_generic.o nf_conntrack_proto_tcp.o nf_conntrack_proto_udp.o nf_conntrack_extend.o nf_conntrack-$(CONFIG_NF_CONNTRACK_EVENTS) += nf_conntrack_ecache.o obj-$(CONFIG_NETFILTER) = netfilter.o @@ -16,6 +16,7 @@ obj-$(CONFIG_NF_CONNTRACK) += nf_conntrack.o # SCTP protocol connection tracking obj-$(CONFIG_NF_CT_PROTO_GRE) += nf_conntrack_proto_gre.o obj-$(CONFIG_NF_CT_PROTO_SCTP) += nf_conntrack_proto_sctp.o +obj-$(CONFIG_NF_CT_PROTO_UDPLITE) += nf_conntrack_proto_udplite.o # netlink interface for nf_conntrack obj-$(CONFIG_NF_CT_NETLINK) += nf_conntrack_netlink.o @@ -44,6 +45,7 @@ obj-$(CONFIG_NETFILTER_XT_TARGET_MARK) += xt_MARK.o obj-$(CONFIG_NETFILTER_XT_TARGET_NFQUEUE) += xt_NFQUEUE.o obj-$(CONFIG_NETFILTER_XT_TARGET_NFLOG) += xt_NFLOG.o obj-$(CONFIG_NETFILTER_XT_TARGET_NOTRACK) += xt_NOTRACK.o +obj-$(CONFIG_NETFILTER_XT_TARGET_TRACE) += xt_TRACE.o obj-$(CONFIG_NETFILTER_XT_TARGET_SECMARK) += xt_SECMARK.o obj-$(CONFIG_NETFILTER_XT_TARGET_TCPMSS) += xt_TCPMSS.o obj-$(CONFIG_NETFILTER_XT_TARGET_CONNSECMARK) += xt_CONNSECMARK.o @@ -51,6 +53,7 @@ obj-$(CONFIG_NETFILTER_XT_TARGET_CONNSECMARK) += xt_CONNSECMARK.o # matches obj-$(CONFIG_NETFILTER_XT_MATCH_COMMENT) += xt_comment.o obj-$(CONFIG_NETFILTER_XT_MATCH_CONNBYTES) += xt_connbytes.o +obj-$(CONFIG_NETFILTER_XT_MATCH_CONNLIMIT) += xt_connlimit.o obj-$(CONFIG_NETFILTER_XT_MATCH_CONNMARK) += xt_connmark.o obj-$(CONFIG_NETFILTER_XT_MATCH_CONNTRACK) += xt_conntrack.o obj-$(CONFIG_NETFILTER_XT_MATCH_DCCP) += xt_dccp.o @@ -72,4 +75,5 @@ obj-$(CONFIG_NETFILTER_XT_MATCH_STATISTIC) += xt_statistic.o obj-$(CONFIG_NETFILTER_XT_MATCH_STRING) += xt_string.o obj-$(CONFIG_NETFILTER_XT_MATCH_TCPMSS) += xt_tcpmss.o obj-$(CONFIG_NETFILTER_XT_MATCH_PHYSDEV) += xt_physdev.o +obj-$(CONFIG_NETFILTER_XT_MATCH_U32) += xt_u32.o obj-$(CONFIG_NETFILTER_XT_MATCH_HASHLIMIT) += xt_hashlimit.o diff --git a/net/netfilter/core.c b/net/netfilter/core.c index a84478ee2ded..381a77cf0c9e 100644 --- a/net/netfilter/core.c +++ b/net/netfilter/core.c @@ -203,7 +203,9 @@ int skb_make_writable(struct sk_buff **pskb, unsigned int writable_len) return 0; /* Not exclusive use of packet? Must copy. */ - if (skb_shared(*pskb) || skb_cloned(*pskb)) + if (skb_cloned(*pskb) && !skb_clone_writable(*pskb, writable_len)) + goto copy_skb; + if (skb_shared(*pskb)) goto copy_skb; return pskb_may_pull(*pskb, writable_len); @@ -229,13 +231,13 @@ void nf_proto_csum_replace4(__sum16 *sum, struct sk_buff *skb, { __be32 diff[] = { ~from, to }; if (skb->ip_summed != CHECKSUM_PARTIAL) { - *sum = csum_fold(csum_partial((char *)diff, sizeof(diff), + *sum = csum_fold(csum_partial(diff, sizeof(diff), ~csum_unfold(*sum))); if (skb->ip_summed == CHECKSUM_COMPLETE && pseudohdr) - skb->csum = ~csum_partial((char *)diff, sizeof(diff), + skb->csum = ~csum_partial(diff, sizeof(diff), ~skb->csum); } else if (pseudohdr) - *sum = ~csum_fold(csum_partial((char *)diff, sizeof(diff), + *sum = ~csum_fold(csum_partial(diff, sizeof(diff), csum_unfold(*sum))); } EXPORT_SYMBOL(nf_proto_csum_replace4); diff --git a/net/netfilter/nf_conntrack_amanda.c b/net/netfilter/nf_conntrack_amanda.c index 0568f2e86b59..e42ab230ad88 100644 --- a/net/netfilter/nf_conntrack_amanda.c +++ b/net/netfilter/nf_conntrack_amanda.c @@ -142,23 +142,22 @@ static int amanda_help(struct sk_buff **pskb, if (port == 0 || len > 5) break; - exp = nf_conntrack_expect_alloc(ct); + exp = nf_ct_expect_alloc(ct); if (exp == NULL) { ret = NF_DROP; goto out; } tuple = &ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple; - nf_conntrack_expect_init(exp, family, - &tuple->src.u3, &tuple->dst.u3, - IPPROTO_TCP, NULL, &port); + nf_ct_expect_init(exp, family, &tuple->src.u3, &tuple->dst.u3, + IPPROTO_TCP, NULL, &port); nf_nat_amanda = rcu_dereference(nf_nat_amanda_hook); if (nf_nat_amanda && ct->status & IPS_NAT_MASK) ret = nf_nat_amanda(pskb, ctinfo, off - dataoff, len, exp); - else if (nf_conntrack_expect_related(exp) != 0) + else if (nf_ct_expect_related(exp) != 0) ret = NF_DROP; - nf_conntrack_expect_put(exp); + nf_ct_expect_put(exp); } out: @@ -175,9 +174,6 @@ static struct nf_conntrack_helper amanda_helper[2] __read_mostly = { .tuple.src.l3num = AF_INET, .tuple.src.u.udp.port = __constant_htons(10080), .tuple.dst.protonum = IPPROTO_UDP, - .mask.src.l3num = 0xFFFF, - .mask.src.u.udp.port = __constant_htons(0xFFFF), - .mask.dst.protonum = 0xFF, }, { .name = "amanda", @@ -188,9 +184,6 @@ static struct nf_conntrack_helper amanda_helper[2] __read_mostly = { .tuple.src.l3num = AF_INET6, .tuple.src.u.udp.port = __constant_htons(10080), .tuple.dst.protonum = IPPROTO_UDP, - .mask.src.l3num = 0xFFFF, - .mask.src.u.udp.port = __constant_htons(0xFFFF), - .mask.dst.protonum = 0xFF, }, }; diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c index 7a15e30356f2..0fe11889ce14 100644 --- a/net/netfilter/nf_conntrack_core.c +++ b/net/netfilter/nf_conntrack_core.c @@ -36,15 +36,10 @@ #include <net/netfilter/nf_conntrack_expect.h> #include <net/netfilter/nf_conntrack_helper.h> #include <net/netfilter/nf_conntrack_core.h> +#include <net/netfilter/nf_conntrack_extend.h> #define NF_CONNTRACK_VERSION "0.5.0" -#if 0 -#define DEBUGP printk -#else -#define DEBUGP(format, args...) -#endif - DEFINE_RWLOCK(nf_conntrack_lock); EXPORT_SYMBOL_GPL(nf_conntrack_lock); @@ -52,57 +47,27 @@ EXPORT_SYMBOL_GPL(nf_conntrack_lock); atomic_t nf_conntrack_count = ATOMIC_INIT(0); EXPORT_SYMBOL_GPL(nf_conntrack_count); -void (*nf_conntrack_destroyed)(struct nf_conn *conntrack); -EXPORT_SYMBOL_GPL(nf_conntrack_destroyed); - unsigned int nf_conntrack_htable_size __read_mostly; EXPORT_SYMBOL_GPL(nf_conntrack_htable_size); int nf_conntrack_max __read_mostly; EXPORT_SYMBOL_GPL(nf_conntrack_max); -struct list_head *nf_conntrack_hash __read_mostly; +struct hlist_head *nf_conntrack_hash __read_mostly; EXPORT_SYMBOL_GPL(nf_conntrack_hash); struct nf_conn nf_conntrack_untracked __read_mostly; EXPORT_SYMBOL_GPL(nf_conntrack_untracked); unsigned int nf_ct_log_invalid __read_mostly; -LIST_HEAD(unconfirmed); +HLIST_HEAD(unconfirmed); static int nf_conntrack_vmalloc __read_mostly; - +static struct kmem_cache *nf_conntrack_cachep __read_mostly; static unsigned int nf_conntrack_next_id; DEFINE_PER_CPU(struct ip_conntrack_stat, nf_conntrack_stat); EXPORT_PER_CPU_SYMBOL(nf_conntrack_stat); -/* - * This scheme offers various size of "struct nf_conn" dependent on - * features(helper, nat, ...) - */ - -#define NF_CT_FEATURES_NAMELEN 256 -static struct { - /* name of slab cache. printed in /proc/slabinfo */ - char *name; - - /* size of slab cache */ - size_t size; - - /* slab cache pointer */ - struct kmem_cache *cachep; - - /* allocated slab cache + modules which uses this slab cache */ - int use; - -} nf_ct_cache[NF_CT_F_NUM]; - -/* protect members of nf_ct_cache except of "use" */ -DEFINE_RWLOCK(nf_ct_cache_lock); - -/* This avoids calling kmem_cache_create() with same name simultaneously */ -static DEFINE_MUTEX(nf_ct_cache_mutex); - static int nf_conntrack_hash_rnd_initted; static unsigned int nf_conntrack_hash_rnd; @@ -114,7 +79,8 @@ static u_int32_t __hash_conntrack(const struct nf_conntrack_tuple *tuple, a = jhash2(tuple->src.u3.all, ARRAY_SIZE(tuple->src.u3.all), (tuple->src.l3num << 16) | tuple->dst.protonum); b = jhash2(tuple->dst.u3.all, ARRAY_SIZE(tuple->dst.u3.all), - (tuple->src.u.all << 16) | tuple->dst.u.all); + ((__force __u16)tuple->src.u.all << 16) | + (__force __u16)tuple->dst.u.all); return jhash_2words(a, b, rnd) % size; } @@ -125,122 +91,6 @@ static inline u_int32_t hash_conntrack(const struct nf_conntrack_tuple *tuple) nf_conntrack_hash_rnd); } -int nf_conntrack_register_cache(u_int32_t features, const char *name, - size_t size) -{ - int ret = 0; - char *cache_name; - struct kmem_cache *cachep; - - DEBUGP("nf_conntrack_register_cache: features=0x%x, name=%s, size=%d\n", - features, name, size); - - if (features < NF_CT_F_BASIC || features >= NF_CT_F_NUM) { - DEBUGP("nf_conntrack_register_cache: invalid features.: 0x%x\n", - features); - return -EINVAL; - } - - mutex_lock(&nf_ct_cache_mutex); - - write_lock_bh(&nf_ct_cache_lock); - /* e.g: multiple helpers are loaded */ - if (nf_ct_cache[features].use > 0) { - DEBUGP("nf_conntrack_register_cache: already resisterd.\n"); - if ((!strncmp(nf_ct_cache[features].name, name, - NF_CT_FEATURES_NAMELEN)) - && nf_ct_cache[features].size == size) { - DEBUGP("nf_conntrack_register_cache: reusing.\n"); - nf_ct_cache[features].use++; - ret = 0; - } else - ret = -EBUSY; - - write_unlock_bh(&nf_ct_cache_lock); - mutex_unlock(&nf_ct_cache_mutex); - return ret; - } - write_unlock_bh(&nf_ct_cache_lock); - - /* - * The memory space for name of slab cache must be alive until - * cache is destroyed. - */ - cache_name = kmalloc(sizeof(char)*NF_CT_FEATURES_NAMELEN, GFP_ATOMIC); - if (cache_name == NULL) { - DEBUGP("nf_conntrack_register_cache: can't alloc cache_name\n"); - ret = -ENOMEM; - goto out_up_mutex; - } - - if (strlcpy(cache_name, name, NF_CT_FEATURES_NAMELEN) - >= NF_CT_FEATURES_NAMELEN) { - printk("nf_conntrack_register_cache: name too long\n"); - ret = -EINVAL; - goto out_free_name; - } - - cachep = kmem_cache_create(cache_name, size, 0, 0, - NULL, NULL); - if (!cachep) { - printk("nf_conntrack_register_cache: Can't create slab cache " - "for the features = 0x%x\n", features); - ret = -ENOMEM; - goto out_free_name; - } - - write_lock_bh(&nf_ct_cache_lock); - nf_ct_cache[features].use = 1; - nf_ct_cache[features].size = size; - nf_ct_cache[features].cachep = cachep; - nf_ct_cache[features].name = cache_name; - write_unlock_bh(&nf_ct_cache_lock); - - goto out_up_mutex; - -out_free_name: - kfree(cache_name); -out_up_mutex: - mutex_unlock(&nf_ct_cache_mutex); - return ret; -} -EXPORT_SYMBOL_GPL(nf_conntrack_register_cache); - -/* FIXME: In the current, only nf_conntrack_cleanup() can call this function. */ -void nf_conntrack_unregister_cache(u_int32_t features) -{ - struct kmem_cache *cachep; - char *name; - - /* - * This assures that kmem_cache_create() isn't called before destroying - * slab cache. - */ - DEBUGP("nf_conntrack_unregister_cache: 0x%04x\n", features); - mutex_lock(&nf_ct_cache_mutex); - - write_lock_bh(&nf_ct_cache_lock); - if (--nf_ct_cache[features].use > 0) { - write_unlock_bh(&nf_ct_cache_lock); - mutex_unlock(&nf_ct_cache_mutex); - return; - } - cachep = nf_ct_cache[features].cachep; - name = nf_ct_cache[features].name; - nf_ct_cache[features].cachep = NULL; - nf_ct_cache[features].name = NULL; - nf_ct_cache[features].size = 0; - write_unlock_bh(&nf_ct_cache_lock); - - synchronize_net(); - - kmem_cache_destroy(cachep); - kfree(name); - - mutex_unlock(&nf_ct_cache_mutex); -} -EXPORT_SYMBOL_GPL(nf_conntrack_unregister_cache); - int nf_ct_get_tuple(const struct sk_buff *skb, unsigned int nhoff, @@ -264,6 +114,36 @@ nf_ct_get_tuple(const struct sk_buff *skb, } EXPORT_SYMBOL_GPL(nf_ct_get_tuple); +int nf_ct_get_tuplepr(const struct sk_buff *skb, + unsigned int nhoff, + u_int16_t l3num, + struct nf_conntrack_tuple *tuple) +{ + struct nf_conntrack_l3proto *l3proto; + struct nf_conntrack_l4proto *l4proto; + unsigned int protoff; + u_int8_t protonum; + int ret; + + rcu_read_lock(); + + l3proto = __nf_ct_l3proto_find(l3num); + ret = l3proto->get_l4proto(skb, nhoff, &protoff, &protonum); + if (ret != NF_ACCEPT) { + rcu_read_unlock(); + return 0; + } + + l4proto = __nf_ct_l4proto_find(l3num, protonum); + + ret = nf_ct_get_tuple(skb, nhoff, protoff, l3num, protonum, tuple, + l3proto, l4proto); + + rcu_read_unlock(); + return ret; +} +EXPORT_SYMBOL_GPL(nf_ct_get_tuplepr); + int nf_ct_invert_tuple(struct nf_conntrack_tuple *inverse, const struct nf_conntrack_tuple *orig, @@ -286,9 +166,9 @@ EXPORT_SYMBOL_GPL(nf_ct_invert_tuple); static void clean_from_lists(struct nf_conn *ct) { - DEBUGP("clean_from_lists(%p)\n", ct); - list_del(&ct->tuplehash[IP_CT_DIR_ORIGINAL].list); - list_del(&ct->tuplehash[IP_CT_DIR_REPLY].list); + pr_debug("clean_from_lists(%p)\n", ct); + hlist_del(&ct->tuplehash[IP_CT_DIR_ORIGINAL].hnode); + hlist_del(&ct->tuplehash[IP_CT_DIR_REPLY].hnode); /* Destroy all pending expectations */ nf_ct_remove_expectations(ct); @@ -299,9 +179,8 @@ destroy_conntrack(struct nf_conntrack *nfct) { struct nf_conn *ct = (struct nf_conn *)nfct; struct nf_conntrack_l4proto *l4proto; - typeof(nf_conntrack_destroyed) destroyed; - DEBUGP("destroy_conntrack(%p)\n", ct); + pr_debug("destroy_conntrack(%p)\n", ct); NF_CT_ASSERT(atomic_read(&nfct->use) == 0); NF_CT_ASSERT(!timer_pending(&ct->timeout)); @@ -317,9 +196,7 @@ destroy_conntrack(struct nf_conntrack *nfct) if (l4proto && l4proto->destroy) l4proto->destroy(ct); - destroyed = rcu_dereference(nf_conntrack_destroyed); - if (destroyed) - destroyed(ct); + nf_ct_ext_destroy(ct); rcu_read_unlock(); @@ -332,8 +209,8 @@ destroy_conntrack(struct nf_conntrack *nfct) /* We overload first tuple to link into unconfirmed list. */ if (!nf_ct_is_confirmed(ct)) { - BUG_ON(list_empty(&ct->tuplehash[IP_CT_DIR_ORIGINAL].list)); - list_del(&ct->tuplehash[IP_CT_DIR_ORIGINAL].list); + BUG_ON(hlist_unhashed(&ct->tuplehash[IP_CT_DIR_ORIGINAL].hnode)); + hlist_del(&ct->tuplehash[IP_CT_DIR_ORIGINAL].hnode); } NF_CT_STAT_INC(delete); @@ -342,7 +219,7 @@ destroy_conntrack(struct nf_conntrack *nfct) if (ct->master) nf_ct_put(ct->master); - DEBUGP("destroy_conntrack: returning ct=%p to slab\n", ct); + pr_debug("destroy_conntrack: returning ct=%p to slab\n", ct); nf_conntrack_free(ct); } @@ -374,9 +251,10 @@ __nf_conntrack_find(const struct nf_conntrack_tuple *tuple, const struct nf_conn *ignored_conntrack) { struct nf_conntrack_tuple_hash *h; + struct hlist_node *n; unsigned int hash = hash_conntrack(tuple); - list_for_each_entry(h, &nf_conntrack_hash[hash], list) { + hlist_for_each_entry(h, n, &nf_conntrack_hash[hash], hnode) { if (nf_ct_tuplehash_to_ctrack(h) != ignored_conntrack && nf_ct_tuple_equal(tuple, &h->tuple)) { NF_CT_STAT_INC(found); @@ -391,13 +269,12 @@ EXPORT_SYMBOL_GPL(__nf_conntrack_find); /* Find a connection corresponding to a tuple. */ struct nf_conntrack_tuple_hash * -nf_conntrack_find_get(const struct nf_conntrack_tuple *tuple, - const struct nf_conn *ignored_conntrack) +nf_conntrack_find_get(const struct nf_conntrack_tuple *tuple) { struct nf_conntrack_tuple_hash *h; read_lock_bh(&nf_conntrack_lock); - h = __nf_conntrack_find(tuple, ignored_conntrack); + h = __nf_conntrack_find(tuple, NULL); if (h) atomic_inc(&nf_ct_tuplehash_to_ctrack(h)->ct_general.use); read_unlock_bh(&nf_conntrack_lock); @@ -411,10 +288,10 @@ static void __nf_conntrack_hash_insert(struct nf_conn *ct, unsigned int repl_hash) { ct->id = ++nf_conntrack_next_id; - list_add(&ct->tuplehash[IP_CT_DIR_ORIGINAL].list, - &nf_conntrack_hash[hash]); - list_add(&ct->tuplehash[IP_CT_DIR_REPLY].list, - &nf_conntrack_hash[repl_hash]); + hlist_add_head(&ct->tuplehash[IP_CT_DIR_ORIGINAL].hnode, + &nf_conntrack_hash[hash]); + hlist_add_head(&ct->tuplehash[IP_CT_DIR_REPLY].hnode, + &nf_conntrack_hash[repl_hash]); } void nf_conntrack_hash_insert(struct nf_conn *ct) @@ -438,6 +315,7 @@ __nf_conntrack_confirm(struct sk_buff **pskb) struct nf_conntrack_tuple_hash *h; struct nf_conn *ct; struct nf_conn_help *help; + struct hlist_node *n; enum ip_conntrack_info ctinfo; ct = nf_ct_get(*pskb, &ctinfo); @@ -460,24 +338,24 @@ __nf_conntrack_confirm(struct sk_buff **pskb) /* No external references means noone else could have confirmed us. */ NF_CT_ASSERT(!nf_ct_is_confirmed(ct)); - DEBUGP("Confirming conntrack %p\n", ct); + pr_debug("Confirming conntrack %p\n", ct); write_lock_bh(&nf_conntrack_lock); /* See if there's one in the list already, including reverse: NAT could have grabbed it without realizing, since we're not in the hash. If there is, we lost race. */ - list_for_each_entry(h, &nf_conntrack_hash[hash], list) + hlist_for_each_entry(h, n, &nf_conntrack_hash[hash], hnode) if (nf_ct_tuple_equal(&ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple, &h->tuple)) goto out; - list_for_each_entry(h, &nf_conntrack_hash[repl_hash], list) + hlist_for_each_entry(h, n, &nf_conntrack_hash[repl_hash], hnode) if (nf_ct_tuple_equal(&ct->tuplehash[IP_CT_DIR_REPLY].tuple, &h->tuple)) goto out; /* Remove from unconfirmed list */ - list_del(&ct->tuplehash[IP_CT_DIR_ORIGINAL].list); + hlist_del(&ct->tuplehash[IP_CT_DIR_ORIGINAL].hnode); __nf_conntrack_hash_insert(ct, hash, repl_hash); /* Timer relative to confirmation time, not original @@ -524,24 +402,33 @@ nf_conntrack_tuple_taken(const struct nf_conntrack_tuple *tuple, } EXPORT_SYMBOL_GPL(nf_conntrack_tuple_taken); +#define NF_CT_EVICTION_RANGE 8 + /* There's a small race here where we may free a just-assured connection. Too bad: we're in trouble anyway. */ -static int early_drop(struct list_head *chain) +static int early_drop(unsigned int hash) { - /* Traverse backwards: gives us oldest, which is roughly LRU */ + /* Use oldest entry, which is roughly LRU */ struct nf_conntrack_tuple_hash *h; struct nf_conn *ct = NULL, *tmp; + struct hlist_node *n; + unsigned int i, cnt = 0; int dropped = 0; read_lock_bh(&nf_conntrack_lock); - list_for_each_entry_reverse(h, chain, list) { - tmp = nf_ct_tuplehash_to_ctrack(h); - if (!test_bit(IPS_ASSURED_BIT, &tmp->status)) { - ct = tmp; - atomic_inc(&ct->ct_general.use); - break; + for (i = 0; i < nf_conntrack_htable_size; i++) { + hlist_for_each_entry(h, n, &nf_conntrack_hash[hash], hnode) { + tmp = nf_ct_tuplehash_to_ctrack(h); + if (!test_bit(IPS_ASSURED_BIT, &tmp->status)) + ct = tmp; + cnt++; } + if (ct || cnt >= NF_CT_EVICTION_RANGE) + break; + hash = (hash + 1) % nf_conntrack_htable_size; } + if (ct) + atomic_inc(&ct->ct_general.use); read_unlock_bh(&nf_conntrack_lock); if (!ct) @@ -556,14 +443,10 @@ static int early_drop(struct list_head *chain) return dropped; } -static struct nf_conn * -__nf_conntrack_alloc(const struct nf_conntrack_tuple *orig, - const struct nf_conntrack_tuple *repl, - const struct nf_conntrack_l3proto *l3proto, - u_int32_t features) +struct nf_conn *nf_conntrack_alloc(const struct nf_conntrack_tuple *orig, + const struct nf_conntrack_tuple *repl) { struct nf_conn *conntrack = NULL; - struct nf_conntrack_helper *helper; if (unlikely(!nf_conntrack_hash_rnd_initted)) { get_random_bytes(&nf_conntrack_hash_rnd, 4); @@ -576,8 +459,7 @@ __nf_conntrack_alloc(const struct nf_conntrack_tuple *orig, if (nf_conntrack_max && atomic_read(&nf_conntrack_count) > nf_conntrack_max) { unsigned int hash = hash_conntrack(orig); - /* Try dropping from this hash chain. */ - if (!early_drop(&nf_conntrack_hash[hash])) { + if (!early_drop(hash)) { atomic_dec(&nf_conntrack_count); if (net_ratelimit()) printk(KERN_WARNING @@ -587,72 +469,28 @@ __nf_conntrack_alloc(const struct nf_conntrack_tuple *orig, } } - /* find features needed by this conntrack. */ - features |= l3proto->get_features(orig); - - /* FIXME: protect helper list per RCU */ - read_lock_bh(&nf_conntrack_lock); - helper = __nf_ct_helper_find(repl); - /* NAT might want to assign a helper later */ - if (helper || features & NF_CT_F_NAT) - features |= NF_CT_F_HELP; - read_unlock_bh(&nf_conntrack_lock); - - DEBUGP("nf_conntrack_alloc: features=0x%x\n", features); - - read_lock_bh(&nf_ct_cache_lock); - - if (unlikely(!nf_ct_cache[features].use)) { - DEBUGP("nf_conntrack_alloc: not supported features = 0x%x\n", - features); - goto out; - } - - conntrack = kmem_cache_alloc(nf_ct_cache[features].cachep, GFP_ATOMIC); + conntrack = kmem_cache_zalloc(nf_conntrack_cachep, GFP_ATOMIC); if (conntrack == NULL) { - DEBUGP("nf_conntrack_alloc: Can't alloc conntrack from cache\n"); - goto out; + pr_debug("nf_conntrack_alloc: Can't alloc conntrack.\n"); + atomic_dec(&nf_conntrack_count); + return ERR_PTR(-ENOMEM); } - memset(conntrack, 0, nf_ct_cache[features].size); - conntrack->features = features; atomic_set(&conntrack->ct_general.use, 1); conntrack->tuplehash[IP_CT_DIR_ORIGINAL].tuple = *orig; conntrack->tuplehash[IP_CT_DIR_REPLY].tuple = *repl; /* Don't set timer yet: wait for confirmation */ setup_timer(&conntrack->timeout, death_by_timeout, (unsigned long)conntrack); - read_unlock_bh(&nf_ct_cache_lock); return conntrack; -out: - read_unlock_bh(&nf_ct_cache_lock); - atomic_dec(&nf_conntrack_count); - return conntrack; -} - -struct nf_conn *nf_conntrack_alloc(const struct nf_conntrack_tuple *orig, - const struct nf_conntrack_tuple *repl) -{ - struct nf_conntrack_l3proto *l3proto; - struct nf_conn *ct; - - rcu_read_lock(); - l3proto = __nf_ct_l3proto_find(orig->src.l3num); - ct = __nf_conntrack_alloc(orig, repl, l3proto, 0); - rcu_read_unlock(); - - return ct; } EXPORT_SYMBOL_GPL(nf_conntrack_alloc); void nf_conntrack_free(struct nf_conn *conntrack) { - u_int32_t features = conntrack->features; - NF_CT_ASSERT(features >= NF_CT_F_BASIC && features < NF_CT_F_NUM); - DEBUGP("nf_conntrack_free: features = 0x%x, conntrack=%p\n", features, - conntrack); - kmem_cache_free(nf_ct_cache[features].cachep, conntrack); + nf_ct_ext_free(conntrack); + kmem_cache_free(nf_conntrack_cachep, conntrack); atomic_dec(&nf_conntrack_count); } EXPORT_SYMBOL_GPL(nf_conntrack_free); @@ -670,43 +508,38 @@ init_conntrack(const struct nf_conntrack_tuple *tuple, struct nf_conn_help *help; struct nf_conntrack_tuple repl_tuple; struct nf_conntrack_expect *exp; - u_int32_t features = 0; if (!nf_ct_invert_tuple(&repl_tuple, tuple, l3proto, l4proto)) { - DEBUGP("Can't invert tuple.\n"); + pr_debug("Can't invert tuple.\n"); return NULL; } - read_lock_bh(&nf_conntrack_lock); - exp = __nf_conntrack_expect_find(tuple); - if (exp && exp->helper) - features = NF_CT_F_HELP; - read_unlock_bh(&nf_conntrack_lock); - - conntrack = __nf_conntrack_alloc(tuple, &repl_tuple, l3proto, features); + conntrack = nf_conntrack_alloc(tuple, &repl_tuple); if (conntrack == NULL || IS_ERR(conntrack)) { - DEBUGP("Can't allocate conntrack.\n"); + pr_debug("Can't allocate conntrack.\n"); return (struct nf_conntrack_tuple_hash *)conntrack; } if (!l4proto->new(conntrack, skb, dataoff)) { nf_conntrack_free(conntrack); - DEBUGP("init conntrack: can't track with proto module\n"); + pr_debug("init conntrack: can't track with proto module\n"); return NULL; } write_lock_bh(&nf_conntrack_lock); - exp = find_expectation(tuple); - - help = nfct_help(conntrack); + exp = nf_ct_find_expectation(tuple); if (exp) { - DEBUGP("conntrack: expectation arrives ct=%p exp=%p\n", - conntrack, exp); + pr_debug("conntrack: expectation arrives ct=%p exp=%p\n", + conntrack, exp); /* Welcome, Mr. Bond. We've been expecting you... */ __set_bit(IPS_EXPECTED_BIT, &conntrack->status); conntrack->master = exp->master; - if (exp->helper) - rcu_assign_pointer(help->helper, exp->helper); + if (exp->helper) { + help = nf_ct_helper_ext_add(conntrack, GFP_ATOMIC); + if (help) + rcu_assign_pointer(help->helper, exp->helper); + } + #ifdef CONFIG_NF_CONNTRACK_MARK conntrack->mark = exp->master->mark; #endif @@ -716,23 +549,27 @@ init_conntrack(const struct nf_conntrack_tuple *tuple, nf_conntrack_get(&conntrack->master->ct_general); NF_CT_STAT_INC(expect_new); } else { - if (help) { - /* not in hash table yet, so not strictly necessary */ - rcu_assign_pointer(help->helper, - __nf_ct_helper_find(&repl_tuple)); + struct nf_conntrack_helper *helper; + + helper = __nf_ct_helper_find(&repl_tuple); + if (helper) { + help = nf_ct_helper_ext_add(conntrack, GFP_ATOMIC); + if (help) + rcu_assign_pointer(help->helper, helper); } NF_CT_STAT_INC(new); } /* Overload tuple linked list to put us in unconfirmed list. */ - list_add(&conntrack->tuplehash[IP_CT_DIR_ORIGINAL].list, &unconfirmed); + hlist_add_head(&conntrack->tuplehash[IP_CT_DIR_ORIGINAL].hnode, + &unconfirmed); write_unlock_bh(&nf_conntrack_lock); if (exp) { if (exp->expectfn) exp->expectfn(conntrack, exp); - nf_conntrack_expect_put(exp); + nf_ct_expect_put(exp); } return &conntrack->tuplehash[IP_CT_DIR_ORIGINAL]; @@ -756,12 +593,12 @@ resolve_normal_ct(struct sk_buff *skb, if (!nf_ct_get_tuple(skb, skb_network_offset(skb), dataoff, l3num, protonum, &tuple, l3proto, l4proto)) { - DEBUGP("resolve_normal_ct: Can't get tuple\n"); + pr_debug("resolve_normal_ct: Can't get tuple\n"); return NULL; } /* look for tuple match */ - h = nf_conntrack_find_get(&tuple, NULL); + h = nf_conntrack_find_get(&tuple); if (!h) { h = init_conntrack(&tuple, l3proto, l4proto, skb, dataoff); if (!h) @@ -779,13 +616,14 @@ resolve_normal_ct(struct sk_buff *skb, } else { /* Once we've had two way comms, always ESTABLISHED. */ if (test_bit(IPS_SEEN_REPLY_BIT, &ct->status)) { - DEBUGP("nf_conntrack_in: normal packet for %p\n", ct); + pr_debug("nf_conntrack_in: normal packet for %p\n", ct); *ctinfo = IP_CT_ESTABLISHED; } else if (test_bit(IPS_EXPECTED_BIT, &ct->status)) { - DEBUGP("nf_conntrack_in: related packet for %p\n", ct); + pr_debug("nf_conntrack_in: related packet for %p\n", + ct); *ctinfo = IP_CT_RELATED; } else { - DEBUGP("nf_conntrack_in: new packet for %p\n", ct); + pr_debug("nf_conntrack_in: new packet for %p\n", ct); *ctinfo = IP_CT_NEW; } *set_reply = 0; @@ -815,9 +653,12 @@ nf_conntrack_in(int pf, unsigned int hooknum, struct sk_buff **pskb) /* rcu_read_lock()ed by nf_hook_slow */ l3proto = __nf_ct_l3proto_find((u_int16_t)pf); - - if ((ret = l3proto->prepare(pskb, hooknum, &dataoff, &protonum)) <= 0) { - DEBUGP("not prepared to track yet or error occured\n"); + ret = l3proto->get_l4proto(*pskb, skb_network_offset(*pskb), + &dataoff, &protonum); + if (ret <= 0) { + pr_debug("not prepared to track yet or error occured\n"); + NF_CT_STAT_INC_ATOMIC(error); + NF_CT_STAT_INC_ATOMIC(invalid); return -ret; } @@ -853,7 +694,7 @@ nf_conntrack_in(int pf, unsigned int hooknum, struct sk_buff **pskb) if (ret < 0) { /* Invalid: inverse of the return code tells * the netfilter core what to do */ - DEBUGP("nf_conntrack_in: Can't track with proto module\n"); + pr_debug("nf_conntrack_in: Can't track with proto module\n"); nf_conntrack_put((*pskb)->nfct); (*pskb)->nfct = NULL; NF_CT_STAT_INC_ATOMIC(invalid); @@ -888,23 +729,36 @@ void nf_conntrack_alter_reply(struct nf_conn *ct, const struct nf_conntrack_tuple *newreply) { struct nf_conn_help *help = nfct_help(ct); + struct nf_conntrack_helper *helper; write_lock_bh(&nf_conntrack_lock); /* Should be unconfirmed, so not in hash table yet */ NF_CT_ASSERT(!nf_ct_is_confirmed(ct)); - DEBUGP("Altering reply tuple of %p to ", ct); + pr_debug("Altering reply tuple of %p to ", ct); NF_CT_DUMP_TUPLE(newreply); ct->tuplehash[IP_CT_DIR_REPLY].tuple = *newreply; - if (!ct->master && help && help->expecting == 0) { - struct nf_conntrack_helper *helper; - helper = __nf_ct_helper_find(newreply); - if (helper) - memset(&help->help, 0, sizeof(help->help)); - /* not in hash table yet, so not strictly necessary */ - rcu_assign_pointer(help->helper, helper); + if (ct->master || (help && help->expecting != 0)) + goto out; + + helper = __nf_ct_helper_find(newreply); + if (helper == NULL) { + if (help) + rcu_assign_pointer(help->helper, NULL); + goto out; } + + if (help == NULL) { + help = nf_ct_helper_ext_add(ct, GFP_ATOMIC); + if (help == NULL) + goto out; + } else { + memset(&help->help, 0, sizeof(help->help)); + } + + rcu_assign_pointer(help->helper, helper); +out: write_unlock_bh(&nf_conntrack_lock); } EXPORT_SYMBOL_GPL(nf_conntrack_alter_reply); @@ -1048,16 +902,17 @@ get_next_corpse(int (*iter)(struct nf_conn *i, void *data), { struct nf_conntrack_tuple_hash *h; struct nf_conn *ct; + struct hlist_node *n; write_lock_bh(&nf_conntrack_lock); for (; *bucket < nf_conntrack_htable_size; (*bucket)++) { - list_for_each_entry(h, &nf_conntrack_hash[*bucket], list) { + hlist_for_each_entry(h, n, &nf_conntrack_hash[*bucket], hnode) { ct = nf_ct_tuplehash_to_ctrack(h); if (iter(ct, data)) goto found; } } - list_for_each_entry(h, &unconfirmed, list) { + hlist_for_each_entry(h, n, &unconfirmed, hnode) { ct = nf_ct_tuplehash_to_ctrack(h); if (iter(ct, data)) set_bit(IPS_DYING_BIT, &ct->status); @@ -1092,14 +947,15 @@ static int kill_all(struct nf_conn *i, void *data) return 1; } -static void free_conntrack_hash(struct list_head *hash, int vmalloced, int size) +void nf_ct_free_hashtable(struct hlist_head *hash, int vmalloced, int size) { if (vmalloced) vfree(hash); else free_pages((unsigned long)hash, - get_order(sizeof(struct list_head) * size)); + get_order(sizeof(struct hlist_head) * size)); } +EXPORT_SYMBOL_GPL(nf_ct_free_hashtable); void nf_conntrack_flush(void) { @@ -1111,8 +967,6 @@ EXPORT_SYMBOL_GPL(nf_conntrack_flush); supposed to kill the mall. */ void nf_conntrack_cleanup(void) { - int i; - rcu_assign_pointer(ip_ct_attach, NULL); /* This makes sure all current packets have passed through @@ -1133,49 +987,46 @@ void nf_conntrack_cleanup(void) rcu_assign_pointer(nf_ct_destroy, NULL); - for (i = 0; i < NF_CT_F_NUM; i++) { - if (nf_ct_cache[i].use == 0) - continue; - - NF_CT_ASSERT(nf_ct_cache[i].use == 1); - nf_ct_cache[i].use = 1; - nf_conntrack_unregister_cache(i); - } - kmem_cache_destroy(nf_conntrack_expect_cachep); - free_conntrack_hash(nf_conntrack_hash, nf_conntrack_vmalloc, - nf_conntrack_htable_size); + kmem_cache_destroy(nf_conntrack_cachep); + nf_ct_free_hashtable(nf_conntrack_hash, nf_conntrack_vmalloc, + nf_conntrack_htable_size); nf_conntrack_proto_fini(); + nf_conntrack_helper_fini(); + nf_conntrack_expect_fini(); } -static struct list_head *alloc_hashtable(int size, int *vmalloced) +struct hlist_head *nf_ct_alloc_hashtable(int *sizep, int *vmalloced) { - struct list_head *hash; - unsigned int i; + struct hlist_head *hash; + unsigned int size, i; *vmalloced = 0; + + size = *sizep = roundup(*sizep, PAGE_SIZE / sizeof(struct hlist_head)); hash = (void*)__get_free_pages(GFP_KERNEL, - get_order(sizeof(struct list_head) + get_order(sizeof(struct hlist_head) * size)); if (!hash) { *vmalloced = 1; printk(KERN_WARNING "nf_conntrack: falling back to vmalloc.\n"); - hash = vmalloc(sizeof(struct list_head) * size); + hash = vmalloc(sizeof(struct hlist_head) * size); } if (hash) for (i = 0; i < size; i++) - INIT_LIST_HEAD(&hash[i]); + INIT_HLIST_HEAD(&hash[i]); return hash; } +EXPORT_SYMBOL_GPL(nf_ct_alloc_hashtable); int set_hashsize(const char *val, struct kernel_param *kp) { int i, bucket, hashsize, vmalloced; int old_vmalloced, old_size; int rnd; - struct list_head *hash, *old_hash; + struct hlist_head *hash, *old_hash; struct nf_conntrack_tuple_hash *h; /* On boot, we can set this without any fancy locking. */ @@ -1186,7 +1037,7 @@ int set_hashsize(const char *val, struct kernel_param *kp) if (!hashsize) return -EINVAL; - hash = alloc_hashtable(hashsize, &vmalloced); + hash = nf_ct_alloc_hashtable(&hashsize, &vmalloced); if (!hash) return -ENOMEM; @@ -1196,12 +1047,12 @@ int set_hashsize(const char *val, struct kernel_param *kp) write_lock_bh(&nf_conntrack_lock); for (i = 0; i < nf_conntrack_htable_size; i++) { - while (!list_empty(&nf_conntrack_hash[i])) { - h = list_entry(nf_conntrack_hash[i].next, - struct nf_conntrack_tuple_hash, list); - list_del(&h->list); + while (!hlist_empty(&nf_conntrack_hash[i])) { + h = hlist_entry(nf_conntrack_hash[i].first, + struct nf_conntrack_tuple_hash, hnode); + hlist_del(&h->hnode); bucket = __hash_conntrack(&h->tuple, hashsize, rnd); - list_add_tail(&h->list, &hash[bucket]); + hlist_add_head(&h->hnode, &hash[bucket]); } } old_size = nf_conntrack_htable_size; @@ -1214,7 +1065,7 @@ int set_hashsize(const char *val, struct kernel_param *kp) nf_conntrack_hash_rnd = rnd; write_unlock_bh(&nf_conntrack_lock); - free_conntrack_hash(old_hash, old_vmalloced, old_size); + nf_ct_free_hashtable(old_hash, old_vmalloced, old_size); return 0; } @@ -1223,50 +1074,58 @@ module_param_call(hashsize, set_hashsize, param_get_uint, int __init nf_conntrack_init(void) { + int max_factor = 8; int ret; /* Idea from tcp.c: use 1/16384 of memory. On i386: 32MB - * machine has 256 buckets. >= 1GB machines have 8192 buckets. */ + * machine has 512 buckets. >= 1GB machines have 16384 buckets. */ if (!nf_conntrack_htable_size) { nf_conntrack_htable_size = (((num_physpages << PAGE_SHIFT) / 16384) - / sizeof(struct list_head)); + / sizeof(struct hlist_head)); if (num_physpages > (1024 * 1024 * 1024 / PAGE_SIZE)) - nf_conntrack_htable_size = 8192; - if (nf_conntrack_htable_size < 16) - nf_conntrack_htable_size = 16; + nf_conntrack_htable_size = 16384; + if (nf_conntrack_htable_size < 32) + nf_conntrack_htable_size = 32; + + /* Use a max. factor of four by default to get the same max as + * with the old struct list_heads. When a table size is given + * we use the old value of 8 to avoid reducing the max. + * entries. */ + max_factor = 4; } - nf_conntrack_max = 8 * nf_conntrack_htable_size; - - printk("nf_conntrack version %s (%u buckets, %d max)\n", - NF_CONNTRACK_VERSION, nf_conntrack_htable_size, - nf_conntrack_max); - - nf_conntrack_hash = alloc_hashtable(nf_conntrack_htable_size, - &nf_conntrack_vmalloc); + nf_conntrack_hash = nf_ct_alloc_hashtable(&nf_conntrack_htable_size, + &nf_conntrack_vmalloc); if (!nf_conntrack_hash) { printk(KERN_ERR "Unable to create nf_conntrack_hash\n"); goto err_out; } - ret = nf_conntrack_register_cache(NF_CT_F_BASIC, "nf_conntrack:basic", - sizeof(struct nf_conn)); - if (ret < 0) { + nf_conntrack_max = max_factor * nf_conntrack_htable_size; + + printk("nf_conntrack version %s (%u buckets, %d max)\n", + NF_CONNTRACK_VERSION, nf_conntrack_htable_size, + nf_conntrack_max); + + nf_conntrack_cachep = kmem_cache_create("nf_conntrack", + sizeof(struct nf_conn), + 0, 0, NULL); + if (!nf_conntrack_cachep) { printk(KERN_ERR "Unable to create nf_conn slab cache\n"); goto err_free_hash; } - nf_conntrack_expect_cachep = kmem_cache_create("nf_conntrack_expect", - sizeof(struct nf_conntrack_expect), - 0, 0, NULL, NULL); - if (!nf_conntrack_expect_cachep) { - printk(KERN_ERR "Unable to create nf_expect slab cache\n"); + ret = nf_conntrack_proto_init(); + if (ret < 0) goto err_free_conntrack_slab; - } - ret = nf_conntrack_proto_init(); + ret = nf_conntrack_expect_init(); + if (ret < 0) + goto out_fini_proto; + + ret = nf_conntrack_helper_init(); if (ret < 0) - goto out_free_expect_slab; + goto out_fini_expect; /* For use by REJECT target */ rcu_assign_pointer(ip_ct_attach, __nf_conntrack_attach); @@ -1280,13 +1139,15 @@ int __init nf_conntrack_init(void) return ret; -out_free_expect_slab: - kmem_cache_destroy(nf_conntrack_expect_cachep); +out_fini_expect: + nf_conntrack_expect_fini(); +out_fini_proto: + nf_conntrack_proto_fini(); err_free_conntrack_slab: - nf_conntrack_unregister_cache(NF_CT_F_BASIC); + kmem_cache_destroy(nf_conntrack_cachep); err_free_hash: - free_conntrack_hash(nf_conntrack_hash, nf_conntrack_vmalloc, - nf_conntrack_htable_size); + nf_ct_free_hashtable(nf_conntrack_hash, nf_conntrack_vmalloc, + nf_conntrack_htable_size); err_out: return -ENOMEM; } diff --git a/net/netfilter/nf_conntrack_ecache.c b/net/netfilter/nf_conntrack_ecache.c index 6bd421df2dbc..83c41ac3505b 100644 --- a/net/netfilter/nf_conntrack_ecache.c +++ b/net/netfilter/nf_conntrack_ecache.c @@ -26,8 +26,8 @@ ATOMIC_NOTIFIER_HEAD(nf_conntrack_chain); EXPORT_SYMBOL_GPL(nf_conntrack_chain); -ATOMIC_NOTIFIER_HEAD(nf_conntrack_expect_chain); -EXPORT_SYMBOL_GPL(nf_conntrack_expect_chain); +ATOMIC_NOTIFIER_HEAD(nf_ct_expect_chain); +EXPORT_SYMBOL_GPL(nf_ct_expect_chain); DEFINE_PER_CPU(struct nf_conntrack_ecache, nf_conntrack_ecache); EXPORT_PER_CPU_SYMBOL_GPL(nf_conntrack_ecache); @@ -103,14 +103,14 @@ int nf_conntrack_unregister_notifier(struct notifier_block *nb) } EXPORT_SYMBOL_GPL(nf_conntrack_unregister_notifier); -int nf_conntrack_expect_register_notifier(struct notifier_block *nb) +int nf_ct_expect_register_notifier(struct notifier_block *nb) { - return atomic_notifier_chain_register(&nf_conntrack_expect_chain, nb); + return atomic_notifier_chain_register(&nf_ct_expect_chain, nb); } -EXPORT_SYMBOL_GPL(nf_conntrack_expect_register_notifier); +EXPORT_SYMBOL_GPL(nf_ct_expect_register_notifier); -int nf_conntrack_expect_unregister_notifier(struct notifier_block *nb) +int nf_ct_expect_unregister_notifier(struct notifier_block *nb) { - return atomic_notifier_chain_unregister(&nf_conntrack_expect_chain, nb); + return atomic_notifier_chain_unregister(&nf_ct_expect_chain, nb); } -EXPORT_SYMBOL_GPL(nf_conntrack_expect_unregister_notifier); +EXPORT_SYMBOL_GPL(nf_ct_expect_unregister_notifier); diff --git a/net/netfilter/nf_conntrack_expect.c b/net/netfilter/nf_conntrack_expect.c index 504fb6c083f9..3ac64e25f10c 100644 --- a/net/netfilter/nf_conntrack_expect.c +++ b/net/netfilter/nf_conntrack_expect.c @@ -19,6 +19,7 @@ #include <linux/err.h> #include <linux/percpu.h> #include <linux/kernel.h> +#include <linux/jhash.h> #include <net/netfilter/nf_conntrack.h> #include <net/netfilter/nf_conntrack_core.h> @@ -26,11 +27,20 @@ #include <net/netfilter/nf_conntrack_helper.h> #include <net/netfilter/nf_conntrack_tuple.h> -LIST_HEAD(nf_conntrack_expect_list); -EXPORT_SYMBOL_GPL(nf_conntrack_expect_list); +struct hlist_head *nf_ct_expect_hash __read_mostly; +EXPORT_SYMBOL_GPL(nf_ct_expect_hash); -struct kmem_cache *nf_conntrack_expect_cachep __read_mostly; -static unsigned int nf_conntrack_expect_next_id; +unsigned int nf_ct_expect_hsize __read_mostly; +EXPORT_SYMBOL_GPL(nf_ct_expect_hsize); + +static unsigned int nf_ct_expect_hash_rnd __read_mostly; +static unsigned int nf_ct_expect_count; +unsigned int nf_ct_expect_max __read_mostly; +static int nf_ct_expect_hash_rnd_initted __read_mostly; +static int nf_ct_expect_vmalloc; + +static struct kmem_cache *nf_ct_expect_cachep __read_mostly; +static unsigned int nf_ct_expect_next_id; /* nf_conntrack_expect helper functions */ void nf_ct_unlink_expect(struct nf_conntrack_expect *exp) @@ -40,60 +50,83 @@ void nf_ct_unlink_expect(struct nf_conntrack_expect *exp) NF_CT_ASSERT(master_help); NF_CT_ASSERT(!timer_pending(&exp->timeout)); - list_del(&exp->list); - NF_CT_STAT_INC(expect_delete); + hlist_del(&exp->hnode); + nf_ct_expect_count--; + + hlist_del(&exp->lnode); master_help->expecting--; - nf_conntrack_expect_put(exp); + nf_ct_expect_put(exp); + + NF_CT_STAT_INC(expect_delete); } EXPORT_SYMBOL_GPL(nf_ct_unlink_expect); -static void expectation_timed_out(unsigned long ul_expect) +static void nf_ct_expectation_timed_out(unsigned long ul_expect) { struct nf_conntrack_expect *exp = (void *)ul_expect; write_lock_bh(&nf_conntrack_lock); nf_ct_unlink_expect(exp); write_unlock_bh(&nf_conntrack_lock); - nf_conntrack_expect_put(exp); + nf_ct_expect_put(exp); +} + +static unsigned int nf_ct_expect_dst_hash(const struct nf_conntrack_tuple *tuple) +{ + if (unlikely(!nf_ct_expect_hash_rnd_initted)) { + get_random_bytes(&nf_ct_expect_hash_rnd, 4); + nf_ct_expect_hash_rnd_initted = 1; + } + + return jhash2(tuple->dst.u3.all, ARRAY_SIZE(tuple->dst.u3.all), + (((tuple->dst.protonum ^ tuple->src.l3num) << 16) | + (__force __u16)tuple->dst.u.all) ^ nf_ct_expect_hash_rnd) % + nf_ct_expect_hsize; } struct nf_conntrack_expect * -__nf_conntrack_expect_find(const struct nf_conntrack_tuple *tuple) +__nf_ct_expect_find(const struct nf_conntrack_tuple *tuple) { struct nf_conntrack_expect *i; + struct hlist_node *n; + unsigned int h; + + if (!nf_ct_expect_count) + return NULL; - list_for_each_entry(i, &nf_conntrack_expect_list, list) { + h = nf_ct_expect_dst_hash(tuple); + hlist_for_each_entry(i, n, &nf_ct_expect_hash[h], hnode) { if (nf_ct_tuple_mask_cmp(tuple, &i->tuple, &i->mask)) return i; } return NULL; } -EXPORT_SYMBOL_GPL(__nf_conntrack_expect_find); +EXPORT_SYMBOL_GPL(__nf_ct_expect_find); /* Just find a expectation corresponding to a tuple. */ struct nf_conntrack_expect * -nf_conntrack_expect_find_get(const struct nf_conntrack_tuple *tuple) +nf_ct_expect_find_get(const struct nf_conntrack_tuple *tuple) { struct nf_conntrack_expect *i; read_lock_bh(&nf_conntrack_lock); - i = __nf_conntrack_expect_find(tuple); + i = __nf_ct_expect_find(tuple); if (i) atomic_inc(&i->use); read_unlock_bh(&nf_conntrack_lock); return i; } -EXPORT_SYMBOL_GPL(nf_conntrack_expect_find_get); +EXPORT_SYMBOL_GPL(nf_ct_expect_find_get); /* If an expectation for this connection is found, it gets delete from * global list then returned. */ struct nf_conntrack_expect * -find_expectation(const struct nf_conntrack_tuple *tuple) +nf_ct_find_expectation(const struct nf_conntrack_tuple *tuple) { struct nf_conntrack_expect *exp; - exp = __nf_conntrack_expect_find(tuple); + exp = __nf_ct_expect_find(tuple); if (!exp) return NULL; @@ -119,17 +152,18 @@ find_expectation(const struct nf_conntrack_tuple *tuple) /* delete all expectations for this conntrack */ void nf_ct_remove_expectations(struct nf_conn *ct) { - struct nf_conntrack_expect *i, *tmp; struct nf_conn_help *help = nfct_help(ct); + struct nf_conntrack_expect *exp; + struct hlist_node *n, *next; /* Optimization: most connection never expect any others. */ if (!help || help->expecting == 0) return; - list_for_each_entry_safe(i, tmp, &nf_conntrack_expect_list, list) { - if (i->master == ct && del_timer(&i->timeout)) { - nf_ct_unlink_expect(i); - nf_conntrack_expect_put(i); + hlist_for_each_entry_safe(exp, n, next, &help->expectations, lnode) { + if (del_timer(&exp->timeout)) { + nf_ct_unlink_expect(exp); + nf_ct_expect_put(exp); } } } @@ -141,25 +175,16 @@ static inline int expect_clash(const struct nf_conntrack_expect *a, { /* Part covered by intersection of masks must be unequal, otherwise they clash */ - struct nf_conntrack_tuple intersect_mask; + struct nf_conntrack_tuple_mask intersect_mask; int count; - intersect_mask.src.l3num = a->mask.src.l3num & b->mask.src.l3num; intersect_mask.src.u.all = a->mask.src.u.all & b->mask.src.u.all; - intersect_mask.dst.u.all = a->mask.dst.u.all & b->mask.dst.u.all; - intersect_mask.dst.protonum = a->mask.dst.protonum - & b->mask.dst.protonum; for (count = 0; count < NF_CT_TUPLE_L3SIZE; count++){ intersect_mask.src.u3.all[count] = a->mask.src.u3.all[count] & b->mask.src.u3.all[count]; } - for (count = 0; count < NF_CT_TUPLE_L3SIZE; count++){ - intersect_mask.dst.u3.all[count] = - a->mask.dst.u3.all[count] & b->mask.dst.u3.all[count]; - } - return nf_ct_tuple_mask_cmp(&a->tuple, &b->tuple, &intersect_mask); } @@ -168,36 +193,29 @@ static inline int expect_matches(const struct nf_conntrack_expect *a, { return a->master == b->master && nf_ct_tuple_equal(&a->tuple, &b->tuple) - && nf_ct_tuple_equal(&a->mask, &b->mask); + && nf_ct_tuple_mask_equal(&a->mask, &b->mask); } /* Generally a bad idea to call this: could have matched already. */ -void nf_conntrack_unexpect_related(struct nf_conntrack_expect *exp) +void nf_ct_unexpect_related(struct nf_conntrack_expect *exp) { - struct nf_conntrack_expect *i; - write_lock_bh(&nf_conntrack_lock); - /* choose the oldest expectation to evict */ - list_for_each_entry_reverse(i, &nf_conntrack_expect_list, list) { - if (expect_matches(i, exp) && del_timer(&i->timeout)) { - nf_ct_unlink_expect(i); - write_unlock_bh(&nf_conntrack_lock); - nf_conntrack_expect_put(i); - return; - } + if (del_timer(&exp->timeout)) { + nf_ct_unlink_expect(exp); + nf_ct_expect_put(exp); } write_unlock_bh(&nf_conntrack_lock); } -EXPORT_SYMBOL_GPL(nf_conntrack_unexpect_related); +EXPORT_SYMBOL_GPL(nf_ct_unexpect_related); /* We don't increase the master conntrack refcount for non-fulfilled * conntracks. During the conntrack destruction, the expectations are * always killed before the conntrack itself */ -struct nf_conntrack_expect *nf_conntrack_expect_alloc(struct nf_conn *me) +struct nf_conntrack_expect *nf_ct_expect_alloc(struct nf_conn *me) { struct nf_conntrack_expect *new; - new = kmem_cache_alloc(nf_conntrack_expect_cachep, GFP_ATOMIC); + new = kmem_cache_alloc(nf_ct_expect_cachep, GFP_ATOMIC); if (!new) return NULL; @@ -205,12 +223,12 @@ struct nf_conntrack_expect *nf_conntrack_expect_alloc(struct nf_conn *me) atomic_set(&new->use, 1); return new; } -EXPORT_SYMBOL_GPL(nf_conntrack_expect_alloc); +EXPORT_SYMBOL_GPL(nf_ct_expect_alloc); -void nf_conntrack_expect_init(struct nf_conntrack_expect *exp, int family, - union nf_conntrack_address *saddr, - union nf_conntrack_address *daddr, - u_int8_t proto, __be16 *src, __be16 *dst) +void nf_ct_expect_init(struct nf_conntrack_expect *exp, int family, + union nf_conntrack_address *saddr, + union nf_conntrack_address *daddr, + u_int8_t proto, __be16 *src, __be16 *dst) { int len; @@ -224,8 +242,6 @@ void nf_conntrack_expect_init(struct nf_conntrack_expect *exp, int family, exp->helper = NULL; exp->tuple.src.l3num = family; exp->tuple.dst.protonum = proto; - exp->mask.src.l3num = 0xFFFF; - exp->mask.dst.protonum = 0xFF; if (saddr) { memcpy(&exp->tuple.src.u3, saddr, len); @@ -242,59 +258,50 @@ void nf_conntrack_expect_init(struct nf_conntrack_expect *exp, int family, memset(&exp->mask.src.u3, 0x00, sizeof(exp->mask.src.u3)); } - if (daddr) { - memcpy(&exp->tuple.dst.u3, daddr, len); - if (sizeof(exp->tuple.dst.u3) > len) - /* address needs to be cleared for nf_ct_tuple_equal */ - memset((void *)&exp->tuple.dst.u3 + len, 0x00, - sizeof(exp->tuple.dst.u3) - len); - memset(&exp->mask.dst.u3, 0xFF, len); - if (sizeof(exp->mask.dst.u3) > len) - memset((void *)&exp->mask.dst.u3 + len, 0x00, - sizeof(exp->mask.dst.u3) - len); - } else { - memset(&exp->tuple.dst.u3, 0x00, sizeof(exp->tuple.dst.u3)); - memset(&exp->mask.dst.u3, 0x00, sizeof(exp->mask.dst.u3)); - } - if (src) { - exp->tuple.src.u.all = (__force u16)*src; - exp->mask.src.u.all = 0xFFFF; + exp->tuple.src.u.all = *src; + exp->mask.src.u.all = htons(0xFFFF); } else { exp->tuple.src.u.all = 0; exp->mask.src.u.all = 0; } - if (dst) { - exp->tuple.dst.u.all = (__force u16)*dst; - exp->mask.dst.u.all = 0xFFFF; - } else { - exp->tuple.dst.u.all = 0; - exp->mask.dst.u.all = 0; - } + memcpy(&exp->tuple.dst.u3, daddr, len); + if (sizeof(exp->tuple.dst.u3) > len) + /* address needs to be cleared for nf_ct_tuple_equal */ + memset((void *)&exp->tuple.dst.u3 + len, 0x00, + sizeof(exp->tuple.dst.u3) - len); + + exp->tuple.dst.u.all = *dst; } -EXPORT_SYMBOL_GPL(nf_conntrack_expect_init); +EXPORT_SYMBOL_GPL(nf_ct_expect_init); -void nf_conntrack_expect_put(struct nf_conntrack_expect *exp) +void nf_ct_expect_put(struct nf_conntrack_expect *exp) { if (atomic_dec_and_test(&exp->use)) - kmem_cache_free(nf_conntrack_expect_cachep, exp); + kmem_cache_free(nf_ct_expect_cachep, exp); } -EXPORT_SYMBOL_GPL(nf_conntrack_expect_put); +EXPORT_SYMBOL_GPL(nf_ct_expect_put); -static void nf_conntrack_expect_insert(struct nf_conntrack_expect *exp) +static void nf_ct_expect_insert(struct nf_conntrack_expect *exp) { struct nf_conn_help *master_help = nfct_help(exp->master); + unsigned int h = nf_ct_expect_dst_hash(&exp->tuple); atomic_inc(&exp->use); + + hlist_add_head(&exp->lnode, &master_help->expectations); master_help->expecting++; - list_add(&exp->list, &nf_conntrack_expect_list); - setup_timer(&exp->timeout, expectation_timed_out, (unsigned long)exp); + hlist_add_head(&exp->hnode, &nf_ct_expect_hash[h]); + nf_ct_expect_count++; + + setup_timer(&exp->timeout, nf_ct_expectation_timed_out, + (unsigned long)exp); exp->timeout.expires = jiffies + master_help->helper->timeout * HZ; add_timer(&exp->timeout); - exp->id = ++nf_conntrack_expect_next_id; + exp->id = ++nf_ct_expect_next_id; atomic_inc(&exp->use); NF_CT_STAT_INC(expect_create); } @@ -302,16 +309,16 @@ static void nf_conntrack_expect_insert(struct nf_conntrack_expect *exp) /* Race with expectations being used means we could have none to find; OK. */ static void evict_oldest_expect(struct nf_conn *master) { - struct nf_conntrack_expect *i; + struct nf_conn_help *master_help = nfct_help(master); + struct nf_conntrack_expect *exp = NULL; + struct hlist_node *n; - list_for_each_entry_reverse(i, &nf_conntrack_expect_list, list) { - if (i->master == master) { - if (del_timer(&i->timeout)) { - nf_ct_unlink_expect(i); - nf_conntrack_expect_put(i); - } - break; - } + hlist_for_each_entry(exp, n, &master_help->expectations, lnode) + ; /* nothing */ + + if (exp && del_timer(&exp->timeout)) { + nf_ct_unlink_expect(exp); + nf_ct_expect_put(exp); } } @@ -327,11 +334,13 @@ static inline int refresh_timer(struct nf_conntrack_expect *i) return 1; } -int nf_conntrack_expect_related(struct nf_conntrack_expect *expect) +int nf_ct_expect_related(struct nf_conntrack_expect *expect) { struct nf_conntrack_expect *i; struct nf_conn *master = expect->master; struct nf_conn_help *master_help = nfct_help(master); + struct hlist_node *n; + unsigned int h; int ret; NF_CT_ASSERT(master_help); @@ -341,7 +350,8 @@ int nf_conntrack_expect_related(struct nf_conntrack_expect *expect) ret = -ESHUTDOWN; goto out; } - list_for_each_entry(i, &nf_conntrack_expect_list, list) { + h = nf_ct_expect_dst_hash(&expect->tuple); + hlist_for_each_entry(i, n, &nf_ct_expect_hash[h], hnode) { if (expect_matches(i, expect)) { /* Refresh timer: if it's dying, ignore.. */ if (refresh_timer(i)) { @@ -358,57 +368,86 @@ int nf_conntrack_expect_related(struct nf_conntrack_expect *expect) master_help->expecting >= master_help->helper->max_expected) evict_oldest_expect(master); - nf_conntrack_expect_insert(expect); - nf_conntrack_expect_event(IPEXP_NEW, expect); + if (nf_ct_expect_count >= nf_ct_expect_max) { + if (net_ratelimit()) + printk(KERN_WARNING + "nf_conntrack: expectation table full"); + ret = -EMFILE; + goto out; + } + + nf_ct_expect_insert(expect); + nf_ct_expect_event(IPEXP_NEW, expect); ret = 0; out: write_unlock_bh(&nf_conntrack_lock); return ret; } -EXPORT_SYMBOL_GPL(nf_conntrack_expect_related); +EXPORT_SYMBOL_GPL(nf_ct_expect_related); #ifdef CONFIG_PROC_FS -static void *exp_seq_start(struct seq_file *s, loff_t *pos) +struct ct_expect_iter_state { + unsigned int bucket; +}; + +static struct hlist_node *ct_expect_get_first(struct seq_file *seq) { - struct list_head *e = &nf_conntrack_expect_list; - loff_t i; + struct ct_expect_iter_state *st = seq->private; - /* strange seq_file api calls stop even if we fail, - * thus we need to grab lock since stop unlocks */ - read_lock_bh(&nf_conntrack_lock); + for (st->bucket = 0; st->bucket < nf_ct_expect_hsize; st->bucket++) { + if (!hlist_empty(&nf_ct_expect_hash[st->bucket])) + return nf_ct_expect_hash[st->bucket].first; + } + return NULL; +} - if (list_empty(e)) - return NULL; +static struct hlist_node *ct_expect_get_next(struct seq_file *seq, + struct hlist_node *head) +{ + struct ct_expect_iter_state *st = seq->private; - for (i = 0; i <= *pos; i++) { - e = e->next; - if (e == &nf_conntrack_expect_list) + head = head->next; + while (head == NULL) { + if (++st->bucket >= nf_ct_expect_hsize) return NULL; + head = nf_ct_expect_hash[st->bucket].first; } - return e; + return head; } -static void *exp_seq_next(struct seq_file *s, void *v, loff_t *pos) +static struct hlist_node *ct_expect_get_idx(struct seq_file *seq, loff_t pos) { - struct list_head *e = v; + struct hlist_node *head = ct_expect_get_first(seq); - ++*pos; - e = e->next; + if (head) + while (pos && (head = ct_expect_get_next(seq, head))) + pos--; + return pos ? NULL : head; +} - if (e == &nf_conntrack_expect_list) - return NULL; +static void *exp_seq_start(struct seq_file *seq, loff_t *pos) +{ + read_lock_bh(&nf_conntrack_lock); + return ct_expect_get_idx(seq, *pos); +} - return e; +static void *exp_seq_next(struct seq_file *seq, void *v, loff_t *pos) +{ + (*pos)++; + return ct_expect_get_next(seq, v); } -static void exp_seq_stop(struct seq_file *s, void *v) +static void exp_seq_stop(struct seq_file *seq, void *v) { read_unlock_bh(&nf_conntrack_lock); } static int exp_seq_show(struct seq_file *s, void *v) { - struct nf_conntrack_expect *expect = v; + struct nf_conntrack_expect *expect; + struct hlist_node *n = v; + + expect = hlist_entry(n, struct nf_conntrack_expect, hnode); if (expect->timeout.function) seq_printf(s, "%ld ", timer_pending(&expect->timeout) @@ -425,7 +464,7 @@ static int exp_seq_show(struct seq_file *s, void *v) return seq_putc(s, '\n'); } -static struct seq_operations exp_seq_ops = { +static const struct seq_operations exp_seq_ops = { .start = exp_seq_start, .next = exp_seq_next, .stop = exp_seq_stop, @@ -434,14 +473,95 @@ static struct seq_operations exp_seq_ops = { static int exp_open(struct inode *inode, struct file *file) { - return seq_open(file, &exp_seq_ops); + struct seq_file *seq; + struct ct_expect_iter_state *st; + int ret; + + st = kzalloc(sizeof(struct ct_expect_iter_state), GFP_KERNEL); + if (!st) + return -ENOMEM; + ret = seq_open(file, &exp_seq_ops); + if (ret) + goto out_free; + seq = file->private_data; + seq->private = st; + return ret; +out_free: + kfree(st); + return ret; } -const struct file_operations exp_file_ops = { +static const struct file_operations exp_file_ops = { .owner = THIS_MODULE, .open = exp_open, .read = seq_read, .llseek = seq_lseek, - .release = seq_release + .release = seq_release_private, }; #endif /* CONFIG_PROC_FS */ + +static int __init exp_proc_init(void) +{ +#ifdef CONFIG_PROC_FS + struct proc_dir_entry *proc; + + proc = proc_net_fops_create("nf_conntrack_expect", 0440, &exp_file_ops); + if (!proc) + return -ENOMEM; +#endif /* CONFIG_PROC_FS */ + return 0; +} + +static void exp_proc_remove(void) +{ +#ifdef CONFIG_PROC_FS + proc_net_remove("nf_conntrack_expect"); +#endif /* CONFIG_PROC_FS */ +} + +module_param_named(expect_hashsize, nf_ct_expect_hsize, uint, 0600); + +int __init nf_conntrack_expect_init(void) +{ + int err = -ENOMEM; + + if (!nf_ct_expect_hsize) { + nf_ct_expect_hsize = nf_conntrack_htable_size / 256; + if (!nf_ct_expect_hsize) + nf_ct_expect_hsize = 1; + } + nf_ct_expect_max = nf_ct_expect_hsize * 4; + + nf_ct_expect_hash = nf_ct_alloc_hashtable(&nf_ct_expect_hsize, + &nf_ct_expect_vmalloc); + if (nf_ct_expect_hash == NULL) + goto err1; + + nf_ct_expect_cachep = kmem_cache_create("nf_conntrack_expect", + sizeof(struct nf_conntrack_expect), + 0, 0, NULL); + if (!nf_ct_expect_cachep) + goto err2; + + err = exp_proc_init(); + if (err < 0) + goto err3; + + return 0; + +err3: + nf_ct_free_hashtable(nf_ct_expect_hash, nf_ct_expect_vmalloc, + nf_ct_expect_hsize); +err2: + kmem_cache_destroy(nf_ct_expect_cachep); +err1: + return err; +} + +void nf_conntrack_expect_fini(void) +{ + exp_proc_remove(); + kmem_cache_destroy(nf_ct_expect_cachep); + nf_ct_free_hashtable(nf_ct_expect_hash, nf_ct_expect_vmalloc, + nf_ct_expect_hsize); +} diff --git a/net/netfilter/nf_conntrack_ftp.c b/net/netfilter/nf_conntrack_ftp.c index 82db2aa53bfc..c763ee74ea02 100644 --- a/net/netfilter/nf_conntrack_ftp.c +++ b/net/netfilter/nf_conntrack_ftp.c @@ -51,12 +51,6 @@ unsigned int (*nf_nat_ftp_hook)(struct sk_buff **pskb, struct nf_conntrack_expect *exp); EXPORT_SYMBOL_GPL(nf_nat_ftp_hook); -#if 0 -#define DEBUGP printk -#else -#define DEBUGP(format, args...) -#endif - static int try_rfc959(const char *, size_t, struct nf_conntrack_man *, char); static int try_eprt(const char *, size_t, struct nf_conntrack_man *, char); static int try_epsv_response(const char *, size_t, struct nf_conntrack_man *, @@ -138,13 +132,13 @@ static int try_number(const char *data, size_t dlen, u_int32_t array[], if (*data == term && i == array_size - 1) return len; - DEBUGP("Char %u (got %u nums) `%u' unexpected\n", - len, i, *data); + pr_debug("Char %u (got %u nums) `%u' unexpected\n", + len, i, *data); return 0; } } - DEBUGP("Failed to fill %u numbers separated by %c\n", array_size, sep); - + pr_debug("Failed to fill %u numbers separated by %c\n", + array_size, sep); return 0; } @@ -178,13 +172,13 @@ static int get_port(const char *data, int start, size_t dlen, char delim, if (tmp_port == 0) break; *port = htons(tmp_port); - DEBUGP("get_port: return %d\n", tmp_port); + pr_debug("get_port: return %d\n", tmp_port); return i + 1; } else if (data[i] >= '0' && data[i] <= '9') tmp_port = tmp_port*10 + data[i] - '0'; else { /* Some other crap */ - DEBUGP("get_port: invalid char.\n"); + pr_debug("get_port: invalid char.\n"); break; } } @@ -201,22 +195,22 @@ static int try_eprt(const char *data, size_t dlen, struct nf_conntrack_man *cmd, /* First character is delimiter, then "1" for IPv4 or "2" for IPv6, then delimiter again. */ if (dlen <= 3) { - DEBUGP("EPRT: too short\n"); + pr_debug("EPRT: too short\n"); return 0; } delim = data[0]; if (isdigit(delim) || delim < 33 || delim > 126 || data[2] != delim) { - DEBUGP("try_eprt: invalid delimitter.\n"); + pr_debug("try_eprt: invalid delimitter.\n"); return 0; } if ((cmd->l3num == PF_INET && data[1] != '1') || (cmd->l3num == PF_INET6 && data[1] != '2')) { - DEBUGP("EPRT: invalid protocol number.\n"); + pr_debug("EPRT: invalid protocol number.\n"); return 0; } - DEBUGP("EPRT: Got %c%c%c\n", delim, data[1], delim); + pr_debug("EPRT: Got %c%c%c\n", delim, data[1], delim); if (data[1] == '1') { u_int32_t array[4]; @@ -234,7 +228,7 @@ static int try_eprt(const char *data, size_t dlen, struct nf_conntrack_man *cmd, if (length == 0) return 0; - DEBUGP("EPRT: Got IP address!\n"); + pr_debug("EPRT: Got IP address!\n"); /* Start offset includes initial "|1|", and trailing delimiter */ return get_port(data, 3 + length + 1, dlen, delim, &cmd->u.tcp.port); } @@ -267,7 +261,7 @@ static int find_pattern(const char *data, size_t dlen, { size_t i; - DEBUGP("find_pattern `%s': dlen = %u\n", pattern, dlen); + pr_debug("find_pattern `%s': dlen = %Zu\n", pattern, dlen); if (dlen == 0) return 0; @@ -282,17 +276,17 @@ static int find_pattern(const char *data, size_t dlen, #if 0 size_t i; - DEBUGP("ftp: string mismatch\n"); + pr_debug("ftp: string mismatch\n"); for (i = 0; i < plen; i++) { - DEBUGP("ftp:char %u `%c'(%u) vs `%c'(%u)\n", - i, data[i], data[i], - pattern[i], pattern[i]); + pr_debug("ftp:char %u `%c'(%u) vs `%c'(%u)\n", + i, data[i], data[i], + pattern[i], pattern[i]); } #endif return 0; } - DEBUGP("Pattern matches!\n"); + pr_debug("Pattern matches!\n"); /* Now we've found the constant string, try to skip to the 'skip' character */ for (i = plen; data[i] != skip; i++) @@ -301,14 +295,14 @@ static int find_pattern(const char *data, size_t dlen, /* Skip over the last character */ i++; - DEBUGP("Skipped up to `%c'!\n", skip); + pr_debug("Skipped up to `%c'!\n", skip); *numoff = i; *numlen = getnum(data + i, dlen - i, cmd, term); if (!*numlen) return -1; - DEBUGP("Match succeeded!\n"); + pr_debug("Match succeeded!\n"); return 1; } @@ -364,6 +358,7 @@ static int help(struct sk_buff **pskb, unsigned int matchlen, matchoff; struct nf_ct_ftp_master *ct_ftp_info = &nfct_help(ct)->help.ct_ftp_info; struct nf_conntrack_expect *exp; + union nf_conntrack_address *daddr; struct nf_conntrack_man cmd = {}; unsigned int i; int found = 0, ends_in_nl; @@ -372,7 +367,7 @@ static int help(struct sk_buff **pskb, /* Until there's been traffic both ways, don't look in packets. */ if (ctinfo != IP_CT_ESTABLISHED && ctinfo != IP_CT_ESTABLISHED+IP_CT_IS_REPLY) { - DEBUGP("ftp: Conntrackinfo = %u\n", ctinfo); + pr_debug("ftp: Conntrackinfo = %u\n", ctinfo); return NF_ACCEPT; } @@ -383,8 +378,8 @@ static int help(struct sk_buff **pskb, dataoff = protoff + th->doff * 4; /* No data? */ if (dataoff >= (*pskb)->len) { - DEBUGP("ftp: dataoff(%u) >= skblen(%u)\n", dataoff, - (*pskb)->len); + pr_debug("ftp: dataoff(%u) >= skblen(%u)\n", dataoff, + (*pskb)->len); return NF_ACCEPT; } datalen = (*pskb)->len - dataoff; @@ -399,11 +394,11 @@ static int help(struct sk_buff **pskb, /* Look up to see if we're just after a \n. */ if (!find_nl_seq(ntohl(th->seq), ct_ftp_info, dir)) { /* Now if this ends in \n, update ftp info. */ - DEBUGP("nf_conntrack_ftp_help: wrong seq pos %s(%u) or %s(%u)\n", - ct_ftp_info->seq_aft_nl_num[dir] > 0 ? "" : "(UNSET)", - ct_ftp_info->seq_aft_nl[dir][0], - ct_ftp_info->seq_aft_nl_num[dir] > 1 ? "" : "(UNSET)", - ct_ftp_info->seq_aft_nl[dir][1]); + pr_debug("nf_conntrack_ftp: wrong seq pos %s(%u) or %s(%u)\n", + ct_ftp_info->seq_aft_nl_num[dir] > 0 ? "" : "(UNSET)", + ct_ftp_info->seq_aft_nl[dir][0], + ct_ftp_info->seq_aft_nl_num[dir] > 1 ? "" : "(UNSET)", + ct_ftp_info->seq_aft_nl[dir][1]); ret = NF_ACCEPT; goto out_update_nl; } @@ -441,11 +436,11 @@ static int help(struct sk_buff **pskb, goto out_update_nl; } - DEBUGP("conntrack_ftp: match `%.*s' (%u bytes at %u)\n", - (int)matchlen, fb_ptr + matchoff, - matchlen, ntohl(th->seq) + matchoff); + pr_debug("conntrack_ftp: match `%.*s' (%u bytes at %u)\n", + matchlen, fb_ptr + matchoff, + matchlen, ntohl(th->seq) + matchoff); - exp = nf_conntrack_expect_alloc(ct); + exp = nf_ct_expect_alloc(ct); if (exp == NULL) { ret = NF_DROP; goto out; @@ -454,7 +449,7 @@ static int help(struct sk_buff **pskb, /* We refer to the reverse direction ("!dir") tuples here, * because we're expecting something in the other direction. * Doesn't matter unless NAT is happening. */ - exp->tuple.dst.u3 = ct->tuplehash[!dir].tuple.dst.u3; + daddr = &ct->tuplehash[!dir].tuple.dst.u3; /* Update the ftp info */ if ((cmd.l3num == ct->tuplehash[dir].tuple.src.l3num) && @@ -465,14 +460,16 @@ static int help(struct sk_buff **pskb, different IP address. Simply don't record it for NAT. */ if (cmd.l3num == PF_INET) { - DEBUGP("conntrack_ftp: NOT RECORDING: " NIPQUAD_FMT " != " NIPQUAD_FMT "\n", - NIPQUAD(cmd.u3.ip), - NIPQUAD(ct->tuplehash[dir].tuple.src.u3.ip)); + pr_debug("conntrack_ftp: NOT RECORDING: " NIPQUAD_FMT + " != " NIPQUAD_FMT "\n", + NIPQUAD(cmd.u3.ip), + NIPQUAD(ct->tuplehash[dir].tuple.src.u3.ip)); } else { - DEBUGP("conntrack_ftp: NOT RECORDING: " NIP6_FMT " != " NIP6_FMT "\n", - NIP6(*((struct in6_addr *)cmd.u3.ip6)), - NIP6(*((struct in6_addr *)ct->tuplehash[dir] - .tuple.src.u3.ip6))); + pr_debug("conntrack_ftp: NOT RECORDING: " NIP6_FMT + " != " NIP6_FMT "\n", + NIP6(*((struct in6_addr *)cmd.u3.ip6)), + NIP6(*((struct in6_addr *) + ct->tuplehash[dir].tuple.src.u3.ip6))); } /* Thanks to Cristiano Lincoln Mattos @@ -483,37 +480,12 @@ static int help(struct sk_buff **pskb, ret = NF_ACCEPT; goto out_put_expect; } - memcpy(&exp->tuple.dst.u3, &cmd.u3.all, - sizeof(exp->tuple.dst.u3)); - } - - exp->tuple.src.u3 = ct->tuplehash[!dir].tuple.src.u3; - exp->tuple.src.l3num = cmd.l3num; - exp->tuple.src.u.tcp.port = 0; - exp->tuple.dst.u.tcp.port = cmd.u.tcp.port; - exp->tuple.dst.protonum = IPPROTO_TCP; - - exp->mask = (struct nf_conntrack_tuple) - { .src = { .l3num = 0xFFFF, - .u = { .tcp = { 0 }}, - }, - .dst = { .protonum = 0xFF, - .u = { .tcp = { __constant_htons(0xFFFF) }}, - }, - }; - if (cmd.l3num == PF_INET) { - exp->mask.src.u3.ip = htonl(0xFFFFFFFF); - exp->mask.dst.u3.ip = htonl(0xFFFFFFFF); - } else { - memset(exp->mask.src.u3.ip6, 0xFF, - sizeof(exp->mask.src.u3.ip6)); - memset(exp->mask.dst.u3.ip6, 0xFF, - sizeof(exp->mask.src.u3.ip6)); + daddr = &cmd.u3; } - exp->expectfn = NULL; - exp->helper = NULL; - exp->flags = 0; + nf_ct_expect_init(exp, cmd.l3num, + &ct->tuplehash[!dir].tuple.src.u3, daddr, + IPPROTO_TCP, NULL, &cmd.u.tcp.port); /* Now, NAT might want to mangle the packet, and register the * (possibly changed) expectation itself. */ @@ -523,14 +495,14 @@ static int help(struct sk_buff **pskb, matchoff, matchlen, exp); else { /* Can't expect this? Best to drop packet now. */ - if (nf_conntrack_expect_related(exp) != 0) + if (nf_ct_expect_related(exp) != 0) ret = NF_DROP; else ret = NF_ACCEPT; } out_put_expect: - nf_conntrack_expect_put(exp); + nf_ct_expect_put(exp); out_update_nl: /* Now if this ends in \n, update ftp info. Seq may have been @@ -542,8 +514,8 @@ out_update_nl: return ret; } -static struct nf_conntrack_helper ftp[MAX_PORTS][2]; -static char ftp_names[MAX_PORTS][2][sizeof("ftp-65535")]; +static struct nf_conntrack_helper ftp[MAX_PORTS][2] __read_mostly; +static char ftp_names[MAX_PORTS][2][sizeof("ftp-65535")] __read_mostly; /* don't make this __exit, since it's called from __init ! */ static void nf_conntrack_ftp_fini(void) @@ -554,9 +526,9 @@ static void nf_conntrack_ftp_fini(void) if (ftp[i][j].me == NULL) continue; - DEBUGP("nf_ct_ftp: unregistering helper for pf: %d " - "port: %d\n", - ftp[i][j].tuple.src.l3num, ports[i]); + pr_debug("nf_ct_ftp: unregistering helper for pf: %d " + "port: %d\n", + ftp[i][j].tuple.src.l3num, ports[i]); nf_conntrack_helper_unregister(&ftp[i][j]); } } @@ -584,9 +556,6 @@ static int __init nf_conntrack_ftp_init(void) for (j = 0; j < 2; j++) { ftp[i][j].tuple.src.u.tcp.port = htons(ports[i]); ftp[i][j].tuple.dst.protonum = IPPROTO_TCP; - ftp[i][j].mask.src.l3num = 0xFFFF; - ftp[i][j].mask.src.u.tcp.port = htons(0xFFFF); - ftp[i][j].mask.dst.protonum = 0xFF; ftp[i][j].max_expected = 1; ftp[i][j].timeout = 5 * 60; /* 5 Minutes */ ftp[i][j].me = THIS_MODULE; @@ -598,9 +567,9 @@ static int __init nf_conntrack_ftp_init(void) sprintf(tmpname, "ftp-%d", ports[i]); ftp[i][j].name = tmpname; - DEBUGP("nf_ct_ftp: registering helper for pf: %d " - "port: %d\n", - ftp[i][j].tuple.src.l3num, ports[i]); + pr_debug("nf_ct_ftp: registering helper for pf: %d " + "port: %d\n", + ftp[i][j].tuple.src.l3num, ports[i]); ret = nf_conntrack_helper_register(&ftp[i][j]); if (ret) { printk("nf_ct_ftp: failed to register helper " diff --git a/net/netfilter/nf_conntrack_h323_asn1.c b/net/netfilter/nf_conntrack_h323_asn1.c index 6b7eaa019d4c..a869403b2294 100644 --- a/net/netfilter/nf_conntrack_h323_asn1.c +++ b/net/netfilter/nf_conntrack_h323_asn1.c @@ -555,15 +555,6 @@ int decode_seq(bitstr_t * bs, field_t * f, char *base, int level) /* Decode the extension components */ for (opt = 0; opt < bmp2_len; opt++, i++, son++) { - if (i < f->ub && son->attr & STOP) { - PRINT("%*.s%s\n", (level + 1) * TAB_SIZE, " ", - son->name); - return H323_ERROR_STOP; - } - - if (!((0x80000000 >> opt) & bmp2)) /* Not present */ - continue; - /* Check Range */ if (i >= f->ub) { /* Newer Version? */ CHECK_BOUND(bs, 2); @@ -573,6 +564,15 @@ int decode_seq(bitstr_t * bs, field_t * f, char *base, int level) continue; } + if (son->attr & STOP) { + PRINT("%*.s%s\n", (level + 1) * TAB_SIZE, " ", + son->name); + return H323_ERROR_STOP; + } + + if (!((0x80000000 >> opt) & bmp2)) /* Not present */ + continue; + CHECK_BOUND(bs, 2); len = get_len(bs); CHECK_BOUND(bs, len); diff --git a/net/netfilter/nf_conntrack_h323_main.c b/net/netfilter/nf_conntrack_h323_main.c index a1b95acad297..a8a9dfbe7a67 100644 --- a/net/netfilter/nf_conntrack_h323_main.c +++ b/net/netfilter/nf_conntrack_h323_main.c @@ -31,12 +31,6 @@ #include <net/netfilter/nf_conntrack_helper.h> #include <linux/netfilter/nf_conntrack_h323.h> -#if 0 -#define DEBUGP printk -#else -#define DEBUGP(format, args...) -#endif - /* Parameters */ static unsigned int default_rrq_ttl __read_mostly = 300; module_param(default_rrq_ttl, uint, 0600); @@ -150,9 +144,9 @@ static int get_tpkt_data(struct sk_buff **pskb, unsigned int protoff, if (tcpdatalen < 4 || tpkt[0] != 0x03 || tpkt[1] != 0) { /* Netmeeting sends TPKT header and data separately */ if (info->tpkt_len[dir] > 0) { - DEBUGP("nf_ct_h323: previous packet " - "indicated separate TPKT data of %hu " - "bytes\n", info->tpkt_len[dir]); + pr_debug("nf_ct_h323: previous packet " + "indicated separate TPKT data of %hu " + "bytes\n", info->tpkt_len[dir]); if (info->tpkt_len[dir] <= tcpdatalen) { /* Yes, there was a TPKT header * received */ @@ -163,9 +157,7 @@ static int get_tpkt_data(struct sk_buff **pskb, unsigned int protoff, } /* Fragmented TPKT */ - if (net_ratelimit()) - printk("nf_ct_h323: " - "fragmented TPKT\n"); + pr_debug("nf_ct_h323: fragmented TPKT\n"); goto clear_out; } @@ -192,9 +184,9 @@ static int get_tpkt_data(struct sk_buff **pskb, unsigned int protoff, if (tpktlen > tcpdatalen) { if (tcpdatalen == 4) { /* Separate TPKT header */ /* Netmeeting sends TPKT header and data separately */ - DEBUGP("nf_ct_h323: separate TPKT header indicates " - "there will be TPKT data of %hu bytes\n", - tpktlen - 4); + pr_debug("nf_ct_h323: separate TPKT header indicates " + "there will be TPKT data of %hu bytes\n", + tpktlen - 4); info->tpkt_len[dir] = tpktlen - 4; return 0; } @@ -282,22 +274,22 @@ static int expect_rtp_rtcp(struct sk_buff **pskb, struct nf_conn *ct, rtcp_port = htons(ntohs(port) + 1); /* Create expect for RTP */ - if ((rtp_exp = nf_conntrack_expect_alloc(ct)) == NULL) + if ((rtp_exp = nf_ct_expect_alloc(ct)) == NULL) return -1; - nf_conntrack_expect_init(rtp_exp, ct->tuplehash[!dir].tuple.src.l3num, - &ct->tuplehash[!dir].tuple.src.u3, - &ct->tuplehash[!dir].tuple.dst.u3, - IPPROTO_UDP, NULL, &rtp_port); + nf_ct_expect_init(rtp_exp, ct->tuplehash[!dir].tuple.src.l3num, + &ct->tuplehash[!dir].tuple.src.u3, + &ct->tuplehash[!dir].tuple.dst.u3, + IPPROTO_UDP, NULL, &rtp_port); /* Create expect for RTCP */ - if ((rtcp_exp = nf_conntrack_expect_alloc(ct)) == NULL) { - nf_conntrack_expect_put(rtp_exp); + if ((rtcp_exp = nf_ct_expect_alloc(ct)) == NULL) { + nf_ct_expect_put(rtp_exp); return -1; } - nf_conntrack_expect_init(rtcp_exp, ct->tuplehash[!dir].tuple.src.l3num, - &ct->tuplehash[!dir].tuple.src.u3, - &ct->tuplehash[!dir].tuple.dst.u3, - IPPROTO_UDP, NULL, &rtcp_port); + nf_ct_expect_init(rtcp_exp, ct->tuplehash[!dir].tuple.src.l3num, + &ct->tuplehash[!dir].tuple.src.u3, + &ct->tuplehash[!dir].tuple.dst.u3, + IPPROTO_UDP, NULL, &rtcp_port); if (memcmp(&ct->tuplehash[dir].tuple.src.u3, &ct->tuplehash[!dir].tuple.dst.u3, @@ -308,22 +300,22 @@ static int expect_rtp_rtcp(struct sk_buff **pskb, struct nf_conn *ct, ret = nat_rtp_rtcp(pskb, ct, ctinfo, data, dataoff, taddr, port, rtp_port, rtp_exp, rtcp_exp); } else { /* Conntrack only */ - if (nf_conntrack_expect_related(rtp_exp) == 0) { - if (nf_conntrack_expect_related(rtcp_exp) == 0) { - DEBUGP("nf_ct_h323: expect RTP "); + if (nf_ct_expect_related(rtp_exp) == 0) { + if (nf_ct_expect_related(rtcp_exp) == 0) { + pr_debug("nf_ct_h323: expect RTP "); NF_CT_DUMP_TUPLE(&rtp_exp->tuple); - DEBUGP("nf_ct_h323: expect RTCP "); + pr_debug("nf_ct_h323: expect RTCP "); NF_CT_DUMP_TUPLE(&rtcp_exp->tuple); } else { - nf_conntrack_unexpect_related(rtp_exp); + nf_ct_unexpect_related(rtp_exp); ret = -1; } } else ret = -1; } - nf_conntrack_expect_put(rtp_exp); - nf_conntrack_expect_put(rtcp_exp); + nf_ct_expect_put(rtp_exp); + nf_ct_expect_put(rtcp_exp); return ret; } @@ -349,12 +341,12 @@ static int expect_t120(struct sk_buff **pskb, return 0; /* Create expect for T.120 connections */ - if ((exp = nf_conntrack_expect_alloc(ct)) == NULL) + if ((exp = nf_ct_expect_alloc(ct)) == NULL) return -1; - nf_conntrack_expect_init(exp, ct->tuplehash[!dir].tuple.src.l3num, - &ct->tuplehash[!dir].tuple.src.u3, - &ct->tuplehash[!dir].tuple.dst.u3, - IPPROTO_TCP, NULL, &port); + nf_ct_expect_init(exp, ct->tuplehash[!dir].tuple.src.l3num, + &ct->tuplehash[!dir].tuple.src.u3, + &ct->tuplehash[!dir].tuple.dst.u3, + IPPROTO_TCP, NULL, &port); exp->flags = NF_CT_EXPECT_PERMANENT; /* Accept multiple channels */ if (memcmp(&ct->tuplehash[dir].tuple.src.u3, @@ -366,14 +358,14 @@ static int expect_t120(struct sk_buff **pskb, ret = nat_t120(pskb, ct, ctinfo, data, dataoff, taddr, port, exp); } else { /* Conntrack only */ - if (nf_conntrack_expect_related(exp) == 0) { - DEBUGP("nf_ct_h323: expect T.120 "); + if (nf_ct_expect_related(exp) == 0) { + pr_debug("nf_ct_h323: expect T.120 "); NF_CT_DUMP_TUPLE(&exp->tuple); } else ret = -1; } - nf_conntrack_expect_put(exp); + nf_ct_expect_put(exp); return ret; } @@ -415,7 +407,7 @@ static int process_olc(struct sk_buff **pskb, struct nf_conn *ct, { int ret; - DEBUGP("nf_ct_h323: OpenLogicalChannel\n"); + pr_debug("nf_ct_h323: OpenLogicalChannel\n"); if (olc->forwardLogicalChannelParameters.multiplexParameters.choice == eOpenLogicalChannel_forwardLogicalChannelParameters_multiplexParameters_h2250LogicalChannelParameters) @@ -475,7 +467,7 @@ static int process_olca(struct sk_buff **pskb, struct nf_conn *ct, H2250LogicalChannelAckParameters *ack; int ret; - DEBUGP("nf_ct_h323: OpenLogicalChannelAck\n"); + pr_debug("nf_ct_h323: OpenLogicalChannelAck\n"); if ((olca->options & eOpenLogicalChannelAck_reverseLogicalChannelParameters) && @@ -546,8 +538,8 @@ static int process_h245(struct sk_buff **pskb, struct nf_conn *ct, return process_olc(pskb, ct, ctinfo, data, dataoff, &mscm->request.openLogicalChannel); } - DEBUGP("nf_ct_h323: H.245 Request %d\n", - mscm->request.choice); + pr_debug("nf_ct_h323: H.245 Request %d\n", + mscm->request.choice); break; case eMultimediaSystemControlMessage_response: if (mscm->response.choice == @@ -556,11 +548,11 @@ static int process_h245(struct sk_buff **pskb, struct nf_conn *ct, &mscm->response. openLogicalChannelAck); } - DEBUGP("nf_ct_h323: H.245 Response %d\n", - mscm->response.choice); + pr_debug("nf_ct_h323: H.245 Response %d\n", + mscm->response.choice); break; default: - DEBUGP("nf_ct_h323: H.245 signal %d\n", mscm->choice); + pr_debug("nf_ct_h323: H.245 signal %d\n", mscm->choice); break; } @@ -582,24 +574,23 @@ static int h245_help(struct sk_buff **pskb, unsigned int protoff, ctinfo != IP_CT_ESTABLISHED + IP_CT_IS_REPLY) { return NF_ACCEPT; } - DEBUGP("nf_ct_h245: skblen = %u\n", (*pskb)->len); + pr_debug("nf_ct_h245: skblen = %u\n", (*pskb)->len); spin_lock_bh(&nf_h323_lock); /* Process each TPKT */ while (get_tpkt_data(pskb, protoff, ct, ctinfo, &data, &datalen, &dataoff)) { - DEBUGP("nf_ct_h245: TPKT len=%d ", datalen); + pr_debug("nf_ct_h245: TPKT len=%d ", datalen); NF_CT_DUMP_TUPLE(&ct->tuplehash[CTINFO2DIR(ctinfo)].tuple); /* Decode H.245 signal */ ret = DecodeMultimediaSystemControlMessage(data, datalen, &mscm); if (ret < 0) { - if (net_ratelimit()) - printk("nf_ct_h245: decoding error: %s\n", - ret == H323_ERROR_BOUND ? - "out of bound" : "out of range"); + pr_debug("nf_ct_h245: decoding error: %s\n", + ret == H323_ERROR_BOUND ? + "out of bound" : "out of range"); /* We don't drop when decoding error */ break; } @@ -626,8 +617,6 @@ static struct nf_conntrack_helper nf_conntrack_helper_h245 __read_mostly = { .max_expected = H323_RTP_CHANNEL_MAX * 4 + 2 /* T.120 */, .timeout = 240, .tuple.dst.protonum = IPPROTO_UDP, - .mask.src.u.udp.port = __constant_htons(0xFFFF), - .mask.dst.protonum = 0xFF, .help = h245_help }; @@ -684,12 +673,12 @@ static int expect_h245(struct sk_buff **pskb, struct nf_conn *ct, return 0; /* Create expect for h245 connection */ - if ((exp = nf_conntrack_expect_alloc(ct)) == NULL) + if ((exp = nf_ct_expect_alloc(ct)) == NULL) return -1; - nf_conntrack_expect_init(exp, ct->tuplehash[!dir].tuple.src.l3num, - &ct->tuplehash[!dir].tuple.src.u3, - &ct->tuplehash[!dir].tuple.dst.u3, - IPPROTO_TCP, NULL, &port); + nf_ct_expect_init(exp, ct->tuplehash[!dir].tuple.src.l3num, + &ct->tuplehash[!dir].tuple.src.u3, + &ct->tuplehash[!dir].tuple.dst.u3, + IPPROTO_TCP, NULL, &port); exp->helper = &nf_conntrack_helper_h245; if (memcmp(&ct->tuplehash[dir].tuple.src.u3, @@ -701,14 +690,14 @@ static int expect_h245(struct sk_buff **pskb, struct nf_conn *ct, ret = nat_h245(pskb, ct, ctinfo, data, dataoff, taddr, port, exp); } else { /* Conntrack only */ - if (nf_conntrack_expect_related(exp) == 0) { - DEBUGP("nf_ct_q931: expect H.245 "); + if (nf_ct_expect_related(exp) == 0) { + pr_debug("nf_ct_q931: expect H.245 "); NF_CT_DUMP_TUPLE(&exp->tuple); } else ret = -1; } - nf_conntrack_expect_put(exp); + nf_ct_expect_put(exp); return ret; } @@ -791,16 +780,16 @@ static int expect_callforwarding(struct sk_buff **pskb, if (callforward_filter && callforward_do_filter(&addr, &ct->tuplehash[!dir].tuple.src.u3, ct->tuplehash[!dir].tuple.src.l3num)) { - DEBUGP("nf_ct_q931: Call Forwarding not tracked\n"); + pr_debug("nf_ct_q931: Call Forwarding not tracked\n"); return 0; } /* Create expect for the second call leg */ - if ((exp = nf_conntrack_expect_alloc(ct)) == NULL) + if ((exp = nf_ct_expect_alloc(ct)) == NULL) return -1; - nf_conntrack_expect_init(exp, ct->tuplehash[!dir].tuple.src.l3num, - &ct->tuplehash[!dir].tuple.src.u3, &addr, - IPPROTO_TCP, NULL, &port); + nf_ct_expect_init(exp, ct->tuplehash[!dir].tuple.src.l3num, + &ct->tuplehash[!dir].tuple.src.u3, &addr, + IPPROTO_TCP, NULL, &port); exp->helper = nf_conntrack_helper_q931; if (memcmp(&ct->tuplehash[dir].tuple.src.u3, @@ -812,14 +801,14 @@ static int expect_callforwarding(struct sk_buff **pskb, ret = nat_callforwarding(pskb, ct, ctinfo, data, dataoff, taddr, port, exp); } else { /* Conntrack only */ - if (nf_conntrack_expect_related(exp) == 0) { - DEBUGP("nf_ct_q931: expect Call Forwarding "); + if (nf_ct_expect_related(exp) == 0) { + pr_debug("nf_ct_q931: expect Call Forwarding "); NF_CT_DUMP_TUPLE(&exp->tuple); } else ret = -1; } - nf_conntrack_expect_put(exp); + nf_ct_expect_put(exp); return ret; } @@ -837,7 +826,7 @@ static int process_setup(struct sk_buff **pskb, struct nf_conn *ct, union nf_conntrack_address addr; typeof(set_h225_addr_hook) set_h225_addr; - DEBUGP("nf_ct_q931: Setup\n"); + pr_debug("nf_ct_q931: Setup\n"); if (setup->options & eSetup_UUIE_h245Address) { ret = expect_h245(pskb, ct, ctinfo, data, dataoff, @@ -852,11 +841,11 @@ static int process_setup(struct sk_buff **pskb, struct nf_conn *ct, get_h225_addr(ct, *data, &setup->destCallSignalAddress, &addr, &port) && memcmp(&addr, &ct->tuplehash[!dir].tuple.src.u3, sizeof(addr))) { - DEBUGP("nf_ct_q931: set destCallSignalAddress " - NIP6_FMT ":%hu->" NIP6_FMT ":%hu\n", - NIP6(*(struct in6_addr *)&addr), ntohs(port), - NIP6(*(struct in6_addr *)&ct->tuplehash[!dir].tuple.src.u3), - ntohs(ct->tuplehash[!dir].tuple.src.u.tcp.port)); + pr_debug("nf_ct_q931: set destCallSignalAddress " + NIP6_FMT ":%hu->" NIP6_FMT ":%hu\n", + NIP6(*(struct in6_addr *)&addr), ntohs(port), + NIP6(*(struct in6_addr *)&ct->tuplehash[!dir].tuple.src.u3), + ntohs(ct->tuplehash[!dir].tuple.src.u.tcp.port)); ret = set_h225_addr(pskb, data, dataoff, &setup->destCallSignalAddress, &ct->tuplehash[!dir].tuple.src.u3, @@ -870,11 +859,11 @@ static int process_setup(struct sk_buff **pskb, struct nf_conn *ct, get_h225_addr(ct, *data, &setup->sourceCallSignalAddress, &addr, &port) && memcmp(&addr, &ct->tuplehash[!dir].tuple.dst.u3, sizeof(addr))) { - DEBUGP("nf_ct_q931: set sourceCallSignalAddress " - NIP6_FMT ":%hu->" NIP6_FMT ":%hu\n", - NIP6(*(struct in6_addr *)&addr), ntohs(port), - NIP6(*(struct in6_addr *)&ct->tuplehash[!dir].tuple.dst.u3), - ntohs(ct->tuplehash[!dir].tuple.dst.u.tcp.port)); + pr_debug("nf_ct_q931: set sourceCallSignalAddress " + NIP6_FMT ":%hu->" NIP6_FMT ":%hu\n", + NIP6(*(struct in6_addr *)&addr), ntohs(port), + NIP6(*(struct in6_addr *)&ct->tuplehash[!dir].tuple.dst.u3), + ntohs(ct->tuplehash[!dir].tuple.dst.u.tcp.port)); ret = set_h225_addr(pskb, data, dataoff, &setup->sourceCallSignalAddress, &ct->tuplehash[!dir].tuple.dst.u3, @@ -905,7 +894,7 @@ static int process_callproceeding(struct sk_buff **pskb, int ret; int i; - DEBUGP("nf_ct_q931: CallProceeding\n"); + pr_debug("nf_ct_q931: CallProceeding\n"); if (callproc->options & eCallProceeding_UUIE_h245Address) { ret = expect_h245(pskb, ct, ctinfo, data, dataoff, @@ -935,7 +924,7 @@ static int process_connect(struct sk_buff **pskb, struct nf_conn *ct, int ret; int i; - DEBUGP("nf_ct_q931: Connect\n"); + pr_debug("nf_ct_q931: Connect\n"); if (connect->options & eConnect_UUIE_h245Address) { ret = expect_h245(pskb, ct, ctinfo, data, dataoff, @@ -965,7 +954,7 @@ static int process_alerting(struct sk_buff **pskb, struct nf_conn *ct, int ret; int i; - DEBUGP("nf_ct_q931: Alerting\n"); + pr_debug("nf_ct_q931: Alerting\n"); if (alert->options & eAlerting_UUIE_h245Address) { ret = expect_h245(pskb, ct, ctinfo, data, dataoff, @@ -995,7 +984,7 @@ static int process_facility(struct sk_buff **pskb, struct nf_conn *ct, int ret; int i; - DEBUGP("nf_ct_q931: Facility\n"); + pr_debug("nf_ct_q931: Facility\n"); if (facility->reason.choice == eFacilityReason_callForwarded) { if (facility->options & eFacility_UUIE_alternativeAddress) @@ -1034,7 +1023,7 @@ static int process_progress(struct sk_buff **pskb, struct nf_conn *ct, int ret; int i; - DEBUGP("nf_ct_q931: Progress\n"); + pr_debug("nf_ct_q931: Progress\n"); if (progress->options & eProgress_UUIE_h245Address) { ret = expect_h245(pskb, ct, ctinfo, data, dataoff, @@ -1091,8 +1080,8 @@ static int process_q931(struct sk_buff **pskb, struct nf_conn *ct, &pdu->h323_message_body.progress); break; default: - DEBUGP("nf_ct_q931: Q.931 signal %d\n", - pdu->h323_message_body.choice); + pr_debug("nf_ct_q931: Q.931 signal %d\n", + pdu->h323_message_body.choice); break; } @@ -1126,23 +1115,22 @@ static int q931_help(struct sk_buff **pskb, unsigned int protoff, ctinfo != IP_CT_ESTABLISHED + IP_CT_IS_REPLY) { return NF_ACCEPT; } - DEBUGP("nf_ct_q931: skblen = %u\n", (*pskb)->len); + pr_debug("nf_ct_q931: skblen = %u\n", (*pskb)->len); spin_lock_bh(&nf_h323_lock); /* Process each TPKT */ while (get_tpkt_data(pskb, protoff, ct, ctinfo, &data, &datalen, &dataoff)) { - DEBUGP("nf_ct_q931: TPKT len=%d ", datalen); + pr_debug("nf_ct_q931: TPKT len=%d ", datalen); NF_CT_DUMP_TUPLE(&ct->tuplehash[CTINFO2DIR(ctinfo)].tuple); /* Decode Q.931 signal */ ret = DecodeQ931(data, datalen, &q931); if (ret < 0) { - if (net_ratelimit()) - printk("nf_ct_q931: decoding error: %s\n", - ret == H323_ERROR_BOUND ? - "out of bound" : "out of range"); + pr_debug("nf_ct_q931: decoding error: %s\n", + ret == H323_ERROR_BOUND ? + "out of bound" : "out of range"); /* We don't drop when decoding error */ break; } @@ -1173,9 +1161,6 @@ static struct nf_conntrack_helper nf_conntrack_helper_q931[] __read_mostly = { .tuple.src.l3num = AF_INET, .tuple.src.u.tcp.port = __constant_htons(Q931_PORT), .tuple.dst.protonum = IPPROTO_TCP, - .mask.src.l3num = 0xFFFF, - .mask.src.u.tcp.port = __constant_htons(0xFFFF), - .mask.dst.protonum = 0xFF, .help = q931_help }, { @@ -1187,9 +1172,6 @@ static struct nf_conntrack_helper nf_conntrack_helper_q931[] __read_mostly = { .tuple.src.l3num = AF_INET6, .tuple.src.u.tcp.port = __constant_htons(Q931_PORT), .tuple.dst.protonum = IPPROTO_TCP, - .mask.src.l3num = 0xFFFF, - .mask.src.u.tcp.port = __constant_htons(0xFFFF), - .mask.dst.protonum = 0xFF, .help = q931_help }, }; @@ -1225,7 +1207,7 @@ static struct nf_conntrack_expect *find_expect(struct nf_conn *ct, tuple.dst.u.tcp.port = port; tuple.dst.protonum = IPPROTO_TCP; - exp = __nf_conntrack_expect_find(&tuple); + exp = __nf_ct_expect_find(&tuple); if (exp && exp->master == ct) return exp; return NULL; @@ -1271,14 +1253,13 @@ static int expect_q931(struct sk_buff **pskb, struct nf_conn *ct, return 0; /* Create expect for Q.931 */ - if ((exp = nf_conntrack_expect_alloc(ct)) == NULL) + if ((exp = nf_ct_expect_alloc(ct)) == NULL) return -1; - nf_conntrack_expect_init(exp, ct->tuplehash[!dir].tuple.src.l3num, - gkrouted_only ? /* only accept calls from GK? */ - &ct->tuplehash[!dir].tuple.src.u3 : - NULL, - &ct->tuplehash[!dir].tuple.dst.u3, - IPPROTO_TCP, NULL, &port); + nf_ct_expect_init(exp, ct->tuplehash[!dir].tuple.src.l3num, + gkrouted_only ? /* only accept calls from GK? */ + &ct->tuplehash[!dir].tuple.src.u3 : NULL, + &ct->tuplehash[!dir].tuple.dst.u3, + IPPROTO_TCP, NULL, &port); exp->helper = nf_conntrack_helper_q931; exp->flags = NF_CT_EXPECT_PERMANENT; /* Accept multiple calls */ @@ -1286,8 +1267,8 @@ static int expect_q931(struct sk_buff **pskb, struct nf_conn *ct, if (nat_q931 && ct->status & IPS_NAT_MASK) { /* Need NAT */ ret = nat_q931(pskb, ct, ctinfo, data, taddr, i, port, exp); } else { /* Conntrack only */ - if (nf_conntrack_expect_related(exp) == 0) { - DEBUGP("nf_ct_ras: expect Q.931 "); + if (nf_ct_expect_related(exp) == 0) { + pr_debug("nf_ct_ras: expect Q.931 "); NF_CT_DUMP_TUPLE(&exp->tuple); /* Save port for looking up expect in processing RCF */ @@ -1296,7 +1277,7 @@ static int expect_q931(struct sk_buff **pskb, struct nf_conn *ct, ret = -1; } - nf_conntrack_expect_put(exp); + nf_ct_expect_put(exp); return ret; } @@ -1308,7 +1289,7 @@ static int process_grq(struct sk_buff **pskb, struct nf_conn *ct, { typeof(set_ras_addr_hook) set_ras_addr; - DEBUGP("nf_ct_ras: GRQ\n"); + pr_debug("nf_ct_ras: GRQ\n"); set_ras_addr = rcu_dereference(set_ras_addr_hook); if (set_ras_addr && ct->status & IPS_NAT_MASK) /* NATed */ @@ -1328,7 +1309,7 @@ static int process_gcf(struct sk_buff **pskb, struct nf_conn *ct, union nf_conntrack_address addr; struct nf_conntrack_expect *exp; - DEBUGP("nf_ct_ras: GCF\n"); + pr_debug("nf_ct_ras: GCF\n"); if (!get_h225_addr(ct, *data, &gcf->rasAddress, &addr, &port)) return 0; @@ -1343,20 +1324,20 @@ static int process_gcf(struct sk_buff **pskb, struct nf_conn *ct, return 0; /* Need new expect */ - if ((exp = nf_conntrack_expect_alloc(ct)) == NULL) + if ((exp = nf_ct_expect_alloc(ct)) == NULL) return -1; - nf_conntrack_expect_init(exp, ct->tuplehash[!dir].tuple.src.l3num, - &ct->tuplehash[!dir].tuple.src.u3, &addr, - IPPROTO_UDP, NULL, &port); + nf_ct_expect_init(exp, ct->tuplehash[!dir].tuple.src.l3num, + &ct->tuplehash[!dir].tuple.src.u3, &addr, + IPPROTO_UDP, NULL, &port); exp->helper = nf_conntrack_helper_ras; - if (nf_conntrack_expect_related(exp) == 0) { - DEBUGP("nf_ct_ras: expect RAS "); + if (nf_ct_expect_related(exp) == 0) { + pr_debug("nf_ct_ras: expect RAS "); NF_CT_DUMP_TUPLE(&exp->tuple); } else ret = -1; - nf_conntrack_expect_put(exp); + nf_ct_expect_put(exp); return ret; } @@ -1370,7 +1351,7 @@ static int process_rrq(struct sk_buff **pskb, struct nf_conn *ct, int ret; typeof(set_ras_addr_hook) set_ras_addr; - DEBUGP("nf_ct_ras: RRQ\n"); + pr_debug("nf_ct_ras: RRQ\n"); ret = expect_q931(pskb, ct, ctinfo, data, rrq->callSignalAddress.item, @@ -1388,7 +1369,7 @@ static int process_rrq(struct sk_buff **pskb, struct nf_conn *ct, } if (rrq->options & eRegistrationRequest_timeToLive) { - DEBUGP("nf_ct_ras: RRQ TTL = %u seconds\n", rrq->timeToLive); + pr_debug("nf_ct_ras: RRQ TTL = %u seconds\n", rrq->timeToLive); info->timeout = rrq->timeToLive; } else info->timeout = default_rrq_ttl; @@ -1407,7 +1388,7 @@ static int process_rcf(struct sk_buff **pskb, struct nf_conn *ct, struct nf_conntrack_expect *exp; typeof(set_sig_addr_hook) set_sig_addr; - DEBUGP("nf_ct_ras: RCF\n"); + pr_debug("nf_ct_ras: RCF\n"); set_sig_addr = rcu_dereference(set_sig_addr_hook); if (set_sig_addr && ct->status & IPS_NAT_MASK) { @@ -1419,14 +1400,13 @@ static int process_rcf(struct sk_buff **pskb, struct nf_conn *ct, } if (rcf->options & eRegistrationConfirm_timeToLive) { - DEBUGP("nf_ct_ras: RCF TTL = %u seconds\n", rcf->timeToLive); + pr_debug("nf_ct_ras: RCF TTL = %u seconds\n", rcf->timeToLive); info->timeout = rcf->timeToLive; } if (info->timeout > 0) { - DEBUGP - ("nf_ct_ras: set RAS connection timeout to %u seconds\n", - info->timeout); + pr_debug("nf_ct_ras: set RAS connection timeout to " + "%u seconds\n", info->timeout); nf_ct_refresh(ct, *pskb, info->timeout * HZ); /* Set expect timeout */ @@ -1434,9 +1414,9 @@ static int process_rcf(struct sk_buff **pskb, struct nf_conn *ct, exp = find_expect(ct, &ct->tuplehash[dir].tuple.dst.u3, info->sig_port[!dir]); if (exp) { - DEBUGP("nf_ct_ras: set Q.931 expect " - "timeout to %u seconds for", - info->timeout); + pr_debug("nf_ct_ras: set Q.931 expect " + "timeout to %u seconds for", + info->timeout); NF_CT_DUMP_TUPLE(&exp->tuple); set_expect_timeout(exp, info->timeout); } @@ -1456,7 +1436,7 @@ static int process_urq(struct sk_buff **pskb, struct nf_conn *ct, int ret; typeof(set_sig_addr_hook) set_sig_addr; - DEBUGP("nf_ct_ras: URQ\n"); + pr_debug("nf_ct_ras: URQ\n"); set_sig_addr = rcu_dereference(set_sig_addr_hook); if (set_sig_addr && ct->status & IPS_NAT_MASK) { @@ -1489,7 +1469,7 @@ static int process_arq(struct sk_buff **pskb, struct nf_conn *ct, union nf_conntrack_address addr; typeof(set_h225_addr_hook) set_h225_addr; - DEBUGP("nf_ct_ras: ARQ\n"); + pr_debug("nf_ct_ras: ARQ\n"); set_h225_addr = rcu_dereference(set_h225_addr_hook); if ((arq->options & eAdmissionRequest_destCallSignalAddress) && @@ -1532,7 +1512,7 @@ static int process_acf(struct sk_buff **pskb, struct nf_conn *ct, struct nf_conntrack_expect *exp; typeof(set_sig_addr_hook) set_sig_addr; - DEBUGP("nf_ct_ras: ACF\n"); + pr_debug("nf_ct_ras: ACF\n"); if (!get_h225_addr(ct, *data, &acf->destCallSignalAddress, &addr, &port)) @@ -1548,21 +1528,21 @@ static int process_acf(struct sk_buff **pskb, struct nf_conn *ct, } /* Need new expect */ - if ((exp = nf_conntrack_expect_alloc(ct)) == NULL) + if ((exp = nf_ct_expect_alloc(ct)) == NULL) return -1; - nf_conntrack_expect_init(exp, ct->tuplehash[!dir].tuple.src.l3num, - &ct->tuplehash[!dir].tuple.src.u3, &addr, - IPPROTO_TCP, NULL, &port); + nf_ct_expect_init(exp, ct->tuplehash[!dir].tuple.src.l3num, + &ct->tuplehash[!dir].tuple.src.u3, &addr, + IPPROTO_TCP, NULL, &port); exp->flags = NF_CT_EXPECT_PERMANENT; exp->helper = nf_conntrack_helper_q931; - if (nf_conntrack_expect_related(exp) == 0) { - DEBUGP("nf_ct_ras: expect Q.931 "); + if (nf_ct_expect_related(exp) == 0) { + pr_debug("nf_ct_ras: expect Q.931 "); NF_CT_DUMP_TUPLE(&exp->tuple); } else ret = -1; - nf_conntrack_expect_put(exp); + nf_ct_expect_put(exp); return ret; } @@ -1574,7 +1554,7 @@ static int process_lrq(struct sk_buff **pskb, struct nf_conn *ct, { typeof(set_ras_addr_hook) set_ras_addr; - DEBUGP("nf_ct_ras: LRQ\n"); + pr_debug("nf_ct_ras: LRQ\n"); set_ras_addr = rcu_dereference(set_ras_addr_hook); if (set_ras_addr && ct->status & IPS_NAT_MASK) @@ -1594,28 +1574,28 @@ static int process_lcf(struct sk_buff **pskb, struct nf_conn *ct, union nf_conntrack_address addr; struct nf_conntrack_expect *exp; - DEBUGP("nf_ct_ras: LCF\n"); + pr_debug("nf_ct_ras: LCF\n"); if (!get_h225_addr(ct, *data, &lcf->callSignalAddress, &addr, &port)) return 0; /* Need new expect for call signal */ - if ((exp = nf_conntrack_expect_alloc(ct)) == NULL) + if ((exp = nf_ct_expect_alloc(ct)) == NULL) return -1; - nf_conntrack_expect_init(exp, ct->tuplehash[!dir].tuple.src.l3num, - &ct->tuplehash[!dir].tuple.src.u3, &addr, - IPPROTO_TCP, NULL, &port); + nf_ct_expect_init(exp, ct->tuplehash[!dir].tuple.src.l3num, + &ct->tuplehash[!dir].tuple.src.u3, &addr, + IPPROTO_TCP, NULL, &port); exp->flags = NF_CT_EXPECT_PERMANENT; exp->helper = nf_conntrack_helper_q931; - if (nf_conntrack_expect_related(exp) == 0) { - DEBUGP("nf_ct_ras: expect Q.931 "); + if (nf_ct_expect_related(exp) == 0) { + pr_debug("nf_ct_ras: expect Q.931 "); NF_CT_DUMP_TUPLE(&exp->tuple); } else ret = -1; - nf_conntrack_expect_put(exp); + nf_ct_expect_put(exp); /* Ignore rasAddress */ @@ -1631,7 +1611,7 @@ static int process_irr(struct sk_buff **pskb, struct nf_conn *ct, typeof(set_ras_addr_hook) set_ras_addr; typeof(set_sig_addr_hook) set_sig_addr; - DEBUGP("nf_ct_ras: IRR\n"); + pr_debug("nf_ct_ras: IRR\n"); set_ras_addr = rcu_dereference(set_ras_addr_hook); if (set_ras_addr && ct->status & IPS_NAT_MASK) { @@ -1690,7 +1670,7 @@ static int process_ras(struct sk_buff **pskb, struct nf_conn *ct, return process_irr(pskb, ct, ctinfo, data, &ras->infoRequestResponse); default: - DEBUGP("nf_ct_ras: RAS message %d\n", ras->choice); + pr_debug("nf_ct_ras: RAS message %d\n", ras->choice); break; } @@ -1706,7 +1686,7 @@ static int ras_help(struct sk_buff **pskb, unsigned int protoff, int datalen = 0; int ret; - DEBUGP("nf_ct_ras: skblen = %u\n", (*pskb)->len); + pr_debug("nf_ct_ras: skblen = %u\n", (*pskb)->len); spin_lock_bh(&nf_h323_lock); @@ -1714,16 +1694,15 @@ static int ras_help(struct sk_buff **pskb, unsigned int protoff, data = get_udp_data(pskb, protoff, &datalen); if (data == NULL) goto accept; - DEBUGP("nf_ct_ras: RAS message len=%d ", datalen); + pr_debug("nf_ct_ras: RAS message len=%d ", datalen); NF_CT_DUMP_TUPLE(&ct->tuplehash[CTINFO2DIR(ctinfo)].tuple); /* Decode RAS message */ ret = DecodeRasMessage(data, datalen, &ras); if (ret < 0) { - if (net_ratelimit()) - printk("nf_ct_ras: decoding error: %s\n", - ret == H323_ERROR_BOUND ? - "out of bound" : "out of range"); + pr_debug("nf_ct_ras: decoding error: %s\n", + ret == H323_ERROR_BOUND ? + "out of bound" : "out of range"); goto accept; } @@ -1752,9 +1731,6 @@ static struct nf_conntrack_helper nf_conntrack_helper_ras[] __read_mostly = { .tuple.src.l3num = AF_INET, .tuple.src.u.udp.port = __constant_htons(RAS_PORT), .tuple.dst.protonum = IPPROTO_UDP, - .mask.src.l3num = 0xFFFF, - .mask.src.u.udp.port = __constant_htons(0xFFFF), - .mask.dst.protonum = 0xFF, .help = ras_help, }, { @@ -1765,9 +1741,6 @@ static struct nf_conntrack_helper nf_conntrack_helper_ras[] __read_mostly = { .tuple.src.l3num = AF_INET6, .tuple.src.u.udp.port = __constant_htons(RAS_PORT), .tuple.dst.protonum = IPPROTO_UDP, - .mask.src.l3num = 0xFFFF, - .mask.src.u.udp.port = __constant_htons(0xFFFF), - .mask.dst.protonum = 0xFF, .help = ras_help, }, }; @@ -1780,7 +1753,7 @@ static void __exit nf_conntrack_h323_fini(void) nf_conntrack_helper_unregister(&nf_conntrack_helper_q931[1]); nf_conntrack_helper_unregister(&nf_conntrack_helper_q931[0]); kfree(h323_buffer); - DEBUGP("nf_ct_h323: fini\n"); + pr_debug("nf_ct_h323: fini\n"); } /****************************************************************************/ @@ -1803,7 +1776,7 @@ static int __init nf_conntrack_h323_init(void) ret = nf_conntrack_helper_register(&nf_conntrack_helper_ras[1]); if (ret < 0) goto err4; - DEBUGP("nf_ct_h323: init success\n"); + pr_debug("nf_ct_h323: init success\n"); return 0; err4: diff --git a/net/netfilter/nf_conntrack_helper.c b/net/netfilter/nf_conntrack_helper.c index f868b7fbd9b4..96aa637c0932 100644 --- a/net/netfilter/nf_conntrack_helper.c +++ b/net/netfilter/nf_conntrack_helper.c @@ -26,23 +26,43 @@ #include <net/netfilter/nf_conntrack_l4proto.h> #include <net/netfilter/nf_conntrack_helper.h> #include <net/netfilter/nf_conntrack_core.h> +#include <net/netfilter/nf_conntrack_extend.h> -static __read_mostly LIST_HEAD(helpers); +static struct hlist_head *nf_ct_helper_hash __read_mostly; +static unsigned int nf_ct_helper_hsize __read_mostly; +static unsigned int nf_ct_helper_count __read_mostly; +static int nf_ct_helper_vmalloc; + + +/* Stupid hash, but collision free for the default registrations of the + * helpers currently in the kernel. */ +static unsigned int helper_hash(const struct nf_conntrack_tuple *tuple) +{ + return (((tuple->src.l3num << 8) | tuple->dst.protonum) ^ + (__force __u16)tuple->src.u.all) % nf_ct_helper_hsize; +} struct nf_conntrack_helper * __nf_ct_helper_find(const struct nf_conntrack_tuple *tuple) { - struct nf_conntrack_helper *h; + struct nf_conntrack_helper *helper; + struct nf_conntrack_tuple_mask mask = { .src.u.all = htons(0xFFFF) }; + struct hlist_node *n; + unsigned int h; + + if (!nf_ct_helper_count) + return NULL; - list_for_each_entry(h, &helpers, list) { - if (nf_ct_tuple_mask_cmp(tuple, &h->tuple, &h->mask)) - return h; + h = helper_hash(tuple); + hlist_for_each_entry(helper, n, &nf_ct_helper_hash[h], hnode) { + if (nf_ct_tuple_src_mask_cmp(tuple, &helper->tuple, &mask)) + return helper; } return NULL; } struct nf_conntrack_helper * -nf_ct_helper_find_get( const struct nf_conntrack_tuple *tuple) +nf_ct_helper_find_get(const struct nf_conntrack_tuple *tuple) { struct nf_conntrack_helper *helper; @@ -75,16 +95,32 @@ struct nf_conntrack_helper * __nf_conntrack_helper_find_byname(const char *name) { struct nf_conntrack_helper *h; + struct hlist_node *n; + unsigned int i; - list_for_each_entry(h, &helpers, list) { - if (!strcmp(h->name, name)) - return h; + for (i = 0; i < nf_ct_helper_hsize; i++) { + hlist_for_each_entry(h, n, &nf_ct_helper_hash[i], hnode) { + if (!strcmp(h->name, name)) + return h; + } } - return NULL; } EXPORT_SYMBOL_GPL(__nf_conntrack_helper_find_byname); +struct nf_conn_help *nf_ct_helper_ext_add(struct nf_conn *ct, gfp_t gfp) +{ + struct nf_conn_help *help; + + help = nf_ct_ext_add(ct, NF_CT_EXT_HELPER, gfp); + if (help) + INIT_HLIST_HEAD(&help->expectations); + else + pr_debug("failed to add helper extension area"); + return help; +} +EXPORT_SYMBOL_GPL(nf_ct_helper_ext_add); + static inline int unhelp(struct nf_conntrack_tuple_hash *i, const struct nf_conntrack_helper *me) { @@ -100,20 +136,13 @@ static inline int unhelp(struct nf_conntrack_tuple_hash *i, int nf_conntrack_helper_register(struct nf_conntrack_helper *me) { - int size, ret; + unsigned int h = helper_hash(&me->tuple); BUG_ON(me->timeout == 0); - size = ALIGN(sizeof(struct nf_conn), __alignof__(struct nf_conn_help)) + - sizeof(struct nf_conn_help); - ret = nf_conntrack_register_cache(NF_CT_F_HELP, "nf_conntrack:help", - size); - if (ret < 0) { - printk(KERN_ERR "nf_conntrack_helper_register: Unable to create slab cache for conntracks\n"); - return ret; - } write_lock_bh(&nf_conntrack_lock); - list_add(&me->list, &helpers); + hlist_add_head(&me->hnode, &nf_ct_helper_hash[h]); + nf_ct_helper_count++; write_unlock_bh(&nf_conntrack_lock); return 0; @@ -122,29 +151,34 @@ EXPORT_SYMBOL_GPL(nf_conntrack_helper_register); void nf_conntrack_helper_unregister(struct nf_conntrack_helper *me) { - unsigned int i; struct nf_conntrack_tuple_hash *h; - struct nf_conntrack_expect *exp, *tmp; + struct nf_conntrack_expect *exp; + struct hlist_node *n, *next; + unsigned int i; /* Need write lock here, to delete helper. */ write_lock_bh(&nf_conntrack_lock); - list_del(&me->list); + hlist_del(&me->hnode); + nf_ct_helper_count--; /* Get rid of expectations */ - list_for_each_entry_safe(exp, tmp, &nf_conntrack_expect_list, list) { - struct nf_conn_help *help = nfct_help(exp->master); - if ((help->helper == me || exp->helper == me) && - del_timer(&exp->timeout)) { - nf_ct_unlink_expect(exp); - nf_conntrack_expect_put(exp); + for (i = 0; i < nf_ct_expect_hsize; i++) { + hlist_for_each_entry_safe(exp, n, next, + &nf_ct_expect_hash[i], hnode) { + struct nf_conn_help *help = nfct_help(exp->master); + if ((help->helper == me || exp->helper == me) && + del_timer(&exp->timeout)) { + nf_ct_unlink_expect(exp); + nf_ct_expect_put(exp); + } } } /* Get rid of expecteds, set helpers to NULL. */ - list_for_each_entry(h, &unconfirmed, list) + hlist_for_each_entry(h, n, &unconfirmed, hnode) unhelp(h, me); for (i = 0; i < nf_conntrack_htable_size; i++) { - list_for_each_entry(h, &nf_conntrack_hash[i], list) + hlist_for_each_entry(h, n, &nf_conntrack_hash[i], hnode) unhelp(h, me); } write_unlock_bh(&nf_conntrack_lock); @@ -153,3 +187,38 @@ void nf_conntrack_helper_unregister(struct nf_conntrack_helper *me) synchronize_net(); } EXPORT_SYMBOL_GPL(nf_conntrack_helper_unregister); + +static struct nf_ct_ext_type helper_extend __read_mostly = { + .len = sizeof(struct nf_conn_help), + .align = __alignof__(struct nf_conn_help), + .id = NF_CT_EXT_HELPER, +}; + +int nf_conntrack_helper_init(void) +{ + int err; + + nf_ct_helper_hsize = 1; /* gets rounded up to use one page */ + nf_ct_helper_hash = nf_ct_alloc_hashtable(&nf_ct_helper_hsize, + &nf_ct_helper_vmalloc); + if (!nf_ct_helper_hash) + return -ENOMEM; + + err = nf_ct_extend_register(&helper_extend); + if (err < 0) + goto err1; + + return 0; + +err1: + nf_ct_free_hashtable(nf_ct_helper_hash, nf_ct_helper_vmalloc, + nf_ct_helper_hsize); + return err; +} + +void nf_conntrack_helper_fini(void) +{ + nf_ct_extend_unregister(&helper_extend); + nf_ct_free_hashtable(nf_ct_helper_hash, nf_ct_helper_vmalloc, + nf_ct_helper_hsize); +} diff --git a/net/netfilter/nf_conntrack_irc.c b/net/netfilter/nf_conntrack_irc.c index 43ccd0e2e8ae..1562ca97a349 100644 --- a/net/netfilter/nf_conntrack_irc.c +++ b/net/netfilter/nf_conntrack_irc.c @@ -12,6 +12,7 @@ #include <linux/moduleparam.h> #include <linux/skbuff.h> #include <linux/in.h> +#include <linux/ip.h> #include <linux/tcp.h> #include <linux/netfilter.h> @@ -55,13 +56,6 @@ static const char *dccprotos[] = { #define MINMATCHLEN 5 -#if 0 -#define DEBUGP(format, args...) printk(KERN_DEBUG "%s:%s:" format, \ - __FILE__, __FUNCTION__ , ## args) -#else -#define DEBUGP(format, args...) -#endif - /* tries to get the ip_addr and port out of a dcc command * return value: -1 on failure, 0 on success * data pointer to first byte of DCC command data @@ -99,6 +93,7 @@ static int help(struct sk_buff **pskb, unsigned int protoff, struct nf_conn *ct, enum ip_conntrack_info ctinfo) { unsigned int dataoff; + struct iphdr *iph; struct tcphdr _tcph, *th; char *data, *data_limit, *ib_ptr; int dir = CTINFO2DIR(ctinfo); @@ -148,9 +143,10 @@ static int help(struct sk_buff **pskb, unsigned int protoff, data += 5; /* we have at least (19+MINMATCHLEN)-5 bytes valid data left */ - DEBUGP("DCC found in master %u.%u.%u.%u:%u %u.%u.%u.%u:%u...\n", - NIPQUAD(iph->saddr), ntohs(th->source), - NIPQUAD(iph->daddr), ntohs(th->dest)); + iph = ip_hdr(*pskb); + pr_debug("DCC found in master %u.%u.%u.%u:%u %u.%u.%u.%u:%u\n", + NIPQUAD(iph->saddr), ntohs(th->source), + NIPQUAD(iph->daddr), ntohs(th->dest)); for (i = 0; i < ARRAY_SIZE(dccprotos); i++) { if (memcmp(data, dccprotos[i], strlen(dccprotos[i]))) { @@ -158,18 +154,18 @@ static int help(struct sk_buff **pskb, unsigned int protoff, continue; } data += strlen(dccprotos[i]); - DEBUGP("DCC %s detected\n", dccprotos[i]); + pr_debug("DCC %s detected\n", dccprotos[i]); /* we have at least * (19+MINMATCHLEN)-5-dccprotos[i].matchlen bytes valid * data left (== 14/13 bytes) */ if (parse_dcc((char *)data, data_limit, &dcc_ip, &dcc_port, &addr_beg_p, &addr_end_p)) { - DEBUGP("unable to parse dcc command\n"); + pr_debug("unable to parse dcc command\n"); continue; } - DEBUGP("DCC bound ip/port: %u.%u.%u.%u:%u\n", - HIPQUAD(dcc_ip), dcc_port); + pr_debug("DCC bound ip/port: %u.%u.%u.%u:%u\n", + HIPQUAD(dcc_ip), dcc_port); /* dcc_ip can be the internal OR external (NAT'ed) IP */ tuple = &ct->tuplehash[dir].tuple; @@ -184,16 +180,16 @@ static int help(struct sk_buff **pskb, unsigned int protoff, continue; } - exp = nf_conntrack_expect_alloc(ct); + exp = nf_ct_expect_alloc(ct); if (exp == NULL) { ret = NF_DROP; goto out; } tuple = &ct->tuplehash[!dir].tuple; port = htons(dcc_port); - nf_conntrack_expect_init(exp, tuple->src.l3num, - NULL, &tuple->dst.u3, - IPPROTO_TCP, NULL, &port); + nf_ct_expect_init(exp, tuple->src.l3num, + NULL, &tuple->dst.u3, + IPPROTO_TCP, NULL, &port); nf_nat_irc = rcu_dereference(nf_nat_irc_hook); if (nf_nat_irc && ct->status & IPS_NAT_MASK) @@ -201,9 +197,9 @@ static int help(struct sk_buff **pskb, unsigned int protoff, addr_beg_p - ib_ptr, addr_end_p - addr_beg_p, exp); - else if (nf_conntrack_expect_related(exp) != 0) + else if (nf_ct_expect_related(exp) != 0) ret = NF_DROP; - nf_conntrack_expect_put(exp); + nf_ct_expect_put(exp); goto out; } } @@ -239,9 +235,6 @@ static int __init nf_conntrack_irc_init(void) irc[i].tuple.src.l3num = AF_INET; irc[i].tuple.src.u.tcp.port = htons(ports[i]); irc[i].tuple.dst.protonum = IPPROTO_TCP; - irc[i].mask.src.l3num = 0xFFFF; - irc[i].mask.src.u.tcp.port = htons(0xFFFF); - irc[i].mask.dst.protonum = 0xFF; irc[i].max_expected = max_dcc_channels; irc[i].timeout = dcc_timeout; irc[i].me = THIS_MODULE; diff --git a/net/netfilter/nf_conntrack_l3proto_generic.c b/net/netfilter/nf_conntrack_l3proto_generic.c index cbd96f3c1b89..991c52c9a28b 100644 --- a/net/netfilter/nf_conntrack_l3proto_generic.c +++ b/net/netfilter/nf_conntrack_l3proto_generic.c @@ -31,12 +31,6 @@ #include <net/netfilter/nf_conntrack_core.h> #include <net/netfilter/ipv4/nf_conntrack_ipv4.h> -#if 0 -#define DEBUGP printk -#else -#define DEBUGP(format, args...) -#endif - static int generic_pkt_to_tuple(const struct sk_buff *skb, unsigned int nhoff, struct nf_conntrack_tuple *tuple) { @@ -67,29 +61,21 @@ static int generic_print_conntrack(struct seq_file *s, return 0; } -static int -generic_prepare(struct sk_buff **pskb, unsigned int hooknum, - unsigned int *dataoff, u_int8_t *protonum) +static int generic_get_l4proto(const struct sk_buff *skb, unsigned int nhoff, + unsigned int *dataoff, u_int8_t *protonum) { /* Never track !!! */ return -NF_ACCEPT; } -static u_int32_t generic_get_features(const struct nf_conntrack_tuple *tuple) - -{ - return NF_CT_F_BASIC; -} - -struct nf_conntrack_l3proto nf_conntrack_l3proto_generic = { +struct nf_conntrack_l3proto nf_conntrack_l3proto_generic __read_mostly = { .l3proto = PF_UNSPEC, .name = "unknown", .pkt_to_tuple = generic_pkt_to_tuple, .invert_tuple = generic_invert_tuple, .print_tuple = generic_print_tuple, .print_conntrack = generic_print_conntrack, - .prepare = generic_prepare, - .get_features = generic_get_features, + .get_l4proto = generic_get_l4proto, }; EXPORT_SYMBOL_GPL(nf_conntrack_l3proto_generic); diff --git a/net/netfilter/nf_conntrack_netbios_ns.c b/net/netfilter/nf_conntrack_netbios_ns.c index 1093478cc007..1d59fabeb5f7 100644 --- a/net/netfilter/nf_conntrack_netbios_ns.c +++ b/net/netfilter/nf_conntrack_netbios_ns.c @@ -74,7 +74,7 @@ static int help(struct sk_buff **pskb, unsigned int protoff, if (mask == 0) goto out; - exp = nf_conntrack_expect_alloc(ct); + exp = nf_ct_expect_alloc(ct); if (exp == NULL) goto out; @@ -83,16 +83,13 @@ static int help(struct sk_buff **pskb, unsigned int protoff, exp->mask.src.u3.ip = mask; exp->mask.src.u.udp.port = htons(0xFFFF); - exp->mask.dst.u3.ip = htonl(0xFFFFFFFF); - exp->mask.dst.u.udp.port = htons(0xFFFF); - exp->mask.dst.protonum = 0xFF; exp->expectfn = NULL; exp->flags = NF_CT_EXPECT_PERMANENT; exp->helper = NULL; - nf_conntrack_expect_related(exp); - nf_conntrack_expect_put(exp); + nf_ct_expect_related(exp); + nf_ct_expect_put(exp); nf_ct_refresh(ct, *pskb, timeout * HZ); out: @@ -104,9 +101,6 @@ static struct nf_conntrack_helper helper __read_mostly = { .tuple.src.l3num = AF_INET, .tuple.src.u.udp.port = __constant_htons(NMBD_PORT), .tuple.dst.protonum = IPPROTO_UDP, - .mask.src.l3num = 0xFFFF, - .mask.src.u.udp.port = __constant_htons(0xFFFF), - .mask.dst.protonum = 0xFF, .max_expected = 1, .me = THIS_MODULE, .help = help, diff --git a/net/netfilter/nf_conntrack_netlink.c b/net/netfilter/nf_conntrack_netlink.c index d0fe3d769828..2863e72b4091 100644 --- a/net/netfilter/nf_conntrack_netlink.c +++ b/net/netfilter/nf_conntrack_netlink.c @@ -428,7 +428,7 @@ ctnetlink_dump_table(struct sk_buff *skb, struct netlink_callback *cb) { struct nf_conn *ct, *last; struct nf_conntrack_tuple_hash *h; - struct list_head *i; + struct hlist_node *n; struct nfgenmsg *nfmsg = NLMSG_DATA(cb->nlh); u_int8_t l3proto = nfmsg->nfgen_family; @@ -436,8 +436,8 @@ ctnetlink_dump_table(struct sk_buff *skb, struct netlink_callback *cb) last = (struct nf_conn *)cb->args[1]; for (; cb->args[0] < nf_conntrack_htable_size; cb->args[0]++) { restart: - list_for_each_prev(i, &nf_conntrack_hash[cb->args[0]]) { - h = (struct nf_conntrack_tuple_hash *) i; + hlist_for_each_entry(h, n, &nf_conntrack_hash[cb->args[0]], + hnode) { if (NF_CT_DIRECTION(h) != IP_CT_DIR_ORIGINAL) continue; ct = nf_ct_tuplehash_to_ctrack(h); @@ -689,7 +689,7 @@ ctnetlink_del_conntrack(struct sock *ctnl, struct sk_buff *skb, if (err < 0) return err; - h = nf_conntrack_find_get(&tuple, NULL); + h = nf_conntrack_find_get(&tuple); if (!h) return -ENOENT; @@ -744,7 +744,7 @@ ctnetlink_get_conntrack(struct sock *ctnl, struct sk_buff *skb, if (err < 0) return err; - h = nf_conntrack_find_get(&tuple, NULL); + h = nf_conntrack_find_get(&tuple); if (!h) return -ENOENT; @@ -856,23 +856,23 @@ ctnetlink_change_helper(struct nf_conn *ct, struct nfattr *cda[]) return 0; } - if (!help) { - /* FIXME: we need to reallocate and rehash */ - return -EBUSY; - } - helper = __nf_conntrack_helper_find_byname(helpname); if (helper == NULL) return -EINVAL; - if (help->helper == helper) - return 0; - - if (help->helper) - return -EBUSY; + if (help) { + if (help->helper == helper) + return 0; + if (help->helper) + return -EBUSY; + /* need to zero data of old helper */ + memset(&help->help, 0, sizeof(help->help)); + } else { + help = nf_ct_helper_ext_add(ct, GFP_KERNEL); + if (help == NULL) + return -ENOMEM; + } - /* need to zero data of old helper */ - memset(&help->help, 0, sizeof(help->help)); rcu_assign_pointer(help->helper, helper); return 0; @@ -957,7 +957,7 @@ ctnetlink_create_conntrack(struct nfattr *cda[], struct nf_conn *ct; int err = -EINVAL; struct nf_conn_help *help; - struct nf_conntrack_helper *helper = NULL; + struct nf_conntrack_helper *helper; ct = nf_conntrack_alloc(otuple, rtuple); if (ct == NULL || IS_ERR(ct)) @@ -987,9 +987,14 @@ ctnetlink_create_conntrack(struct nfattr *cda[], ct->mark = ntohl(*(__be32 *)NFA_DATA(cda[CTA_MARK-1])); #endif - help = nfct_help(ct); - if (help) { - helper = nf_ct_helper_find_get(rtuple); + helper = nf_ct_helper_find_get(rtuple); + if (helper) { + help = nf_ct_helper_ext_add(ct, GFP_KERNEL); + if (help == NULL) { + nf_ct_helper_put(helper); + err = -ENOMEM; + goto err; + } /* not in hash table yet so not strictly necessary */ rcu_assign_pointer(help->helper, helper); } @@ -1047,17 +1052,18 @@ ctnetlink_new_conntrack(struct sock *ctnl, struct sk_buff *skb, } /* implicit 'else' */ - /* we only allow nat config for new conntracks */ - if (cda[CTA_NAT_SRC-1] || cda[CTA_NAT_DST-1]) { - err = -EINVAL; - goto out_unlock; - } - /* We manipulate the conntrack inside the global conntrack table lock, * so there's no need to increase the refcount */ err = -EEXIST; - if (!(nlh->nlmsg_flags & NLM_F_EXCL)) - err = ctnetlink_change_conntrack(nf_ct_tuplehash_to_ctrack(h), cda); + if (!(nlh->nlmsg_flags & NLM_F_EXCL)) { + /* we only allow nat config for new conntracks */ + if (cda[CTA_NAT_SRC-1] || cda[CTA_NAT_DST-1]) { + err = -EINVAL; + goto out_unlock; + } + err = ctnetlink_change_conntrack(nf_ct_tuplehash_to_ctrack(h), + cda); + } out_unlock: write_unlock_bh(&nf_conntrack_lock); @@ -1089,22 +1095,29 @@ nfattr_failure: static inline int ctnetlink_exp_dump_mask(struct sk_buff *skb, const struct nf_conntrack_tuple *tuple, - const struct nf_conntrack_tuple *mask) + const struct nf_conntrack_tuple_mask *mask) { int ret; struct nf_conntrack_l3proto *l3proto; struct nf_conntrack_l4proto *l4proto; - struct nfattr *nest_parms = NFA_NEST(skb, CTA_EXPECT_MASK); + struct nf_conntrack_tuple m; + struct nfattr *nest_parms; + + memset(&m, 0xFF, sizeof(m)); + m.src.u.all = mask->src.u.all; + memcpy(&m.src.u3, &mask->src.u3, sizeof(m.src.u3)); + + nest_parms = NFA_NEST(skb, CTA_EXPECT_MASK); l3proto = nf_ct_l3proto_find_get(tuple->src.l3num); - ret = ctnetlink_dump_tuples_ip(skb, mask, l3proto); + ret = ctnetlink_dump_tuples_ip(skb, &m, l3proto); nf_ct_l3proto_put(l3proto); if (unlikely(ret < 0)) goto nfattr_failure; l4proto = nf_ct_l4proto_find_get(tuple->src.l3num, tuple->dst.protonum); - ret = ctnetlink_dump_tuples_proto(skb, mask, l4proto); + ret = ctnetlink_dump_tuples_proto(skb, &m, l4proto); nf_ct_l4proto_put(l4proto); if (unlikely(ret < 0)) goto nfattr_failure; @@ -1223,32 +1236,52 @@ nfattr_failure: return NOTIFY_DONE; } #endif +static int ctnetlink_exp_done(struct netlink_callback *cb) +{ + if (cb->args[1]) + nf_ct_expect_put((struct nf_conntrack_expect *)cb->args[1]); + return 0; +} static int ctnetlink_exp_dump_table(struct sk_buff *skb, struct netlink_callback *cb) { - struct nf_conntrack_expect *exp = NULL; - struct list_head *i; - u_int32_t *id = (u_int32_t *) &cb->args[0]; + struct nf_conntrack_expect *exp, *last; struct nfgenmsg *nfmsg = NLMSG_DATA(cb->nlh); + struct hlist_node *n; u_int8_t l3proto = nfmsg->nfgen_family; read_lock_bh(&nf_conntrack_lock); - list_for_each_prev(i, &nf_conntrack_expect_list) { - exp = (struct nf_conntrack_expect *) i; - if (l3proto && exp->tuple.src.l3num != l3proto) - continue; - if (exp->id <= *id) - continue; - if (ctnetlink_exp_fill_info(skb, NETLINK_CB(cb->skb).pid, - cb->nlh->nlmsg_seq, - IPCTNL_MSG_EXP_NEW, - 1, exp) < 0) - goto out; - *id = exp->id; + last = (struct nf_conntrack_expect *)cb->args[1]; + for (; cb->args[0] < nf_ct_expect_hsize; cb->args[0]++) { +restart: + hlist_for_each_entry(exp, n, &nf_ct_expect_hash[cb->args[0]], + hnode) { + if (l3proto && exp->tuple.src.l3num != l3proto) + continue; + if (cb->args[1]) { + if (exp != last) + continue; + cb->args[1] = 0; + } + if (ctnetlink_exp_fill_info(skb, NETLINK_CB(cb->skb).pid, + cb->nlh->nlmsg_seq, + IPCTNL_MSG_EXP_NEW, + 1, exp) < 0) { + atomic_inc(&exp->use); + cb->args[1] = (unsigned long)exp; + goto out; + } + } + if (cb->args[1]) { + cb->args[1] = 0; + goto restart; + } } out: read_unlock_bh(&nf_conntrack_lock); + if (last) + nf_ct_expect_put(last); return skb->len; } @@ -1275,7 +1308,7 @@ ctnetlink_get_expect(struct sock *ctnl, struct sk_buff *skb, if (nlh->nlmsg_flags & NLM_F_DUMP) { return netlink_dump_start(ctnl, skb, nlh, ctnetlink_exp_dump_table, - ctnetlink_done); + ctnetlink_exp_done); } if (cda[CTA_EXPECT_MASTER-1]) @@ -1286,14 +1319,14 @@ ctnetlink_get_expect(struct sock *ctnl, struct sk_buff *skb, if (err < 0) return err; - exp = nf_conntrack_expect_find_get(&tuple); + exp = nf_ct_expect_find_get(&tuple); if (!exp) return -ENOENT; if (cda[CTA_EXPECT_ID-1]) { __be32 id = *(__be32 *)NFA_DATA(cda[CTA_EXPECT_ID-1]); if (exp->id != ntohl(id)) { - nf_conntrack_expect_put(exp); + nf_ct_expect_put(exp); return -ENOENT; } } @@ -1309,14 +1342,14 @@ ctnetlink_get_expect(struct sock *ctnl, struct sk_buff *skb, if (err <= 0) goto free; - nf_conntrack_expect_put(exp); + nf_ct_expect_put(exp); return netlink_unicast(ctnl, skb2, NETLINK_CB(skb).pid, MSG_DONTWAIT); free: kfree_skb(skb2); out: - nf_conntrack_expect_put(exp); + nf_ct_expect_put(exp); return err; } @@ -1324,11 +1357,13 @@ static int ctnetlink_del_expect(struct sock *ctnl, struct sk_buff *skb, struct nlmsghdr *nlh, struct nfattr *cda[]) { - struct nf_conntrack_expect *exp, *tmp; + struct nf_conntrack_expect *exp; struct nf_conntrack_tuple tuple; struct nf_conntrack_helper *h; struct nfgenmsg *nfmsg = NLMSG_DATA(nlh); + struct hlist_node *n, *next; u_int8_t u3 = nfmsg->nfgen_family; + unsigned int i; int err; if (nfattr_bad_size(cda, CTA_EXPECT_MAX, cta_min_exp)) @@ -1341,25 +1376,26 @@ ctnetlink_del_expect(struct sock *ctnl, struct sk_buff *skb, return err; /* bump usage count to 2 */ - exp = nf_conntrack_expect_find_get(&tuple); + exp = nf_ct_expect_find_get(&tuple); if (!exp) return -ENOENT; if (cda[CTA_EXPECT_ID-1]) { __be32 id = *(__be32 *)NFA_DATA(cda[CTA_EXPECT_ID-1]); if (exp->id != ntohl(id)) { - nf_conntrack_expect_put(exp); + nf_ct_expect_put(exp); return -ENOENT; } } /* after list removal, usage count == 1 */ - nf_conntrack_unexpect_related(exp); + nf_ct_unexpect_related(exp); /* have to put what we 'get' above. * after this line usage count == 0 */ - nf_conntrack_expect_put(exp); + nf_ct_expect_put(exp); } else if (cda[CTA_EXPECT_HELP_NAME-1]) { char *name = NFA_DATA(cda[CTA_EXPECT_HELP_NAME-1]); + struct nf_conn_help *m_help; /* delete all expectations for this helper */ write_lock_bh(&nf_conntrack_lock); @@ -1368,24 +1404,30 @@ ctnetlink_del_expect(struct sock *ctnl, struct sk_buff *skb, write_unlock_bh(&nf_conntrack_lock); return -EINVAL; } - list_for_each_entry_safe(exp, tmp, &nf_conntrack_expect_list, - list) { - struct nf_conn_help *m_help = nfct_help(exp->master); - if (m_help->helper == h - && del_timer(&exp->timeout)) { - nf_ct_unlink_expect(exp); - nf_conntrack_expect_put(exp); + for (i = 0; i < nf_ct_expect_hsize; i++) { + hlist_for_each_entry_safe(exp, n, next, + &nf_ct_expect_hash[i], + hnode) { + m_help = nfct_help(exp->master); + if (m_help->helper == h + && del_timer(&exp->timeout)) { + nf_ct_unlink_expect(exp); + nf_ct_expect_put(exp); + } } } write_unlock_bh(&nf_conntrack_lock); } else { /* This basically means we have to flush everything*/ write_lock_bh(&nf_conntrack_lock); - list_for_each_entry_safe(exp, tmp, &nf_conntrack_expect_list, - list) { - if (del_timer(&exp->timeout)) { - nf_ct_unlink_expect(exp); - nf_conntrack_expect_put(exp); + for (i = 0; i < nf_ct_expect_hsize; i++) { + hlist_for_each_entry_safe(exp, n, next, + &nf_ct_expect_hash[i], + hnode) { + if (del_timer(&exp->timeout)) { + nf_ct_unlink_expect(exp); + nf_ct_expect_put(exp); + } } } write_unlock_bh(&nf_conntrack_lock); @@ -1421,7 +1463,7 @@ ctnetlink_create_expect(struct nfattr *cda[], u_int8_t u3) return err; /* Look for master conntrack of this expectation */ - h = nf_conntrack_find_get(&master_tuple, NULL); + h = nf_conntrack_find_get(&master_tuple); if (!h) return -ENOENT; ct = nf_ct_tuplehash_to_ctrack(h); @@ -1433,7 +1475,7 @@ ctnetlink_create_expect(struct nfattr *cda[], u_int8_t u3) goto out; } - exp = nf_conntrack_expect_alloc(ct); + exp = nf_ct_expect_alloc(ct); if (!exp) { err = -ENOMEM; goto out; @@ -1444,10 +1486,11 @@ ctnetlink_create_expect(struct nfattr *cda[], u_int8_t u3) exp->master = ct; exp->helper = NULL; memcpy(&exp->tuple, &tuple, sizeof(struct nf_conntrack_tuple)); - memcpy(&exp->mask, &mask, sizeof(struct nf_conntrack_tuple)); + memcpy(&exp->mask.src.u3, &mask.src.u3, sizeof(exp->mask.src.u3)); + exp->mask.src.u.all = mask.src.u.all; - err = nf_conntrack_expect_related(exp); - nf_conntrack_expect_put(exp); + err = nf_ct_expect_related(exp); + nf_ct_expect_put(exp); out: nf_ct_put(nf_ct_tuplehash_to_ctrack(h)); @@ -1477,7 +1520,7 @@ ctnetlink_new_expect(struct sock *ctnl, struct sk_buff *skb, return err; write_lock_bh(&nf_conntrack_lock); - exp = __nf_conntrack_expect_find(&tuple); + exp = __nf_ct_expect_find(&tuple); if (!exp) { write_unlock_bh(&nf_conntrack_lock); @@ -1567,7 +1610,7 @@ static int __init ctnetlink_init(void) goto err_unreg_exp_subsys; } - ret = nf_conntrack_expect_register_notifier(&ctnl_notifier_exp); + ret = nf_ct_expect_register_notifier(&ctnl_notifier_exp); if (ret < 0) { printk("ctnetlink_init: cannot expect register notifier.\n"); goto err_unreg_notifier; @@ -1593,7 +1636,7 @@ static void __exit ctnetlink_exit(void) printk("ctnetlink: unregistering from nfnetlink.\n"); #ifdef CONFIG_NF_CONNTRACK_EVENTS - nf_conntrack_expect_unregister_notifier(&ctnl_notifier_exp); + nf_ct_expect_unregister_notifier(&ctnl_notifier_exp); nf_conntrack_unregister_notifier(&ctnl_notifier); #endif diff --git a/net/netfilter/nf_conntrack_pptp.c b/net/netfilter/nf_conntrack_pptp.c index 115bcb5d5a7c..b0804199ab59 100644 --- a/net/netfilter/nf_conntrack_pptp.c +++ b/net/netfilter/nf_conntrack_pptp.c @@ -65,7 +65,7 @@ void struct nf_conntrack_expect *exp) __read_mostly; EXPORT_SYMBOL_GPL(nf_nat_pptp_hook_expectfn); -#if 0 +#ifdef DEBUG /* PptpControlMessageType names */ const char *pptp_msg_name[] = { "UNKNOWN_MESSAGE", @@ -86,9 +86,6 @@ const char *pptp_msg_name[] = { "SET_LINK_INFO" }; EXPORT_SYMBOL(pptp_msg_name); -#define DEBUGP(format, args...) printk(KERN_DEBUG "%s:%s: " format, __FILE__, __FUNCTION__, ## args) -#else -#define DEBUGP(format, args...) #endif #define SECS *HZ @@ -102,7 +99,7 @@ static void pptp_expectfn(struct nf_conn *ct, struct nf_conntrack_expect *exp) { typeof(nf_nat_pptp_hook_expectfn) nf_nat_pptp_expectfn; - DEBUGP("increasing timeouts\n"); + pr_debug("increasing timeouts\n"); /* increase timeout of GRE data channel conntrack entry */ ct->proto.gre.timeout = PPTP_GRE_TIMEOUT; @@ -121,17 +118,17 @@ static void pptp_expectfn(struct nf_conn *ct, /* obviously this tuple inversion only works until you do NAT */ nf_ct_invert_tuplepr(&inv_t, &exp->tuple); - DEBUGP("trying to unexpect other dir: "); + pr_debug("trying to unexpect other dir: "); NF_CT_DUMP_TUPLE(&inv_t); - exp_other = nf_conntrack_expect_find_get(&inv_t); + exp_other = nf_ct_expect_find_get(&inv_t); if (exp_other) { /* delete other expectation. */ - DEBUGP("found\n"); - nf_conntrack_unexpect_related(exp_other); - nf_conntrack_expect_put(exp_other); + pr_debug("found\n"); + nf_ct_unexpect_related(exp_other); + nf_ct_expect_put(exp_other); } else { - DEBUGP("not found\n"); + pr_debug("not found\n"); } } rcu_read_unlock(); @@ -143,13 +140,13 @@ static int destroy_sibling_or_exp(const struct nf_conntrack_tuple *t) struct nf_conntrack_expect *exp; struct nf_conn *sibling; - DEBUGP("trying to timeout ct or exp for tuple "); + pr_debug("trying to timeout ct or exp for tuple "); NF_CT_DUMP_TUPLE(t); - h = nf_conntrack_find_get(t, NULL); + h = nf_conntrack_find_get(t); if (h) { sibling = nf_ct_tuplehash_to_ctrack(h); - DEBUGP("setting timeout of conntrack %p to 0\n", sibling); + pr_debug("setting timeout of conntrack %p to 0\n", sibling); sibling->proto.gre.timeout = 0; sibling->proto.gre.stream_timeout = 0; if (del_timer(&sibling->timeout)) @@ -157,11 +154,11 @@ static int destroy_sibling_or_exp(const struct nf_conntrack_tuple *t) nf_ct_put(sibling); return 1; } else { - exp = nf_conntrack_expect_find_get(t); + exp = nf_ct_expect_find_get(t); if (exp) { - DEBUGP("unexpect_related of expect %p\n", exp); - nf_conntrack_unexpect_related(exp); - nf_conntrack_expect_put(exp); + pr_debug("unexpect_related of expect %p\n", exp); + nf_ct_unexpect_related(exp); + nf_ct_expect_put(exp); return 1; } } @@ -182,7 +179,7 @@ static void pptp_destroy_siblings(struct nf_conn *ct) t.src.u.gre.key = help->help.ct_pptp_info.pns_call_id; t.dst.u.gre.key = help->help.ct_pptp_info.pac_call_id; if (!destroy_sibling_or_exp(&t)) - DEBUGP("failed to timeout original pns->pac ct/exp\n"); + pr_debug("failed to timeout original pns->pac ct/exp\n"); /* try reply (pac->pns) tuple */ memcpy(&t, &ct->tuplehash[IP_CT_DIR_REPLY].tuple, sizeof(t)); @@ -190,7 +187,7 @@ static void pptp_destroy_siblings(struct nf_conn *ct) t.src.u.gre.key = help->help.ct_pptp_info.pac_call_id; t.dst.u.gre.key = help->help.ct_pptp_info.pns_call_id; if (!destroy_sibling_or_exp(&t)) - DEBUGP("failed to timeout reply pac->pns ct/exp\n"); + pr_debug("failed to timeout reply pac->pns ct/exp\n"); } /* expect GRE connections (PNS->PAC and PAC->PNS direction) */ @@ -201,36 +198,36 @@ static int exp_gre(struct nf_conn *ct, __be16 callid, __be16 peer_callid) int ret = 1; typeof(nf_nat_pptp_hook_exp_gre) nf_nat_pptp_exp_gre; - exp_orig = nf_conntrack_expect_alloc(ct); + exp_orig = nf_ct_expect_alloc(ct); if (exp_orig == NULL) goto out; - exp_reply = nf_conntrack_expect_alloc(ct); + exp_reply = nf_ct_expect_alloc(ct); if (exp_reply == NULL) goto out_put_orig; /* original direction, PNS->PAC */ dir = IP_CT_DIR_ORIGINAL; - nf_conntrack_expect_init(exp_orig, ct->tuplehash[dir].tuple.src.l3num, - &ct->tuplehash[dir].tuple.src.u3, - &ct->tuplehash[dir].tuple.dst.u3, - IPPROTO_GRE, &peer_callid, &callid); + nf_ct_expect_init(exp_orig, ct->tuplehash[dir].tuple.src.l3num, + &ct->tuplehash[dir].tuple.src.u3, + &ct->tuplehash[dir].tuple.dst.u3, + IPPROTO_GRE, &peer_callid, &callid); exp_orig->expectfn = pptp_expectfn; /* reply direction, PAC->PNS */ dir = IP_CT_DIR_REPLY; - nf_conntrack_expect_init(exp_reply, ct->tuplehash[dir].tuple.src.l3num, - &ct->tuplehash[dir].tuple.src.u3, - &ct->tuplehash[dir].tuple.dst.u3, - IPPROTO_GRE, &callid, &peer_callid); + nf_ct_expect_init(exp_reply, ct->tuplehash[dir].tuple.src.l3num, + &ct->tuplehash[dir].tuple.src.u3, + &ct->tuplehash[dir].tuple.dst.u3, + IPPROTO_GRE, &callid, &peer_callid); exp_reply->expectfn = pptp_expectfn; nf_nat_pptp_exp_gre = rcu_dereference(nf_nat_pptp_hook_exp_gre); if (nf_nat_pptp_exp_gre && ct->status & IPS_NAT_MASK) nf_nat_pptp_exp_gre(exp_orig, exp_reply); - if (nf_conntrack_expect_related(exp_orig) != 0) + if (nf_ct_expect_related(exp_orig) != 0) goto out_put_both; - if (nf_conntrack_expect_related(exp_reply) != 0) + if (nf_ct_expect_related(exp_reply) != 0) goto out_unexpect_orig; /* Add GRE keymap entries */ @@ -243,16 +240,16 @@ static int exp_gre(struct nf_conn *ct, __be16 callid, __be16 peer_callid) ret = 0; out_put_both: - nf_conntrack_expect_put(exp_reply); + nf_ct_expect_put(exp_reply); out_put_orig: - nf_conntrack_expect_put(exp_orig); + nf_ct_expect_put(exp_orig); out: return ret; out_unexpect_both: - nf_conntrack_unexpect_related(exp_reply); + nf_ct_unexpect_related(exp_reply); out_unexpect_orig: - nf_conntrack_unexpect_related(exp_orig); + nf_ct_unexpect_related(exp_orig); goto out_put_both; } @@ -270,7 +267,7 @@ pptp_inbound_pkt(struct sk_buff **pskb, typeof(nf_nat_pptp_hook_inbound) nf_nat_pptp_inbound; msg = ntohs(ctlh->messageType); - DEBUGP("inbound control message %s\n", pptp_msg_name[msg]); + pr_debug("inbound control message %s\n", pptp_msg_name[msg]); switch (msg) { case PPTP_START_SESSION_REPLY: @@ -305,8 +302,8 @@ pptp_inbound_pkt(struct sk_buff **pskb, pcid = pptpReq->ocack.peersCallID; if (info->pns_call_id != pcid) goto invalid; - DEBUGP("%s, CID=%X, PCID=%X\n", pptp_msg_name[msg], - ntohs(cid), ntohs(pcid)); + pr_debug("%s, CID=%X, PCID=%X\n", pptp_msg_name[msg], + ntohs(cid), ntohs(pcid)); if (pptpReq->ocack.resultCode == PPTP_OUTCALL_CONNECT) { info->cstate = PPTP_CALL_OUT_CONF; @@ -322,7 +319,7 @@ pptp_inbound_pkt(struct sk_buff **pskb, goto invalid; cid = pptpReq->icreq.callID; - DEBUGP("%s, CID=%X\n", pptp_msg_name[msg], ntohs(cid)); + pr_debug("%s, CID=%X\n", pptp_msg_name[msg], ntohs(cid)); info->cstate = PPTP_CALL_IN_REQ; info->pac_call_id = cid; break; @@ -341,7 +338,7 @@ pptp_inbound_pkt(struct sk_buff **pskb, if (info->pns_call_id != pcid) goto invalid; - DEBUGP("%s, PCID=%X\n", pptp_msg_name[msg], ntohs(pcid)); + pr_debug("%s, PCID=%X\n", pptp_msg_name[msg], ntohs(pcid)); info->cstate = PPTP_CALL_IN_CONF; /* we expect a GRE connection from PAC to PNS */ @@ -351,7 +348,7 @@ pptp_inbound_pkt(struct sk_buff **pskb, case PPTP_CALL_DISCONNECT_NOTIFY: /* server confirms disconnect */ cid = pptpReq->disc.callID; - DEBUGP("%s, CID=%X\n", pptp_msg_name[msg], ntohs(cid)); + pr_debug("%s, CID=%X\n", pptp_msg_name[msg], ntohs(cid)); info->cstate = PPTP_CALL_NONE; /* untrack this call id, unexpect GRE packets */ @@ -374,11 +371,11 @@ pptp_inbound_pkt(struct sk_buff **pskb, return NF_ACCEPT; invalid: - DEBUGP("invalid %s: type=%d cid=%u pcid=%u " - "cstate=%d sstate=%d pns_cid=%u pac_cid=%u\n", - msg <= PPTP_MSG_MAX ? pptp_msg_name[msg] : pptp_msg_name[0], - msg, ntohs(cid), ntohs(pcid), info->cstate, info->sstate, - ntohs(info->pns_call_id), ntohs(info->pac_call_id)); + pr_debug("invalid %s: type=%d cid=%u pcid=%u " + "cstate=%d sstate=%d pns_cid=%u pac_cid=%u\n", + msg <= PPTP_MSG_MAX ? pptp_msg_name[msg] : pptp_msg_name[0], + msg, ntohs(cid), ntohs(pcid), info->cstate, info->sstate, + ntohs(info->pns_call_id), ntohs(info->pac_call_id)); return NF_ACCEPT; } @@ -396,7 +393,7 @@ pptp_outbound_pkt(struct sk_buff **pskb, typeof(nf_nat_pptp_hook_outbound) nf_nat_pptp_outbound; msg = ntohs(ctlh->messageType); - DEBUGP("outbound control message %s\n", pptp_msg_name[msg]); + pr_debug("outbound control message %s\n", pptp_msg_name[msg]); switch (msg) { case PPTP_START_SESSION_REQUEST: @@ -418,7 +415,7 @@ pptp_outbound_pkt(struct sk_buff **pskb, info->cstate = PPTP_CALL_OUT_REQ; /* track PNS call id */ cid = pptpReq->ocreq.callID; - DEBUGP("%s, CID=%X\n", pptp_msg_name[msg], ntohs(cid)); + pr_debug("%s, CID=%X\n", pptp_msg_name[msg], ntohs(cid)); info->pns_call_id = cid; break; @@ -432,8 +429,8 @@ pptp_outbound_pkt(struct sk_buff **pskb, pcid = pptpReq->icack.peersCallID; if (info->pac_call_id != pcid) goto invalid; - DEBUGP("%s, CID=%X PCID=%X\n", pptp_msg_name[msg], - ntohs(cid), ntohs(pcid)); + pr_debug("%s, CID=%X PCID=%X\n", pptp_msg_name[msg], + ntohs(cid), ntohs(pcid)); if (pptpReq->icack.resultCode == PPTP_INCALL_ACCEPT) { /* part two of the three-way handshake */ @@ -469,11 +466,11 @@ pptp_outbound_pkt(struct sk_buff **pskb, return NF_ACCEPT; invalid: - DEBUGP("invalid %s: type=%d cid=%u pcid=%u " - "cstate=%d sstate=%d pns_cid=%u pac_cid=%u\n", - msg <= PPTP_MSG_MAX ? pptp_msg_name[msg] : pptp_msg_name[0], - msg, ntohs(cid), ntohs(pcid), info->cstate, info->sstate, - ntohs(info->pns_call_id), ntohs(info->pac_call_id)); + pr_debug("invalid %s: type=%d cid=%u pcid=%u " + "cstate=%d sstate=%d pns_cid=%u pac_cid=%u\n", + msg <= PPTP_MSG_MAX ? pptp_msg_name[msg] : pptp_msg_name[0], + msg, ntohs(cid), ntohs(pcid), info->cstate, info->sstate, + ntohs(info->pns_call_id), ntohs(info->pac_call_id)); return NF_ACCEPT; } @@ -524,7 +521,7 @@ conntrack_pptp_help(struct sk_buff **pskb, unsigned int protoff, pptph = skb_header_pointer(*pskb, nexthdr_off, sizeof(_pptph), &_pptph); if (!pptph) { - DEBUGP("no full PPTP header, can't track\n"); + pr_debug("no full PPTP header, can't track\n"); return NF_ACCEPT; } nexthdr_off += sizeof(_pptph); @@ -533,7 +530,7 @@ conntrack_pptp_help(struct sk_buff **pskb, unsigned int protoff, /* if it's not a control message we can't do anything with it */ if (ntohs(pptph->packetType) != PPTP_PACKET_CONTROL || ntohl(pptph->magicCookie) != PPTP_MAGIC_COOKIE) { - DEBUGP("not a control packet\n"); + pr_debug("not a control packet\n"); return NF_ACCEPT; } @@ -569,8 +566,8 @@ conntrack_pptp_help(struct sk_buff **pskb, unsigned int protoff, /* server -> client (PAC -> PNS) */ ret = pptp_inbound_pkt(pskb, ctlh, pptpReq, reqlen, ct, ctinfo); - DEBUGP("sstate: %d->%d, cstate: %d->%d\n", - oldsstate, info->sstate, oldcstate, info->cstate); + pr_debug("sstate: %d->%d, cstate: %d->%d\n", + oldsstate, info->sstate, oldcstate, info->cstate); spin_unlock_bh(&nf_pptp_lock); return ret; @@ -585,9 +582,6 @@ static struct nf_conntrack_helper pptp __read_mostly = { .tuple.src.l3num = AF_INET, .tuple.src.u.tcp.port = __constant_htons(PPTP_CONTROL_PORT), .tuple.dst.protonum = IPPROTO_TCP, - .mask.src.l3num = 0xffff, - .mask.src.u.tcp.port = __constant_htons(0xffff), - .mask.dst.protonum = 0xff, .help = conntrack_pptp_help, .destroy = pptp_destroy_siblings, }; diff --git a/net/netfilter/nf_conntrack_proto_generic.c b/net/netfilter/nf_conntrack_proto_generic.c index 6faf1bed7224..d8b501878d9f 100644 --- a/net/netfilter/nf_conntrack_proto_generic.c +++ b/net/netfilter/nf_conntrack_proto_generic.c @@ -98,7 +98,7 @@ static struct ctl_table generic_compat_sysctl_table[] = { #endif /* CONFIG_NF_CONNTRACK_PROC_COMPAT */ #endif /* CONFIG_SYSCTL */ -struct nf_conntrack_l4proto nf_conntrack_l4proto_generic = +struct nf_conntrack_l4proto nf_conntrack_l4proto_generic __read_mostly = { .l3proto = PF_UNSPEC, .l4proto = 0, diff --git a/net/netfilter/nf_conntrack_proto_gre.c b/net/netfilter/nf_conntrack_proto_gre.c index 339c397d1b5f..bdbead8a7a83 100644 --- a/net/netfilter/nf_conntrack_proto_gre.c +++ b/net/netfilter/nf_conntrack_proto_gre.c @@ -40,12 +40,6 @@ #define GRE_TIMEOUT (30 * HZ) #define GRE_STREAM_TIMEOUT (180 * HZ) -#if 0 -#define DEBUGP(format, args...) printk(KERN_DEBUG "%s:%s: " format, __FILE__, __FUNCTION__, ## args) -#else -#define DEBUGP(x, args...) -#endif - static DEFINE_RWLOCK(nf_ct_gre_lock); static LIST_HEAD(gre_keymap_list); @@ -87,7 +81,7 @@ static __be16 gre_keymap_lookup(struct nf_conntrack_tuple *t) } read_unlock_bh(&nf_ct_gre_lock); - DEBUGP("lookup src key 0x%x for ", key); + pr_debug("lookup src key 0x%x for ", key); NF_CT_DUMP_TUPLE(t); return key; @@ -107,8 +101,8 @@ int nf_ct_gre_keymap_add(struct nf_conn *ct, enum ip_conntrack_dir dir, if (gre_key_cmpfn(km, t) && km == *kmp) return 0; } - DEBUGP("trying to override keymap_%s for ct %p\n", - dir == IP_CT_DIR_REPLY ? "reply" : "orig", ct); + pr_debug("trying to override keymap_%s for ct %p\n", + dir == IP_CT_DIR_REPLY ? "reply" : "orig", ct); return -EEXIST; } @@ -118,7 +112,7 @@ int nf_ct_gre_keymap_add(struct nf_conn *ct, enum ip_conntrack_dir dir, memcpy(&km->tuple, t, sizeof(*t)); *kmp = km; - DEBUGP("adding new entry %p: ", km); + pr_debug("adding new entry %p: ", km); NF_CT_DUMP_TUPLE(&km->tuple); write_lock_bh(&nf_ct_gre_lock); @@ -135,13 +129,13 @@ void nf_ct_gre_keymap_destroy(struct nf_conn *ct) struct nf_conn_help *help = nfct_help(ct); enum ip_conntrack_dir dir; - DEBUGP("entering for ct %p\n", ct); + pr_debug("entering for ct %p\n", ct); write_lock_bh(&nf_ct_gre_lock); for (dir = IP_CT_DIR_ORIGINAL; dir < IP_CT_DIR_MAX; dir++) { if (help->help.ct_pptp_info.keymap[dir]) { - DEBUGP("removing %p from list\n", - help->help.ct_pptp_info.keymap[dir]); + pr_debug("removing %p from list\n", + help->help.ct_pptp_info.keymap[dir]); list_del(&help->help.ct_pptp_info.keymap[dir]->list); kfree(help->help.ct_pptp_info.keymap[dir]); help->help.ct_pptp_info.keymap[dir] = NULL; @@ -186,7 +180,7 @@ static int gre_pkt_to_tuple(const struct sk_buff *skb, return 1; if (ntohs(grehdr->protocol) != GRE_PROTOCOL_PPTP) { - DEBUGP("GRE_VERSION_PPTP but unknown proto\n"); + pr_debug("GRE_VERSION_PPTP but unknown proto\n"); return 0; } @@ -242,7 +236,7 @@ static int gre_packet(struct nf_conn *ct, static int gre_new(struct nf_conn *ct, const struct sk_buff *skb, unsigned int dataoff) { - DEBUGP(": "); + pr_debug(": "); NF_CT_DUMP_TUPLE(&ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple); /* initialize to sane value. Ideally a conntrack helper @@ -258,16 +252,16 @@ static int gre_new(struct nf_conn *ct, const struct sk_buff *skb, static void gre_destroy(struct nf_conn *ct) { struct nf_conn *master = ct->master; - DEBUGP(" entering\n"); + pr_debug(" entering\n"); if (!master) - DEBUGP("no master !?!\n"); + pr_debug("no master !?!\n"); else nf_ct_gre_keymap_destroy(master); } /* protocol helper struct */ -static struct nf_conntrack_l4proto nf_conntrack_l4proto_gre4 = { +static struct nf_conntrack_l4proto nf_conntrack_l4proto_gre4 __read_mostly = { .l3proto = AF_INET, .l4proto = IPPROTO_GRE, .name = "gre", diff --git a/net/netfilter/nf_conntrack_proto_sctp.c b/net/netfilter/nf_conntrack_proto_sctp.c index 6e41ba5b0345..04192acc7c40 100644 --- a/net/netfilter/nf_conntrack_proto_sctp.c +++ b/net/netfilter/nf_conntrack_proto_sctp.c @@ -25,12 +25,6 @@ #include <net/netfilter/nf_conntrack_l4proto.h> #include <net/netfilter/nf_conntrack_ecache.h> -#if 0 -#define DEBUGP(format, ...) printk(format, ## __VA_ARGS__) -#else -#define DEBUGP(format, args...) -#endif - /* Protects conntrack->proto.sctp */ static DEFINE_RWLOCK(sctp_lock); @@ -151,9 +145,6 @@ static int sctp_pkt_to_tuple(const struct sk_buff *skb, { sctp_sctphdr_t _hdr, *hp; - DEBUGP(__FUNCTION__); - DEBUGP("\n"); - /* Actually only need first 8 bytes. */ hp = skb_header_pointer(skb, dataoff, 8, &_hdr); if (hp == NULL) @@ -167,9 +158,6 @@ static int sctp_pkt_to_tuple(const struct sk_buff *skb, static int sctp_invert_tuple(struct nf_conntrack_tuple *tuple, const struct nf_conntrack_tuple *orig) { - DEBUGP(__FUNCTION__); - DEBUGP("\n"); - tuple->src.u.sctp.port = orig->dst.u.sctp.port; tuple->dst.u.sctp.port = orig->src.u.sctp.port; return 1; @@ -179,9 +167,6 @@ static int sctp_invert_tuple(struct nf_conntrack_tuple *tuple, static int sctp_print_tuple(struct seq_file *s, const struct nf_conntrack_tuple *tuple) { - DEBUGP(__FUNCTION__); - DEBUGP("\n"); - return seq_printf(s, "sport=%hu dport=%hu ", ntohs(tuple->src.u.sctp.port), ntohs(tuple->dst.u.sctp.port)); @@ -193,9 +178,6 @@ static int sctp_print_conntrack(struct seq_file *s, { enum sctp_conntrack state; - DEBUGP(__FUNCTION__); - DEBUGP("\n"); - read_lock_bh(&sctp_lock); state = conntrack->proto.sctp.state; read_unlock_bh(&sctp_lock); @@ -219,13 +201,10 @@ static int do_basic_checks(struct nf_conn *conntrack, sctp_chunkhdr_t _sch, *sch; int flag; - DEBUGP(__FUNCTION__); - DEBUGP("\n"); - flag = 0; for_each_sctp_chunk (skb, sch, _sch, offset, dataoff, count) { - DEBUGP("Chunk Num: %d Type: %d\n", count, sch->type); + pr_debug("Chunk Num: %d Type: %d\n", count, sch->type); if (sch->type == SCTP_CID_INIT || sch->type == SCTP_CID_INIT_ACK @@ -242,7 +221,7 @@ static int do_basic_checks(struct nf_conn *conntrack, || sch->type == SCTP_CID_COOKIE_ECHO || flag) && count !=0) || !sch->length) { - DEBUGP("Basic checks failed\n"); + pr_debug("Basic checks failed\n"); return 1; } @@ -251,7 +230,7 @@ static int do_basic_checks(struct nf_conn *conntrack, } } - DEBUGP("Basic checks passed\n"); + pr_debug("Basic checks passed\n"); return count == 0; } @@ -261,50 +240,47 @@ static int new_state(enum ip_conntrack_dir dir, { int i; - DEBUGP(__FUNCTION__); - DEBUGP("\n"); - - DEBUGP("Chunk type: %d\n", chunk_type); + pr_debug("Chunk type: %d\n", chunk_type); switch (chunk_type) { case SCTP_CID_INIT: - DEBUGP("SCTP_CID_INIT\n"); + pr_debug("SCTP_CID_INIT\n"); i = 0; break; case SCTP_CID_INIT_ACK: - DEBUGP("SCTP_CID_INIT_ACK\n"); + pr_debug("SCTP_CID_INIT_ACK\n"); i = 1; break; case SCTP_CID_ABORT: - DEBUGP("SCTP_CID_ABORT\n"); + pr_debug("SCTP_CID_ABORT\n"); i = 2; break; case SCTP_CID_SHUTDOWN: - DEBUGP("SCTP_CID_SHUTDOWN\n"); + pr_debug("SCTP_CID_SHUTDOWN\n"); i = 3; break; case SCTP_CID_SHUTDOWN_ACK: - DEBUGP("SCTP_CID_SHUTDOWN_ACK\n"); + pr_debug("SCTP_CID_SHUTDOWN_ACK\n"); i = 4; break; case SCTP_CID_ERROR: - DEBUGP("SCTP_CID_ERROR\n"); + pr_debug("SCTP_CID_ERROR\n"); i = 5; break; case SCTP_CID_COOKIE_ECHO: - DEBUGP("SCTP_CID_COOKIE_ECHO\n"); + pr_debug("SCTP_CID_COOKIE_ECHO\n"); i = 6; break; case SCTP_CID_COOKIE_ACK: - DEBUGP("SCTP_CID_COOKIE_ACK\n"); + pr_debug("SCTP_CID_COOKIE_ACK\n"); i = 7; break; case SCTP_CID_SHUTDOWN_COMPLETE: - DEBUGP("SCTP_CID_SHUTDOWN_COMPLETE\n"); + pr_debug("SCTP_CID_SHUTDOWN_COMPLETE\n"); i = 8; break; default: /* Other chunks like DATA, SACK, HEARTBEAT and its ACK do not cause a change in state */ - DEBUGP("Unknown chunk type, Will stay in %s\n", - sctp_conntrack_names[cur_state]); + pr_debug("Unknown chunk type, Will stay in %s\n", + sctp_conntrack_names[cur_state]); return cur_state; } - DEBUGP("dir: %d cur_state: %s chunk_type: %d new_state: %s\n", - dir, sctp_conntrack_names[cur_state], chunk_type, - sctp_conntrack_names[sctp_conntracks[dir][i][cur_state]]); + pr_debug("dir: %d cur_state: %s chunk_type: %d new_state: %s\n", + dir, sctp_conntrack_names[cur_state], chunk_type, + sctp_conntrack_names[sctp_conntracks[dir][i][cur_state]]); return sctp_conntracks[dir][i][cur_state]; } @@ -323,9 +299,6 @@ static int sctp_packet(struct nf_conn *conntrack, u_int32_t offset, count; char map[256 / sizeof (char)] = {0}; - DEBUGP(__FUNCTION__); - DEBUGP("\n"); - sh = skb_header_pointer(skb, dataoff, sizeof(_sctph), &_sctph); if (sh == NULL) return -1; @@ -340,7 +313,7 @@ static int sctp_packet(struct nf_conn *conntrack, && !test_bit(SCTP_CID_ABORT, (void *)map) && !test_bit(SCTP_CID_SHUTDOWN_ACK, (void *)map) && (sh->vtag != conntrack->proto.sctp.vtag[CTINFO2DIR(ctinfo)])) { - DEBUGP("Verification tag check failed\n"); + pr_debug("Verification tag check failed\n"); return -1; } @@ -385,8 +358,9 @@ static int sctp_packet(struct nf_conn *conntrack, /* Invalid */ if (newconntrack == SCTP_CONNTRACK_MAX) { - DEBUGP("nf_conntrack_sctp: Invalid dir=%i ctype=%u conntrack=%u\n", - CTINFO2DIR(ctinfo), sch->type, oldsctpstate); + pr_debug("nf_conntrack_sctp: Invalid dir=%i ctype=%u " + "conntrack=%u\n", + CTINFO2DIR(ctinfo), sch->type, oldsctpstate); write_unlock_bh(&sctp_lock); return -1; } @@ -402,8 +376,8 @@ static int sctp_packet(struct nf_conn *conntrack, write_unlock_bh(&sctp_lock); return -1; } - DEBUGP("Setting vtag %x for dir %d\n", - ih->init_tag, !CTINFO2DIR(ctinfo)); + pr_debug("Setting vtag %x for dir %d\n", + ih->init_tag, !CTINFO2DIR(ctinfo)); conntrack->proto.sctp.vtag[!CTINFO2DIR(ctinfo)] = ih->init_tag; } @@ -418,7 +392,7 @@ static int sctp_packet(struct nf_conn *conntrack, if (oldsctpstate == SCTP_CONNTRACK_COOKIE_ECHOED && CTINFO2DIR(ctinfo) == IP_CT_DIR_REPLY && newconntrack == SCTP_CONNTRACK_ESTABLISHED) { - DEBUGP("Setting assured bit\n"); + pr_debug("Setting assured bit\n"); set_bit(IPS_ASSURED_BIT, &conntrack->status); nf_conntrack_event_cache(IPCT_STATUS, skb); } @@ -436,9 +410,6 @@ static int sctp_new(struct nf_conn *conntrack, const struct sk_buff *skb, u_int32_t offset, count; char map[256 / sizeof (char)] = {0}; - DEBUGP(__FUNCTION__); - DEBUGP("\n"); - sh = skb_header_pointer(skb, dataoff, sizeof(_sctph), &_sctph); if (sh == NULL) return 0; @@ -462,7 +433,7 @@ static int sctp_new(struct nf_conn *conntrack, const struct sk_buff *skb, /* Invalid: delete conntrack */ if (newconntrack == SCTP_CONNTRACK_NONE || newconntrack == SCTP_CONNTRACK_MAX) { - DEBUGP("nf_conntrack_sctp: invalid new deleting.\n"); + pr_debug("nf_conntrack_sctp: invalid new deleting.\n"); return 0; } @@ -476,8 +447,8 @@ static int sctp_new(struct nf_conn *conntrack, const struct sk_buff *skb, if (ih == NULL) return 0; - DEBUGP("Setting vtag %x for new conn\n", - ih->init_tag); + pr_debug("Setting vtag %x for new conn\n", + ih->init_tag); conntrack->proto.sctp.vtag[IP_CT_DIR_REPLY] = ih->init_tag; @@ -489,8 +460,8 @@ static int sctp_new(struct nf_conn *conntrack, const struct sk_buff *skb, /* If it is a shutdown ack OOTB packet, we expect a return shutdown complete, otherwise an ABORT Sec 8.4 (5) and (8) */ else { - DEBUGP("Setting vtag %x for new conn OOTB\n", - sh->vtag); + pr_debug("Setting vtag %x for new conn OOTB\n", + sh->vtag); conntrack->proto.sctp.vtag[IP_CT_DIR_REPLY] = sh->vtag; } @@ -630,7 +601,7 @@ static struct ctl_table sctp_compat_sysctl_table[] = { #endif /* CONFIG_NF_CONNTRACK_PROC_COMPAT */ #endif -struct nf_conntrack_l4proto nf_conntrack_l4proto_sctp4 = { +static struct nf_conntrack_l4proto nf_conntrack_l4proto_sctp4 __read_mostly = { .l3proto = PF_INET, .l4proto = IPPROTO_SCTP, .name = "sctp", @@ -651,7 +622,7 @@ struct nf_conntrack_l4proto nf_conntrack_l4proto_sctp4 = { #endif }; -struct nf_conntrack_l4proto nf_conntrack_l4proto_sctp6 = { +static struct nf_conntrack_l4proto nf_conntrack_l4proto_sctp6 __read_mostly = { .l3proto = PF_INET6, .l4proto = IPPROTO_SCTP, .name = "sctp", @@ -689,8 +660,6 @@ int __init nf_conntrack_proto_sctp_init(void) cleanup_sctp4: nf_conntrack_l4proto_unregister(&nf_conntrack_l4proto_sctp4); out: - DEBUGP("SCTP conntrack module loading %s\n", - ret ? "failed": "succeeded"); return ret; } @@ -698,7 +667,6 @@ void __exit nf_conntrack_proto_sctp_fini(void) { nf_conntrack_l4proto_unregister(&nf_conntrack_l4proto_sctp6); nf_conntrack_l4proto_unregister(&nf_conntrack_l4proto_sctp4); - DEBUGP("SCTP conntrack module unloaded\n"); } module_init(nf_conntrack_proto_sctp_init); diff --git a/net/netfilter/nf_conntrack_proto_tcp.c b/net/netfilter/nf_conntrack_proto_tcp.c index ccdd5d231e0d..eb3fe7401466 100644 --- a/net/netfilter/nf_conntrack_proto_tcp.c +++ b/net/netfilter/nf_conntrack_proto_tcp.c @@ -8,7 +8,6 @@ #include <linux/types.h> #include <linux/timer.h> -#include <linux/netfilter.h> #include <linux/module.h> #include <linux/in.h> #include <linux/tcp.h> @@ -26,13 +25,6 @@ #include <net/netfilter/nf_conntrack_l4proto.h> #include <net/netfilter/nf_conntrack_ecache.h> -#if 0 -#define DEBUGP printk -#define DEBUGP_VARS -#else -#define DEBUGP(format, args...) -#endif - /* Protects conntrack->proto.tcp */ static DEFINE_RWLOCK(tcp_lock); @@ -496,7 +488,8 @@ static void tcp_sack(const struct sk_buff *skb, unsigned int dataoff, } } -static int tcp_in_window(struct ip_ct_tcp *state, +static int tcp_in_window(struct nf_conn *ct, + struct ip_ct_tcp *state, enum ip_conntrack_dir dir, unsigned int index, const struct sk_buff *skb, @@ -506,6 +499,7 @@ static int tcp_in_window(struct ip_ct_tcp *state, { struct ip_ct_tcp_state *sender = &state->seen[dir]; struct ip_ct_tcp_state *receiver = &state->seen[!dir]; + struct nf_conntrack_tuple *tuple = &ct->tuplehash[dir].tuple; __u32 seq, ack, sack, end, win, swin; int res; @@ -520,18 +514,17 @@ static int tcp_in_window(struct ip_ct_tcp *state, if (receiver->flags & IP_CT_TCP_FLAG_SACK_PERM) tcp_sack(skb, dataoff, tcph, &sack); - DEBUGP("tcp_in_window: START\n"); - DEBUGP("tcp_in_window: src=%u.%u.%u.%u:%hu dst=%u.%u.%u.%u:%hu " - "seq=%u ack=%u sack=%u win=%u end=%u\n", - NIPQUAD(iph->saddr), ntohs(tcph->source), - NIPQUAD(iph->daddr), ntohs(tcph->dest), - seq, ack, sack, win, end); - DEBUGP("tcp_in_window: sender end=%u maxend=%u maxwin=%u scale=%i " - "receiver end=%u maxend=%u maxwin=%u scale=%i\n", - sender->td_end, sender->td_maxend, sender->td_maxwin, - sender->td_scale, - receiver->td_end, receiver->td_maxend, receiver->td_maxwin, - receiver->td_scale); + pr_debug("tcp_in_window: START\n"); + pr_debug("tcp_in_window: "); + NF_CT_DUMP_TUPLE(tuple); + pr_debug("seq=%u ack=%u sack=%u win=%u end=%u\n", + seq, ack, sack, win, end); + pr_debug("tcp_in_window: sender end=%u maxend=%u maxwin=%u scale=%i " + "receiver end=%u maxend=%u maxwin=%u scale=%i\n", + sender->td_end, sender->td_maxend, sender->td_maxwin, + sender->td_scale, + receiver->td_end, receiver->td_maxend, receiver->td_maxwin, + receiver->td_scale); if (sender->td_end == 0) { /* @@ -609,23 +602,22 @@ static int tcp_in_window(struct ip_ct_tcp *state, */ seq = end = sender->td_end; - DEBUGP("tcp_in_window: src=%u.%u.%u.%u:%hu dst=%u.%u.%u.%u:%hu " - "seq=%u ack=%u sack =%u win=%u end=%u\n", - NIPQUAD(iph->saddr), ntohs(tcph->source), - NIPQUAD(iph->daddr), ntohs(tcph->dest), - seq, ack, sack, win, end); - DEBUGP("tcp_in_window: sender end=%u maxend=%u maxwin=%u scale=%i " - "receiver end=%u maxend=%u maxwin=%u scale=%i\n", - sender->td_end, sender->td_maxend, sender->td_maxwin, - sender->td_scale, - receiver->td_end, receiver->td_maxend, receiver->td_maxwin, - receiver->td_scale); - - DEBUGP("tcp_in_window: I=%i II=%i III=%i IV=%i\n", - before(seq, sender->td_maxend + 1), - after(end, sender->td_end - receiver->td_maxwin - 1), - before(sack, receiver->td_end + 1), - after(ack, receiver->td_end - MAXACKWINDOW(sender))); + pr_debug("tcp_in_window: "); + NF_CT_DUMP_TUPLE(tuple); + pr_debug("seq=%u ack=%u sack =%u win=%u end=%u\n", + seq, ack, sack, win, end); + pr_debug("tcp_in_window: sender end=%u maxend=%u maxwin=%u scale=%i " + "receiver end=%u maxend=%u maxwin=%u scale=%i\n", + sender->td_end, sender->td_maxend, sender->td_maxwin, + sender->td_scale, + receiver->td_end, receiver->td_maxend, receiver->td_maxwin, + receiver->td_scale); + + pr_debug("tcp_in_window: I=%i II=%i III=%i IV=%i\n", + before(seq, sender->td_maxend + 1), + after(end, sender->td_end - receiver->td_maxwin - 1), + before(sack, receiver->td_end + 1), + after(ack, receiver->td_end - MAXACKWINDOW(sender))); if (before(seq, sender->td_maxend + 1) && after(end, sender->td_end - receiver->td_maxwin - 1) && @@ -694,10 +686,10 @@ static int tcp_in_window(struct ip_ct_tcp *state, : "SEQ is over the upper bound (over the window of the receiver)"); } - DEBUGP("tcp_in_window: res=%i sender end=%u maxend=%u maxwin=%u " - "receiver end=%u maxend=%u maxwin=%u\n", - res, sender->td_end, sender->td_maxend, sender->td_maxwin, - receiver->td_end, receiver->td_maxend, receiver->td_maxwin); + pr_debug("tcp_in_window: res=%i sender end=%u maxend=%u maxwin=%u " + "receiver end=%u maxend=%u maxwin=%u\n", + res, sender->td_end, sender->td_maxend, sender->td_maxwin, + receiver->td_end, receiver->td_maxend, receiver->td_maxwin); return res; } @@ -711,11 +703,9 @@ void nf_conntrack_tcp_update(struct sk_buff *skb, int dir) { struct tcphdr *tcph = (void *)skb->data + dataoff; - __u32 end; -#ifdef DEBUGP_VARS struct ip_ct_tcp_state *sender = &conntrack->proto.tcp.seen[dir]; struct ip_ct_tcp_state *receiver = &conntrack->proto.tcp.seen[!dir]; -#endif + __u32 end; end = segment_seq_plus_len(ntohl(tcph->seq), skb->len, dataoff, tcph); @@ -727,12 +717,12 @@ void nf_conntrack_tcp_update(struct sk_buff *skb, conntrack->proto.tcp.seen[dir].td_end = end; conntrack->proto.tcp.last_end = end; write_unlock_bh(&tcp_lock); - DEBUGP("tcp_update: sender end=%u maxend=%u maxwin=%u scale=%i " - "receiver end=%u maxend=%u maxwin=%u scale=%i\n", - sender->td_end, sender->td_maxend, sender->td_maxwin, - sender->td_scale, - receiver->td_end, receiver->td_maxend, receiver->td_maxwin, - receiver->td_scale); + pr_debug("tcp_update: sender end=%u maxend=%u maxwin=%u scale=%i " + "receiver end=%u maxend=%u maxwin=%u scale=%i\n", + sender->td_end, sender->td_maxend, sender->td_maxwin, + sender->td_scale, + receiver->td_end, receiver->td_maxend, receiver->td_maxwin, + receiver->td_scale); } EXPORT_SYMBOL_GPL(nf_conntrack_tcp_update); #endif @@ -823,6 +813,7 @@ static int tcp_packet(struct nf_conn *conntrack, int pf, unsigned int hooknum) { + struct nf_conntrack_tuple *tuple; enum tcp_conntrack new_state, old_state; enum ip_conntrack_dir dir; struct tcphdr *th, _tcph; @@ -837,6 +828,7 @@ static int tcp_packet(struct nf_conn *conntrack, dir = CTINFO2DIR(ctinfo); index = get_conntrack_index(th); new_state = tcp_conntracks[dir][index][old_state]; + tuple = &conntrack->tuplehash[dir].tuple; switch (new_state) { case TCP_CONNTRACK_IGNORE: @@ -880,9 +872,8 @@ static int tcp_packet(struct nf_conn *conntrack, return NF_ACCEPT; case TCP_CONNTRACK_MAX: /* Invalid packet */ - DEBUGP("nf_ct_tcp: Invalid dir=%i index=%u ostate=%u\n", - dir, get_conntrack_index(th), - old_state); + pr_debug("nf_ct_tcp: Invalid dir=%i index=%u ostate=%u\n", + dir, get_conntrack_index(th), old_state); write_unlock_bh(&tcp_lock); if (LOG_INVALID(IPPROTO_TCP)) nf_log_packet(pf, 0, skb, NULL, NULL, NULL, @@ -933,7 +924,7 @@ static int tcp_packet(struct nf_conn *conntrack, break; } - if (!tcp_in_window(&conntrack->proto.tcp, dir, index, + if (!tcp_in_window(conntrack, &conntrack->proto.tcp, dir, index, skb, dataoff, th, pf)) { write_unlock_bh(&tcp_lock); return -NF_ACCEPT; @@ -942,13 +933,12 @@ static int tcp_packet(struct nf_conn *conntrack, /* From now on we have got in-window packets */ conntrack->proto.tcp.last_index = index; - DEBUGP("tcp_conntracks: src=%u.%u.%u.%u:%hu dst=%u.%u.%u.%u:%hu " - "syn=%i ack=%i fin=%i rst=%i old=%i new=%i\n", - NIPQUAD(iph->saddr), ntohs(th->source), - NIPQUAD(iph->daddr), ntohs(th->dest), - (th->syn ? 1 : 0), (th->ack ? 1 : 0), - (th->fin ? 1 : 0), (th->rst ? 1 : 0), - old_state, new_state); + pr_debug("tcp_conntracks: "); + NF_CT_DUMP_TUPLE(tuple); + pr_debug("syn=%i ack=%i fin=%i rst=%i old=%i new=%i\n", + (th->syn ? 1 : 0), (th->ack ? 1 : 0), + (th->fin ? 1 : 0), (th->rst ? 1 : 0), + old_state, new_state); conntrack->proto.tcp.state = new_state; if (old_state != new_state @@ -997,10 +987,8 @@ static int tcp_new(struct nf_conn *conntrack, { enum tcp_conntrack new_state; struct tcphdr *th, _tcph; -#ifdef DEBUGP_VARS struct ip_ct_tcp_state *sender = &conntrack->proto.tcp.seen[0]; struct ip_ct_tcp_state *receiver = &conntrack->proto.tcp.seen[1]; -#endif th = skb_header_pointer(skb, dataoff, sizeof(_tcph), &_tcph); BUG_ON(th == NULL); @@ -1012,7 +1000,7 @@ static int tcp_new(struct nf_conn *conntrack, /* Invalid: delete conntrack */ if (new_state >= TCP_CONNTRACK_MAX) { - DEBUGP("nf_ct_tcp: invalid new deleting.\n"); + pr_debug("nf_ct_tcp: invalid new deleting.\n"); return 0; } @@ -1065,12 +1053,12 @@ static int tcp_new(struct nf_conn *conntrack, conntrack->proto.tcp.state = TCP_CONNTRACK_NONE; conntrack->proto.tcp.last_index = TCP_NONE_SET; - DEBUGP("tcp_new: sender end=%u maxend=%u maxwin=%u scale=%i " - "receiver end=%u maxend=%u maxwin=%u scale=%i\n", - sender->td_end, sender->td_maxend, sender->td_maxwin, - sender->td_scale, - receiver->td_end, receiver->td_maxend, receiver->td_maxwin, - receiver->td_scale); + pr_debug("tcp_new: sender end=%u maxend=%u maxwin=%u scale=%i " + "receiver end=%u maxend=%u maxwin=%u scale=%i\n", + sender->td_end, sender->td_maxend, sender->td_maxwin, + sender->td_scale, + receiver->td_end, receiver->td_maxend, receiver->td_maxwin, + receiver->td_scale); return 1; } @@ -1383,7 +1371,7 @@ static struct ctl_table tcp_compat_sysctl_table[] = { #endif /* CONFIG_NF_CONNTRACK_PROC_COMPAT */ #endif /* CONFIG_SYSCTL */ -struct nf_conntrack_l4proto nf_conntrack_l4proto_tcp4 = +struct nf_conntrack_l4proto nf_conntrack_l4proto_tcp4 __read_mostly = { .l3proto = PF_INET, .l4proto = IPPROTO_TCP, @@ -1412,7 +1400,7 @@ struct nf_conntrack_l4proto nf_conntrack_l4proto_tcp4 = }; EXPORT_SYMBOL_GPL(nf_conntrack_l4proto_tcp4); -struct nf_conntrack_l4proto nf_conntrack_l4proto_tcp6 = +struct nf_conntrack_l4proto nf_conntrack_l4proto_tcp6 __read_mostly = { .l3proto = PF_INET6, .l4proto = IPPROTO_TCP, diff --git a/net/netfilter/nf_conntrack_proto_udp.c b/net/netfilter/nf_conntrack_proto_udp.c index 3620ecc095fd..2a2fd1a764ea 100644 --- a/net/netfilter/nf_conntrack_proto_udp.c +++ b/net/netfilter/nf_conntrack_proto_udp.c @@ -9,7 +9,6 @@ #include <linux/types.h> #include <linux/timer.h> #include <linux/module.h> -#include <linux/netfilter.h> #include <linux/udp.h> #include <linux/seq_file.h> #include <linux/skbuff.h> @@ -191,7 +190,7 @@ static struct ctl_table udp_compat_sysctl_table[] = { #endif /* CONFIG_NF_CONNTRACK_PROC_COMPAT */ #endif /* CONFIG_SYSCTL */ -struct nf_conntrack_l4proto nf_conntrack_l4proto_udp4 = +struct nf_conntrack_l4proto nf_conntrack_l4proto_udp4 __read_mostly = { .l3proto = PF_INET, .l4proto = IPPROTO_UDP, @@ -218,7 +217,7 @@ struct nf_conntrack_l4proto nf_conntrack_l4proto_udp4 = }; EXPORT_SYMBOL_GPL(nf_conntrack_l4proto_udp4); -struct nf_conntrack_l4proto nf_conntrack_l4proto_udp6 = +struct nf_conntrack_l4proto nf_conntrack_l4proto_udp6 __read_mostly = { .l3proto = PF_INET6, .l4proto = IPPROTO_UDP, diff --git a/net/netfilter/nf_conntrack_sane.c b/net/netfilter/nf_conntrack_sane.c index eb2d1dc46d45..355d371bac93 100644 --- a/net/netfilter/nf_conntrack_sane.c +++ b/net/netfilter/nf_conntrack_sane.c @@ -40,12 +40,6 @@ static u_int16_t ports[MAX_PORTS]; static unsigned int ports_c; module_param_array(ports, ushort, &ports_c, 0400); -#if 0 -#define DEBUGP printk -#else -#define DEBUGP(format, args...) -#endif - struct sane_request { __be32 RPC_code; #define SANE_NET_START 7 /* RPC code */ @@ -125,15 +119,15 @@ static int help(struct sk_buff **pskb, ct_sane_info->state = SANE_STATE_NORMAL; if (datalen < sizeof(struct sane_reply_net_start)) { - DEBUGP("nf_ct_sane: NET_START reply too short\n"); + pr_debug("nf_ct_sane: NET_START reply too short\n"); goto out; } reply = (struct sane_reply_net_start *)sb_ptr; if (reply->status != htonl(SANE_STATUS_SUCCESS)) { /* saned refused the command */ - DEBUGP("nf_ct_sane: unsuccessful SANE_STATUS = %u\n", - ntohl(reply->status)); + pr_debug("nf_ct_sane: unsuccessful SANE_STATUS = %u\n", + ntohl(reply->status)); goto out; } @@ -141,35 +135,32 @@ static int help(struct sk_buff **pskb, if (reply->zero != 0) goto out; - exp = nf_conntrack_expect_alloc(ct); + exp = nf_ct_expect_alloc(ct); if (exp == NULL) { ret = NF_DROP; goto out; } tuple = &ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple; - nf_conntrack_expect_init(exp, family, - &tuple->src.u3, &tuple->dst.u3, - IPPROTO_TCP, - NULL, &reply->port); + nf_ct_expect_init(exp, family, &tuple->src.u3, &tuple->dst.u3, + IPPROTO_TCP, NULL, &reply->port); - DEBUGP("nf_ct_sane: expect: "); + pr_debug("nf_ct_sane: expect: "); NF_CT_DUMP_TUPLE(&exp->tuple); - NF_CT_DUMP_TUPLE(&exp->mask); /* Can't expect this? Best to drop packet now. */ - if (nf_conntrack_expect_related(exp) != 0) + if (nf_ct_expect_related(exp) != 0) ret = NF_DROP; - nf_conntrack_expect_put(exp); + nf_ct_expect_put(exp); out: spin_unlock_bh(&nf_sane_lock); return ret; } -static struct nf_conntrack_helper sane[MAX_PORTS][2]; -static char sane_names[MAX_PORTS][2][sizeof("sane-65535")]; +static struct nf_conntrack_helper sane[MAX_PORTS][2] __read_mostly; +static char sane_names[MAX_PORTS][2][sizeof("sane-65535")] __read_mostly; /* don't make this __exit, since it's called from __init ! */ static void nf_conntrack_sane_fini(void) @@ -178,9 +169,9 @@ static void nf_conntrack_sane_fini(void) for (i = 0; i < ports_c; i++) { for (j = 0; j < 2; j++) { - DEBUGP("nf_ct_sane: unregistering helper for pf: %d " - "port: %d\n", - sane[i][j].tuple.src.l3num, ports[i]); + pr_debug("nf_ct_sane: unregistering helper for pf: %d " + "port: %d\n", + sane[i][j].tuple.src.l3num, ports[i]); nf_conntrack_helper_unregister(&sane[i][j]); } } @@ -208,8 +199,6 @@ static int __init nf_conntrack_sane_init(void) for (j = 0; j < 2; j++) { sane[i][j].tuple.src.u.tcp.port = htons(ports[i]); sane[i][j].tuple.dst.protonum = IPPROTO_TCP; - sane[i][j].mask.src.u.tcp.port = 0xFFFF; - sane[i][j].mask.dst.protonum = 0xFF; sane[i][j].max_expected = 1; sane[i][j].timeout = 5 * 60; /* 5 Minutes */ sane[i][j].me = THIS_MODULE; @@ -221,9 +210,9 @@ static int __init nf_conntrack_sane_init(void) sprintf(tmpname, "sane-%d", ports[i]); sane[i][j].name = tmpname; - DEBUGP("nf_ct_sane: registering helper for pf: %d " - "port: %d\n", - sane[i][j].tuple.src.l3num, ports[i]); + pr_debug("nf_ct_sane: registering helper for pf: %d " + "port: %d\n", + sane[i][j].tuple.src.l3num, ports[i]); ret = nf_conntrack_helper_register(&sane[i][j]); if (ret) { printk(KERN_ERR "nf_ct_sane: failed to " diff --git a/net/netfilter/nf_conntrack_sip.c b/net/netfilter/nf_conntrack_sip.c index 1b5c6c1055f7..d449fa47491c 100644 --- a/net/netfilter/nf_conntrack_sip.c +++ b/net/netfilter/nf_conntrack_sip.c @@ -21,12 +21,6 @@ #include <net/netfilter/nf_conntrack_helper.h> #include <linux/netfilter/nf_conntrack_sip.h> -#if 0 -#define DEBUGP printk -#else -#define DEBUGP(format, args...) -#endif - MODULE_LICENSE("GPL"); MODULE_AUTHOR("Christian Hentschel <chentschel@arnet.com.ar>"); MODULE_DESCRIPTION("SIP connection tracking helper"); @@ -285,7 +279,7 @@ static int epaddr_len(struct nf_conn *ct, const char *dptr, const char *aux = dptr; if (!parse_addr(ct, dptr, &dptr, &addr, limit)) { - DEBUGP("ip: %s parse failed.!\n", dptr); + pr_debug("ip: %s parse failed.!\n", dptr); return 0; } @@ -301,6 +295,7 @@ static int epaddr_len(struct nf_conn *ct, const char *dptr, static int skp_epaddr_len(struct nf_conn *ct, const char *dptr, const char *limit, int *shift) { + const char *start = dptr; int s = *shift; /* Search for @, but stop at the end of the line. @@ -315,8 +310,10 @@ static int skp_epaddr_len(struct nf_conn *ct, const char *dptr, if (dptr <= limit && *dptr == '@') { dptr++; (*shift)++; - } else + } else { + dptr = start; *shift = s; + } return epaddr_len(ct, dptr, limit, shift); } @@ -336,7 +333,8 @@ int ct_sip_get_info(struct nf_conn *ct, while (dptr <= limit) { if ((strncmp(dptr, hnfo->lname, hnfo->lnlen) != 0) && - (strncmp(dptr, hnfo->sname, hnfo->snlen) != 0)) { + (hnfo->sname == NULL || + strncmp(dptr, hnfo->sname, hnfo->snlen) != 0)) { dptr++; continue; } @@ -344,8 +342,8 @@ int ct_sip_get_info(struct nf_conn *ct, ct_sip_lnlen(dptr, limit), hnfo->case_sensitive); if (!aux) { - DEBUGP("'%s' not found in '%s'.\n", hnfo->ln_str, - hnfo->lname); + pr_debug("'%s' not found in '%s'.\n", hnfo->ln_str, + hnfo->lname); return -1; } aux += hnfo->ln_strlen; @@ -356,11 +354,11 @@ int ct_sip_get_info(struct nf_conn *ct, *matchoff = (aux - k) + shift; - DEBUGP("%s match succeeded! - len: %u\n", hnfo->lname, - *matchlen); + pr_debug("%s match succeeded! - len: %u\n", hnfo->lname, + *matchlen); return 1; } - DEBUGP("%s header not found.\n", hnfo->lname); + pr_debug("%s header not found.\n", hnfo->lname); return 0; } EXPORT_SYMBOL_GPL(ct_sip_get_info); @@ -378,23 +376,23 @@ static int set_expected_rtp(struct sk_buff **pskb, int ret; typeof(nf_nat_sdp_hook) nf_nat_sdp; - exp = nf_conntrack_expect_alloc(ct); + exp = nf_ct_expect_alloc(ct); if (exp == NULL) return NF_DROP; - nf_conntrack_expect_init(exp, family, - &ct->tuplehash[!dir].tuple.src.u3, addr, - IPPROTO_UDP, NULL, &port); + nf_ct_expect_init(exp, family, + &ct->tuplehash[!dir].tuple.src.u3, addr, + IPPROTO_UDP, NULL, &port); nf_nat_sdp = rcu_dereference(nf_nat_sdp_hook); if (nf_nat_sdp && ct->status & IPS_NAT_MASK) ret = nf_nat_sdp(pskb, ctinfo, exp, dptr); else { - if (nf_conntrack_expect_related(exp) != 0) + if (nf_ct_expect_related(exp) != 0) ret = NF_DROP; else ret = NF_ACCEPT; } - nf_conntrack_expect_put(exp); + nf_ct_expect_put(exp); return ret; } @@ -424,7 +422,7 @@ static int sip_help(struct sk_buff **pskb, if (!skb_is_nonlinear(*pskb)) dptr = (*pskb)->data + dataoff; else { - DEBUGP("Copy of skbuff not supported yet.\n"); + pr_debug("Copy of skbuff not supported yet.\n"); goto out; } @@ -506,9 +504,6 @@ static int __init nf_conntrack_sip_init(void) for (j = 0; j < 2; j++) { sip[i][j].tuple.dst.protonum = IPPROTO_UDP; sip[i][j].tuple.src.u.udp.port = htons(ports[i]); - sip[i][j].mask.src.l3num = 0xFFFF; - sip[i][j].mask.src.u.udp.port = htons(0xFFFF); - sip[i][j].mask.dst.protonum = 0xFF; sip[i][j].max_expected = 2; sip[i][j].timeout = 3 * 60; /* 3 minutes */ sip[i][j].me = THIS_MODULE; @@ -521,7 +516,7 @@ static int __init nf_conntrack_sip_init(void) sprintf(tmpname, "sip-%u", i); sip[i][j].name = tmpname; - DEBUGP("port #%u: %u\n", i, ports[i]); + pr_debug("port #%u: %u\n", i, ports[i]); ret = nf_conntrack_helper_register(&sip[i][j]); if (ret) { diff --git a/net/netfilter/nf_conntrack_standalone.c b/net/netfilter/nf_conntrack_standalone.c index 45baeb0e30f9..a4ce5e887997 100644 --- a/net/netfilter/nf_conntrack_standalone.c +++ b/net/netfilter/nf_conntrack_standalone.c @@ -25,12 +25,6 @@ #include <net/netfilter/nf_conntrack_expect.h> #include <net/netfilter/nf_conntrack_helper.h> -#if 0 -#define DEBUGP printk -#else -#define DEBUGP(format, args...) -#endif - MODULE_LICENSE("GPL"); #ifdef CONFIG_PROC_FS @@ -60,35 +54,36 @@ struct ct_iter_state { unsigned int bucket; }; -static struct list_head *ct_get_first(struct seq_file *seq) +static struct hlist_node *ct_get_first(struct seq_file *seq) { struct ct_iter_state *st = seq->private; for (st->bucket = 0; st->bucket < nf_conntrack_htable_size; st->bucket++) { - if (!list_empty(&nf_conntrack_hash[st->bucket])) - return nf_conntrack_hash[st->bucket].next; + if (!hlist_empty(&nf_conntrack_hash[st->bucket])) + return nf_conntrack_hash[st->bucket].first; } return NULL; } -static struct list_head *ct_get_next(struct seq_file *seq, struct list_head *head) +static struct hlist_node *ct_get_next(struct seq_file *seq, + struct hlist_node *head) { struct ct_iter_state *st = seq->private; head = head->next; - while (head == &nf_conntrack_hash[st->bucket]) { + while (head == NULL) { if (++st->bucket >= nf_conntrack_htable_size) return NULL; - head = nf_conntrack_hash[st->bucket].next; + head = nf_conntrack_hash[st->bucket].first; } return head; } -static struct list_head *ct_get_idx(struct seq_file *seq, loff_t pos) +static struct hlist_node *ct_get_idx(struct seq_file *seq, loff_t pos) { - struct list_head *head = ct_get_first(seq); + struct hlist_node *head = ct_get_first(seq); if (head) while (pos && (head = ct_get_next(seq, head))) @@ -186,11 +181,11 @@ static int ct_seq_show(struct seq_file *s, void *v) if (seq_printf(s, "use=%u\n", atomic_read(&conntrack->ct_general.use))) return -ENOSPC; - + return 0; } -static struct seq_operations ct_seq_ops = { +static const struct seq_operations ct_seq_ops = { .start = ct_seq_start, .next = ct_seq_next, .stop = ct_seq_stop, @@ -203,7 +198,7 @@ static int ct_open(struct inode *inode, struct file *file) struct ct_iter_state *st; int ret; - st = kmalloc(sizeof(struct ct_iter_state), GFP_KERNEL); + st = kzalloc(sizeof(struct ct_iter_state), GFP_KERNEL); if (st == NULL) return -ENOMEM; ret = seq_open(file, &ct_seq_ops); @@ -211,7 +206,6 @@ static int ct_open(struct inode *inode, struct file *file) goto out_free; seq = file->private_data; seq->private = st; - memset(st, 0, sizeof(struct ct_iter_state)); return ret; out_free: kfree(st); @@ -294,7 +288,7 @@ static int ct_cpu_seq_show(struct seq_file *seq, void *v) return 0; } -static struct seq_operations ct_cpu_seq_ops = { +static const struct seq_operations ct_cpu_seq_ops = { .start = ct_cpu_seq_start, .next = ct_cpu_seq_next, .stop = ct_cpu_seq_stop, @@ -371,7 +365,14 @@ static ctl_table nf_ct_sysctl_table[] = { .extra1 = &log_invalid_proto_min, .extra2 = &log_invalid_proto_max, }, - + { + .ctl_name = CTL_UNNUMBERED, + .procname = "nf_conntrack_expect_max", + .data = &nf_ct_expect_max, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = &proc_dointvec, + }, { .ctl_name = 0 } }; @@ -410,7 +411,7 @@ EXPORT_SYMBOL_GPL(nf_ct_log_invalid); static int __init nf_conntrack_standalone_init(void) { #ifdef CONFIG_PROC_FS - struct proc_dir_entry *proc, *proc_exp, *proc_stat; + struct proc_dir_entry *proc, *proc_stat; #endif int ret = 0; @@ -422,13 +423,9 @@ static int __init nf_conntrack_standalone_init(void) proc = proc_net_fops_create("nf_conntrack", 0440, &ct_file_ops); if (!proc) goto cleanup_init; - proc_exp = proc_net_fops_create("nf_conntrack_expect", 0440, - &exp_file_ops); - if (!proc_exp) goto cleanup_proc; - proc_stat = create_proc_entry("nf_conntrack", S_IRUGO, proc_net_stat); if (!proc_stat) - goto cleanup_proc_exp; + goto cleanup_proc; proc_stat->proc_fops = &ct_cpu_seq_fops; proc_stat->owner = THIS_MODULE; @@ -448,8 +445,6 @@ static int __init nf_conntrack_standalone_init(void) #endif #ifdef CONFIG_PROC_FS remove_proc_entry("nf_conntrack", proc_net_stat); - cleanup_proc_exp: - proc_net_remove("nf_conntrack_expect"); cleanup_proc: proc_net_remove("nf_conntrack"); cleanup_init: @@ -465,7 +460,6 @@ static void __exit nf_conntrack_standalone_fini(void) #endif #ifdef CONFIG_PROC_FS remove_proc_entry("nf_conntrack", proc_net_stat); - proc_net_remove("nf_conntrack_expect"); proc_net_remove("nf_conntrack"); #endif /* CNFIG_PROC_FS */ nf_conntrack_cleanup(); diff --git a/net/netfilter/nf_conntrack_tftp.c b/net/netfilter/nf_conntrack_tftp.c index 37c4542e3112..cc19506cf2f8 100644 --- a/net/netfilter/nf_conntrack_tftp.c +++ b/net/netfilter/nf_conntrack_tftp.c @@ -29,13 +29,6 @@ static int ports_c; module_param_array(ports, ushort, &ports_c, 0400); MODULE_PARM_DESC(ports, "Port numbers of TFTP servers"); -#if 0 -#define DEBUGP(format, args...) printk("%s:%s:" format, \ - __FILE__, __FUNCTION__ , ## args) -#else -#define DEBUGP(format, args...) -#endif - unsigned int (*nf_nat_tftp_hook)(struct sk_buff **pskb, enum ip_conntrack_info ctinfo, struct nf_conntrack_expect *exp) __read_mostly; @@ -62,39 +55,35 @@ static int tftp_help(struct sk_buff **pskb, case TFTP_OPCODE_READ: case TFTP_OPCODE_WRITE: /* RRQ and WRQ works the same way */ - DEBUGP(""); NF_CT_DUMP_TUPLE(&ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple); NF_CT_DUMP_TUPLE(&ct->tuplehash[IP_CT_DIR_REPLY].tuple); - exp = nf_conntrack_expect_alloc(ct); + exp = nf_ct_expect_alloc(ct); if (exp == NULL) return NF_DROP; tuple = &ct->tuplehash[IP_CT_DIR_REPLY].tuple; - nf_conntrack_expect_init(exp, family, - &tuple->src.u3, &tuple->dst.u3, - IPPROTO_UDP, - NULL, &tuple->dst.u.udp.port); + nf_ct_expect_init(exp, family, &tuple->src.u3, &tuple->dst.u3, + IPPROTO_UDP, NULL, &tuple->dst.u.udp.port); - DEBUGP("expect: "); + pr_debug("expect: "); NF_CT_DUMP_TUPLE(&exp->tuple); - NF_CT_DUMP_TUPLE(&exp->mask); nf_nat_tftp = rcu_dereference(nf_nat_tftp_hook); if (nf_nat_tftp && ct->status & IPS_NAT_MASK) ret = nf_nat_tftp(pskb, ctinfo, exp); - else if (nf_conntrack_expect_related(exp) != 0) + else if (nf_ct_expect_related(exp) != 0) ret = NF_DROP; - nf_conntrack_expect_put(exp); + nf_ct_expect_put(exp); break; case TFTP_OPCODE_DATA: case TFTP_OPCODE_ACK: - DEBUGP("Data/ACK opcode\n"); + pr_debug("Data/ACK opcode\n"); break; case TFTP_OPCODE_ERROR: - DEBUGP("Error opcode\n"); + pr_debug("Error opcode\n"); break; default: - DEBUGP("Unknown opcode\n"); + pr_debug("Unknown opcode\n"); } return ret; } @@ -128,9 +117,6 @@ static int __init nf_conntrack_tftp_init(void) for (j = 0; j < 2; j++) { tftp[i][j].tuple.dst.protonum = IPPROTO_UDP; tftp[i][j].tuple.src.u.udp.port = htons(ports[i]); - tftp[i][j].mask.src.l3num = 0xFFFF; - tftp[i][j].mask.dst.protonum = 0xFF; - tftp[i][j].mask.src.u.udp.port = htons(0xFFFF); tftp[i][j].max_expected = 1; tftp[i][j].timeout = 5 * 60; /* 5 minutes */ tftp[i][j].me = THIS_MODULE; diff --git a/net/netfilter/nf_log.c b/net/netfilter/nf_log.c index 91b220cf5a1f..d67c4fbf6031 100644 --- a/net/netfilter/nf_log.c +++ b/net/netfilter/nf_log.c @@ -9,7 +9,7 @@ #include "nf_internals.h" -/* Internal logging interface, which relies on the real +/* Internal logging interface, which relies on the real LOG target modules */ #define NF_LOG_PREFIXLEN 128 @@ -140,7 +140,7 @@ static int seq_show(struct seq_file *s, void *v) return seq_printf(s, "%2lld %s\n", *pos, logger->name); } -static struct seq_operations nflog_seq_ops = { +static const struct seq_operations nflog_seq_ops = { .start = seq_start, .next = seq_next, .stop = seq_stop, diff --git a/net/netfilter/nf_queue.c b/net/netfilter/nf_queue.c index b1f2ace96f6d..a481a349f7bf 100644 --- a/net/netfilter/nf_queue.c +++ b/net/netfilter/nf_queue.c @@ -17,7 +17,7 @@ */ static struct nf_queue_handler *queue_handler[NPROTO]; -static DEFINE_RWLOCK(queue_handler_lock); +static DEFINE_MUTEX(queue_handler_mutex); /* return EBUSY when somebody else is registered, return EEXIST if the * same handler is registered, return 0 in case of success. */ @@ -28,30 +28,37 @@ int nf_register_queue_handler(int pf, struct nf_queue_handler *qh) if (pf >= NPROTO) return -EINVAL; - write_lock_bh(&queue_handler_lock); + mutex_lock(&queue_handler_mutex); if (queue_handler[pf] == qh) ret = -EEXIST; else if (queue_handler[pf]) ret = -EBUSY; else { - queue_handler[pf] = qh; + rcu_assign_pointer(queue_handler[pf], qh); ret = 0; } - write_unlock_bh(&queue_handler_lock); + mutex_unlock(&queue_handler_mutex); return ret; } EXPORT_SYMBOL(nf_register_queue_handler); /* The caller must flush their queue before this */ -int nf_unregister_queue_handler(int pf) +int nf_unregister_queue_handler(int pf, struct nf_queue_handler *qh) { if (pf >= NPROTO) return -EINVAL; - write_lock_bh(&queue_handler_lock); - queue_handler[pf] = NULL; - write_unlock_bh(&queue_handler_lock); + mutex_lock(&queue_handler_mutex); + if (queue_handler[pf] != qh) { + mutex_unlock(&queue_handler_mutex); + return -EINVAL; + } + + rcu_assign_pointer(queue_handler[pf], NULL); + mutex_unlock(&queue_handler_mutex); + + synchronize_rcu(); return 0; } @@ -61,12 +68,14 @@ void nf_unregister_queue_handlers(struct nf_queue_handler *qh) { int pf; - write_lock_bh(&queue_handler_lock); + mutex_lock(&queue_handler_mutex); for (pf = 0; pf < NPROTO; pf++) { if (queue_handler[pf] == qh) - queue_handler[pf] = NULL; + rcu_assign_pointer(queue_handler[pf], NULL); } - write_unlock_bh(&queue_handler_lock); + mutex_unlock(&queue_handler_mutex); + + synchronize_rcu(); } EXPORT_SYMBOL_GPL(nf_unregister_queue_handlers); @@ -89,18 +98,21 @@ static int __nf_queue(struct sk_buff *skb, struct net_device *physoutdev = NULL; #endif struct nf_afinfo *afinfo; + struct nf_queue_handler *qh; /* QUEUE == DROP if noone is waiting, to be safe. */ - read_lock(&queue_handler_lock); - if (!queue_handler[pf]) { - read_unlock(&queue_handler_lock); + rcu_read_lock(); + + qh = rcu_dereference(queue_handler[pf]); + if (!qh) { + rcu_read_unlock(); kfree_skb(skb); return 1; } afinfo = nf_get_afinfo(pf); if (!afinfo) { - read_unlock(&queue_handler_lock); + rcu_read_unlock(); kfree_skb(skb); return 1; } @@ -110,7 +122,7 @@ static int __nf_queue(struct sk_buff *skb, if (net_ratelimit()) printk(KERN_ERR "OOM queueing packet %p\n", skb); - read_unlock(&queue_handler_lock); + rcu_read_unlock(); kfree_skb(skb); return 1; } @@ -120,7 +132,7 @@ static int __nf_queue(struct sk_buff *skb, /* If it's going away, ignore hook. */ if (!try_module_get(info->elem->owner)) { - read_unlock(&queue_handler_lock); + rcu_read_unlock(); kfree(info); return 0; } @@ -138,10 +150,9 @@ static int __nf_queue(struct sk_buff *skb, } #endif afinfo->saveroute(skb, info); - status = queue_handler[pf]->outfn(skb, info, queuenum, - queue_handler[pf]->data); + status = qh->outfn(skb, info, queuenum, qh->data); - read_unlock(&queue_handler_lock); + rcu_read_unlock(); if (status < 0) { /* James M doesn't say fuck enough. */ @@ -308,18 +319,18 @@ static int seq_show(struct seq_file *s, void *v) loff_t *pos = v; struct nf_queue_handler *qh; - read_lock_bh(&queue_handler_lock); - qh = queue_handler[*pos]; + rcu_read_lock(); + qh = rcu_dereference(queue_handler[*pos]); if (!qh) ret = seq_printf(s, "%2lld NONE\n", *pos); else ret = seq_printf(s, "%2lld %s\n", *pos, qh->name); - read_unlock_bh(&queue_handler_lock); + rcu_read_unlock(); return ret; } -static struct seq_operations nfqueue_seq_ops = { +static const struct seq_operations nfqueue_seq_ops = { .start = seq_start, .next = seq_next, .stop = seq_stop, diff --git a/net/netfilter/nf_sockopt.c b/net/netfilter/nf_sockopt.c index 8b8ece750313..e32761ce260c 100644 --- a/net/netfilter/nf_sockopt.c +++ b/net/netfilter/nf_sockopt.c @@ -55,18 +55,7 @@ EXPORT_SYMBOL(nf_register_sockopt); void nf_unregister_sockopt(struct nf_sockopt_ops *reg) { - /* No point being interruptible: we're probably in cleanup_module() */ - restart: mutex_lock(&nf_sockopt_mutex); - if (reg->use != 0) { - /* To be woken by nf_sockopt call... */ - /* FIXME: Stuart Young's name appears gratuitously. */ - set_current_state(TASK_UNINTERRUPTIBLE); - reg->cleanup_task = current; - mutex_unlock(&nf_sockopt_mutex); - schedule(); - goto restart; - } list_del(®->list); mutex_unlock(&nf_sockopt_mutex); } @@ -86,10 +75,11 @@ static int nf_sockopt(struct sock *sk, int pf, int val, list_for_each(i, &nf_sockopts) { ops = (struct nf_sockopt_ops *)i; if (ops->pf == pf) { + if (!try_module_get(ops->owner)) + goto out_nosup; if (get) { if (val >= ops->get_optmin && val < ops->get_optmax) { - ops->use++; mutex_unlock(&nf_sockopt_mutex); ret = ops->get(sk, val, opt, len); goto out; @@ -97,23 +87,20 @@ static int nf_sockopt(struct sock *sk, int pf, int val, } else { if (val >= ops->set_optmin && val < ops->set_optmax) { - ops->use++; mutex_unlock(&nf_sockopt_mutex); ret = ops->set(sk, val, opt, *len); goto out; } } + module_put(ops->owner); } } + out_nosup: mutex_unlock(&nf_sockopt_mutex); return -ENOPROTOOPT; out: - mutex_lock(&nf_sockopt_mutex); - ops->use--; - if (ops->cleanup_task) - wake_up_process(ops->cleanup_task); - mutex_unlock(&nf_sockopt_mutex); + module_put(ops->owner); return ret; } @@ -144,10 +131,12 @@ static int compat_nf_sockopt(struct sock *sk, int pf, int val, list_for_each(i, &nf_sockopts) { ops = (struct nf_sockopt_ops *)i; if (ops->pf == pf) { + if (!try_module_get(ops->owner)) + goto out_nosup; + if (get) { if (val >= ops->get_optmin && val < ops->get_optmax) { - ops->use++; mutex_unlock(&nf_sockopt_mutex); if (ops->compat_get) ret = ops->compat_get(sk, @@ -160,7 +149,6 @@ static int compat_nf_sockopt(struct sock *sk, int pf, int val, } else { if (val >= ops->set_optmin && val < ops->set_optmax) { - ops->use++; mutex_unlock(&nf_sockopt_mutex); if (ops->compat_set) ret = ops->compat_set(sk, @@ -171,17 +159,15 @@ static int compat_nf_sockopt(struct sock *sk, int pf, int val, goto out; } } + module_put(ops->owner); } } + out_nosup: mutex_unlock(&nf_sockopt_mutex); return -ENOPROTOOPT; out: - mutex_lock(&nf_sockopt_mutex); - ops->use--; - if (ops->cleanup_task) - wake_up_process(ops->cleanup_task); - mutex_unlock(&nf_sockopt_mutex); + module_put(ops->owner); return ret; } diff --git a/net/netfilter/nfnetlink_log.c b/net/netfilter/nfnetlink_log.c index e32e30e7a17c..2351533a8507 100644 --- a/net/netfilter/nfnetlink_log.c +++ b/net/netfilter/nfnetlink_log.c @@ -58,7 +58,6 @@ struct nfulnl_instance { unsigned int qlen; /* number of nlmsgs in skb */ struct sk_buff *skb; /* pre-allocatd skb */ - struct nlmsghdr *lastnlh; /* netlink header of last msg in skb */ struct timer_list timer; int peer_pid; /* PID of the peer process */ @@ -345,10 +344,12 @@ static struct sk_buff *nfulnl_alloc_skb(unsigned int inst_size, static int __nfulnl_send(struct nfulnl_instance *inst) { - int status; + int status = -1; if (inst->qlen > 1) - inst->lastnlh->nlmsg_type = NLMSG_DONE; + NLMSG_PUT(inst->skb, 0, 0, + NLMSG_DONE, + sizeof(struct nfgenmsg)); status = nfnetlink_unicast(inst->skb, inst->peer_pid, MSG_DONTWAIT); if (status < 0) { @@ -358,8 +359,8 @@ __nfulnl_send(struct nfulnl_instance *inst) inst->qlen = 0; inst->skb = NULL; - inst->lastnlh = NULL; +nlmsg_failure: return status; } @@ -538,7 +539,6 @@ __build_packet_message(struct nfulnl_instance *inst, } nlh->nlmsg_len = inst->skb->tail - old_tail; - inst->lastnlh = nlh; return 0; nlmsg_failure: @@ -644,7 +644,8 @@ nfulnl_log_packet(unsigned int pf, } if (inst->qlen >= qthreshold || - (inst->skb && size > skb_tailroom(inst->skb))) { + (inst->skb && size > + skb_tailroom(inst->skb) - sizeof(struct nfgenmsg))) { /* either the queue len is too high or we don't have * enough room in the skb left. flush to userspace. */ UDEBUG("flushing old skb\n"); @@ -962,7 +963,7 @@ static int seq_show(struct seq_file *s, void *v) inst->flushtimeout, atomic_read(&inst->use)); } -static struct seq_operations nful_seq_ops = { +static const struct seq_operations nful_seq_ops = { .start = seq_start, .next = seq_next, .stop = seq_stop, diff --git a/net/netfilter/nfnetlink_queue.c b/net/netfilter/nfnetlink_queue.c index 7a97bec67729..bb65a38c816c 100644 --- a/net/netfilter/nfnetlink_queue.c +++ b/net/netfilter/nfnetlink_queue.c @@ -913,9 +913,7 @@ nfqnl_recv_config(struct sock *ctnl, struct sk_buff *skb, case NFQNL_CFG_CMD_PF_UNBIND: QDEBUG("unregistering queue handler for pf=%u\n", ntohs(cmd->pf)); - /* This is a bug and a feature. We can unregister - * other handlers(!) */ - ret = nf_unregister_queue_handler(ntohs(cmd->pf)); + ret = nf_unregister_queue_handler(ntohs(cmd->pf), &nfqh); break; default: ret = -EINVAL; @@ -1050,7 +1048,7 @@ static int seq_show(struct seq_file *s, void *v) atomic_read(&inst->use)); } -static struct seq_operations nfqnl_seq_ops = { +static const struct seq_operations nfqnl_seq_ops = { .start = seq_start, .next = seq_next, .stop = seq_stop, diff --git a/net/netfilter/x_tables.c b/net/netfilter/x_tables.c index 0eb2504b89b5..cc2baa6d5a7a 100644 --- a/net/netfilter/x_tables.c +++ b/net/netfilter/x_tables.c @@ -320,8 +320,8 @@ int xt_check_match(const struct xt_match *match, unsigned short family, return -EINVAL; } if (match->hooks && (hook_mask & ~match->hooks) != 0) { - printk("%s_tables: %s match: bad hook_mask %u\n", - xt_prefix[family], match->name, hook_mask); + printk("%s_tables: %s match: bad hook_mask %u/%u\n", + xt_prefix[family], match->name, hook_mask, match->hooks); return -EINVAL; } if (match->proto && (match->proto != proto || inv_proto)) { @@ -410,8 +410,9 @@ int xt_check_target(const struct xt_target *target, unsigned short family, return -EINVAL; } if (target->hooks && (hook_mask & ~target->hooks) != 0) { - printk("%s_tables: %s target: bad hook_mask %u\n", - xt_prefix[family], target->name, hook_mask); + printk("%s_tables: %s target: bad hook_mask %u/%u\n", + xt_prefix[family], target->name, hook_mask, + target->hooks); return -EINVAL; } if (target->proto && (target->proto != proto || inv_proto)) { @@ -744,7 +745,7 @@ static int xt_name_seq_show(struct seq_file *seq, void *v) return 0; } -static struct seq_operations xt_tgt_seq_ops = { +static const struct seq_operations xt_tgt_seq_ops = { .start = xt_tgt_seq_start, .next = xt_tgt_seq_next, .stop = xt_tgt_seq_stop, diff --git a/net/netfilter/xt_CLASSIFY.c b/net/netfilter/xt_CLASSIFY.c index 30884833e665..519428566829 100644 --- a/net/netfilter/xt_CLASSIFY.c +++ b/net/netfilter/xt_CLASSIFY.c @@ -39,7 +39,7 @@ target(struct sk_buff **pskb, return XT_CONTINUE; } -static struct xt_target xt_classify_target[] = { +static struct xt_target xt_classify_target[] __read_mostly = { { .family = AF_INET, .name = "CLASSIFY", diff --git a/net/netfilter/xt_CONNMARK.c b/net/netfilter/xt_CONNMARK.c index b03ce009d0bf..5a00c5444334 100644 --- a/net/netfilter/xt_CONNMARK.c +++ b/net/netfilter/xt_CONNMARK.c @@ -76,33 +76,33 @@ target(struct sk_buff **pskb, return XT_CONTINUE; } -static int +static bool checkentry(const char *tablename, const void *entry, const struct xt_target *target, void *targinfo, unsigned int hook_mask) { - struct xt_connmark_target_info *matchinfo = targinfo; + const struct xt_connmark_target_info *matchinfo = targinfo; if (nf_ct_l3proto_try_module_get(target->family) < 0) { printk(KERN_WARNING "can't load conntrack support for " "proto=%d\n", target->family); - return 0; + return false; } if (matchinfo->mode == XT_CONNMARK_RESTORE) { if (strcmp(tablename, "mangle") != 0) { printk(KERN_WARNING "CONNMARK: restore can only be " "called from \"mangle\" table, not \"%s\"\n", tablename); - return 0; + return false; } } if (matchinfo->mark > 0xffffffff || matchinfo->mask > 0xffffffff) { printk(KERN_WARNING "CONNMARK: Only supports 32bit mark\n"); - return 0; + return false; } - return 1; + return true; } static void @@ -121,7 +121,7 @@ struct compat_xt_connmark_target_info { static void compat_from_user(void *dst, void *src) { - struct compat_xt_connmark_target_info *cm = src; + const struct compat_xt_connmark_target_info *cm = src; struct xt_connmark_target_info m = { .mark = cm->mark, .mask = cm->mask, @@ -132,7 +132,7 @@ static void compat_from_user(void *dst, void *src) static int compat_to_user(void __user *dst, void *src) { - struct xt_connmark_target_info *m = src; + const struct xt_connmark_target_info *m = src; struct compat_xt_connmark_target_info cm = { .mark = m->mark, .mask = m->mask, @@ -142,7 +142,7 @@ static int compat_to_user(void __user *dst, void *src) } #endif /* CONFIG_COMPAT */ -static struct xt_target xt_connmark_target[] = { +static struct xt_target xt_connmark_target[] __read_mostly = { { .name = "CONNMARK", .family = AF_INET, diff --git a/net/netfilter/xt_CONNSECMARK.c b/net/netfilter/xt_CONNSECMARK.c index 81c0c58bab47..63d73138c1b9 100644 --- a/net/netfilter/xt_CONNSECMARK.c +++ b/net/netfilter/xt_CONNSECMARK.c @@ -33,7 +33,7 @@ MODULE_ALIAS("ip6t_CONNSECMARK"); * If the packet has a security mark and the connection does not, copy * the security mark from the packet to the connection. */ -static void secmark_save(struct sk_buff *skb) +static void secmark_save(const struct sk_buff *skb) { if (skb->secmark) { struct nf_conn *ct; @@ -85,16 +85,16 @@ static unsigned int target(struct sk_buff **pskb, const struct net_device *in, return XT_CONTINUE; } -static int checkentry(const char *tablename, const void *entry, - const struct xt_target *target, void *targinfo, - unsigned int hook_mask) +static bool checkentry(const char *tablename, const void *entry, + const struct xt_target *target, void *targinfo, + unsigned int hook_mask) { - struct xt_connsecmark_target_info *info = targinfo; + const struct xt_connsecmark_target_info *info = targinfo; if (nf_ct_l3proto_try_module_get(target->family) < 0) { printk(KERN_WARNING "can't load conntrack support for " "proto=%d\n", target->family); - return 0; + return false; } switch (info->mode) { case CONNSECMARK_SAVE: @@ -103,10 +103,10 @@ static int checkentry(const char *tablename, const void *entry, default: printk(KERN_INFO PFX "invalid mode: %hu\n", info->mode); - return 0; + return false; } - return 1; + return true; } static void @@ -115,7 +115,7 @@ destroy(const struct xt_target *target, void *targinfo) nf_ct_l3proto_module_put(target->family); } -static struct xt_target xt_connsecmark_target[] = { +static struct xt_target xt_connsecmark_target[] __read_mostly = { { .name = "CONNSECMARK", .family = AF_INET, diff --git a/net/netfilter/xt_DSCP.c b/net/netfilter/xt_DSCP.c index 9f2f2201f6ae..798ab731009d 100644 --- a/net/netfilter/xt_DSCP.c +++ b/net/netfilter/xt_DSCP.c @@ -66,22 +66,22 @@ static unsigned int target6(struct sk_buff **pskb, return XT_CONTINUE; } -static int checkentry(const char *tablename, - const void *e_void, - const struct xt_target *target, - void *targinfo, - unsigned int hook_mask) +static bool checkentry(const char *tablename, + const void *e_void, + const struct xt_target *target, + void *targinfo, + unsigned int hook_mask) { const u_int8_t dscp = ((struct xt_DSCP_info *)targinfo)->dscp; - if ((dscp > XT_DSCP_MAX)) { + if (dscp > XT_DSCP_MAX) { printk(KERN_WARNING "DSCP: dscp %x out of range\n", dscp); - return 0; + return false; } - return 1; + return true; } -static struct xt_target xt_dscp_target[] = { +static struct xt_target xt_dscp_target[] __read_mostly = { { .name = "DSCP", .family = AF_INET, diff --git a/net/netfilter/xt_MARK.c b/net/netfilter/xt_MARK.c index 43817808d865..f30fe0baf7de 100644 --- a/net/netfilter/xt_MARK.c +++ b/net/netfilter/xt_MARK.c @@ -65,43 +65,43 @@ target_v1(struct sk_buff **pskb, } -static int +static bool checkentry_v0(const char *tablename, const void *entry, const struct xt_target *target, void *targinfo, unsigned int hook_mask) { - struct xt_mark_target_info *markinfo = targinfo; + const struct xt_mark_target_info *markinfo = targinfo; if (markinfo->mark > 0xffffffff) { printk(KERN_WARNING "MARK: Only supports 32bit wide mark\n"); - return 0; + return false; } - return 1; + return true; } -static int +static bool checkentry_v1(const char *tablename, const void *entry, const struct xt_target *target, void *targinfo, unsigned int hook_mask) { - struct xt_mark_target_info_v1 *markinfo = targinfo; + const struct xt_mark_target_info_v1 *markinfo = targinfo; if (markinfo->mode != XT_MARK_SET && markinfo->mode != XT_MARK_AND && markinfo->mode != XT_MARK_OR) { printk(KERN_WARNING "MARK: unknown mode %u\n", markinfo->mode); - return 0; + return false; } if (markinfo->mark > 0xffffffff) { printk(KERN_WARNING "MARK: Only supports 32bit wide mark\n"); - return 0; + return false; } - return 1; + return true; } #ifdef CONFIG_COMPAT @@ -114,7 +114,7 @@ struct compat_xt_mark_target_info_v1 { static void compat_from_user_v1(void *dst, void *src) { - struct compat_xt_mark_target_info_v1 *cm = src; + const struct compat_xt_mark_target_info_v1 *cm = src; struct xt_mark_target_info_v1 m = { .mark = cm->mark, .mode = cm->mode, @@ -124,7 +124,7 @@ static void compat_from_user_v1(void *dst, void *src) static int compat_to_user_v1(void __user *dst, void *src) { - struct xt_mark_target_info_v1 *m = src; + const struct xt_mark_target_info_v1 *m = src; struct compat_xt_mark_target_info_v1 cm = { .mark = m->mark, .mode = m->mode, @@ -133,7 +133,7 @@ static int compat_to_user_v1(void __user *dst, void *src) } #endif /* CONFIG_COMPAT */ -static struct xt_target xt_mark_target[] = { +static struct xt_target xt_mark_target[] __read_mostly = { { .name = "MARK", .family = AF_INET, diff --git a/net/netfilter/xt_NFLOG.c b/net/netfilter/xt_NFLOG.c index 901ed7abaa1b..d3594c7ccb26 100644 --- a/net/netfilter/xt_NFLOG.c +++ b/net/netfilter/xt_NFLOG.c @@ -38,21 +38,21 @@ nflog_target(struct sk_buff **pskb, return XT_CONTINUE; } -static int +static bool nflog_checkentry(const char *tablename, const void *entry, const struct xt_target *target, void *targetinfo, unsigned int hookmask) { - struct xt_nflog_info *info = targetinfo; + const struct xt_nflog_info *info = targetinfo; if (info->flags & ~XT_NFLOG_MASK) - return 0; + return false; if (info->prefix[sizeof(info->prefix) - 1] != '\0') - return 0; - return 1; + return false; + return true; } -static struct xt_target xt_nflog_target[] = { +static struct xt_target xt_nflog_target[] __read_mostly = { { .name = "NFLOG", .family = AF_INET, diff --git a/net/netfilter/xt_NFQUEUE.c b/net/netfilter/xt_NFQUEUE.c index 201155b316e0..13f59f3e8c38 100644 --- a/net/netfilter/xt_NFQUEUE.c +++ b/net/netfilter/xt_NFQUEUE.c @@ -36,7 +36,7 @@ target(struct sk_buff **pskb, return NF_QUEUE_NR(tinfo->queuenum); } -static struct xt_target xt_nfqueue_target[] = { +static struct xt_target xt_nfqueue_target[] __read_mostly = { { .name = "NFQUEUE", .family = AF_INET, diff --git a/net/netfilter/xt_NOTRACK.c b/net/netfilter/xt_NOTRACK.c index 5085fb3d1e2d..b7d6312fccc7 100644 --- a/net/netfilter/xt_NOTRACK.c +++ b/net/netfilter/xt_NOTRACK.c @@ -33,7 +33,7 @@ target(struct sk_buff **pskb, return XT_CONTINUE; } -static struct xt_target xt_notrack_target[] = { +static struct xt_target xt_notrack_target[] __read_mostly = { { .name = "NOTRACK", .family = AF_INET, diff --git a/net/netfilter/xt_SECMARK.c b/net/netfilter/xt_SECMARK.c index 705f0e830a79..c83779a941a1 100644 --- a/net/netfilter/xt_SECMARK.c +++ b/net/netfilter/xt_SECMARK.c @@ -51,7 +51,7 @@ static unsigned int target(struct sk_buff **pskb, const struct net_device *in, return XT_CONTINUE; } -static int checkentry_selinux(struct xt_secmark_target_info *info) +static bool checkentry_selinux(struct xt_secmark_target_info *info) { int err; struct xt_secmark_target_selinux_info *sel = &info->u.sel; @@ -63,53 +63,53 @@ static int checkentry_selinux(struct xt_secmark_target_info *info) if (err == -EINVAL) printk(KERN_INFO PFX "invalid SELinux context \'%s\'\n", sel->selctx); - return 0; + return false; } if (!sel->selsid) { printk(KERN_INFO PFX "unable to map SELinux context \'%s\'\n", sel->selctx); - return 0; + return false; } err = selinux_relabel_packet_permission(sel->selsid); if (err) { printk(KERN_INFO PFX "unable to obtain relabeling permission\n"); - return 0; + return false; } - return 1; + return true; } -static int checkentry(const char *tablename, const void *entry, - const struct xt_target *target, void *targinfo, - unsigned int hook_mask) +static bool checkentry(const char *tablename, const void *entry, + const struct xt_target *target, void *targinfo, + unsigned int hook_mask) { struct xt_secmark_target_info *info = targinfo; if (mode && mode != info->mode) { printk(KERN_INFO PFX "mode already set to %hu cannot mix with " "rules for mode %hu\n", mode, info->mode); - return 0; + return false; } switch (info->mode) { case SECMARK_MODE_SEL: if (!checkentry_selinux(info)) - return 0; + return false; break; default: printk(KERN_INFO PFX "invalid mode: %hu\n", info->mode); - return 0; + return false; } if (!mode) mode = info->mode; - return 1; + return true; } -static struct xt_target xt_secmark_target[] = { +static struct xt_target xt_secmark_target[] __read_mostly = { { .name = "SECMARK", .family = AF_INET, diff --git a/net/netfilter/xt_TCPMSS.c b/net/netfilter/xt_TCPMSS.c index 15fe8f649510..d40f7e4b1289 100644 --- a/net/netfilter/xt_TCPMSS.c +++ b/net/netfilter/xt_TCPMSS.c @@ -93,7 +93,7 @@ tcpmss_mangle_packet(struct sk_buff **pskb, return 0; opt[i+2] = (newmss & 0xff00) >> 8; - opt[i+3] = (newmss & 0x00ff); + opt[i+3] = newmss & 0x00ff; nf_proto_csum_replace2(&tcph->check, *pskb, htons(oldmss), htons(newmss), 0); @@ -126,7 +126,7 @@ tcpmss_mangle_packet(struct sk_buff **pskb, opt[0] = TCPOPT_MSS; opt[1] = TCPOLEN_MSS; opt[2] = (newmss & 0xff00) >> 8; - opt[3] = (newmss & 0x00ff); + opt[3] = newmss & 0x00ff; nf_proto_csum_replace4(&tcph->check, *pskb, 0, *((__be32 *)opt), 0); @@ -197,19 +197,19 @@ xt_tcpmss_target6(struct sk_buff **pskb, #define TH_SYN 0x02 /* Must specify -p tcp --syn */ -static inline int find_syn_match(const struct xt_entry_match *m) +static inline bool find_syn_match(const struct xt_entry_match *m) { const struct xt_tcp *tcpinfo = (const struct xt_tcp *)m->data; if (strcmp(m->u.kernel.match->name, "tcp") == 0 && tcpinfo->flg_cmp & TH_SYN && !(tcpinfo->invflags & XT_TCP_INV_FLAGS)) - return 1; + return true; - return 0; + return false; } -static int +static bool xt_tcpmss_checkentry4(const char *tablename, const void *entry, const struct xt_target *target, @@ -225,16 +225,16 @@ xt_tcpmss_checkentry4(const char *tablename, (1 << NF_IP_POST_ROUTING))) != 0) { printk("xt_TCPMSS: path-MTU clamping only supported in " "FORWARD, OUTPUT and POSTROUTING hooks\n"); - return 0; + return false; } if (IPT_MATCH_ITERATE(e, find_syn_match)) - return 1; + return true; printk("xt_TCPMSS: Only works on TCP SYN packets\n"); - return 0; + return false; } #if defined(CONFIG_IP6_NF_IPTABLES) || defined(CONFIG_IP6_NF_IPTABLES_MODULE) -static int +static bool xt_tcpmss_checkentry6(const char *tablename, const void *entry, const struct xt_target *target, @@ -250,16 +250,16 @@ xt_tcpmss_checkentry6(const char *tablename, (1 << NF_IP6_POST_ROUTING))) != 0) { printk("xt_TCPMSS: path-MTU clamping only supported in " "FORWARD, OUTPUT and POSTROUTING hooks\n"); - return 0; + return false; } if (IP6T_MATCH_ITERATE(e, find_syn_match)) - return 1; + return true; printk("xt_TCPMSS: Only works on TCP SYN packets\n"); - return 0; + return false; } #endif -static struct xt_target xt_tcpmss_reg[] = { +static struct xt_target xt_tcpmss_reg[] __read_mostly = { { .family = AF_INET, .name = "TCPMSS", diff --git a/net/netfilter/xt_comment.c b/net/netfilter/xt_comment.c index 7db492d65220..64bcdb0fe1e6 100644 --- a/net/netfilter/xt_comment.c +++ b/net/netfilter/xt_comment.c @@ -15,7 +15,7 @@ MODULE_LICENSE("GPL"); MODULE_ALIAS("ipt_comment"); MODULE_ALIAS("ip6t_comment"); -static int +static bool match(const struct sk_buff *skb, const struct net_device *in, const struct net_device *out, @@ -23,13 +23,13 @@ match(const struct sk_buff *skb, const void *matchinfo, int offset, unsigned int protooff, - int *hotdrop) + bool *hotdrop) { /* We always match */ - return 1; + return true; } -static struct xt_match xt_comment_match[] = { +static struct xt_match xt_comment_match[] __read_mostly = { { .name = "comment", .family = AF_INET, diff --git a/net/netfilter/xt_connbytes.c b/net/netfilter/xt_connbytes.c index 804afe55e141..dd4d79b8fc9d 100644 --- a/net/netfilter/xt_connbytes.c +++ b/net/netfilter/xt_connbytes.c @@ -15,7 +15,7 @@ MODULE_AUTHOR("Harald Welte <laforge@netfilter.org>"); MODULE_DESCRIPTION("iptables match for matching number of pkts/bytes per connection"); MODULE_ALIAS("ipt_connbytes"); -static int +static bool match(const struct sk_buff *skb, const struct net_device *in, const struct net_device *out, @@ -23,10 +23,10 @@ match(const struct sk_buff *skb, const void *matchinfo, int offset, unsigned int protoff, - int *hotdrop) + bool *hotdrop) { const struct xt_connbytes_info *sinfo = matchinfo; - struct nf_conn *ct; + const struct nf_conn *ct; enum ip_conntrack_info ctinfo; u_int64_t what = 0; /* initialize to make gcc happy */ u_int64_t bytes = 0; @@ -35,7 +35,7 @@ match(const struct sk_buff *skb, ct = nf_ct_get(skb, &ctinfo); if (!ct) - return 0; + return false; counters = ct->counters; switch (sinfo->what) { @@ -90,36 +90,36 @@ match(const struct sk_buff *skb, } if (sinfo->count.to) - return (what <= sinfo->count.to && what >= sinfo->count.from); + return what <= sinfo->count.to && what >= sinfo->count.from; else - return (what >= sinfo->count.from); + return what >= sinfo->count.from; } -static int check(const char *tablename, - const void *ip, - const struct xt_match *match, - void *matchinfo, - unsigned int hook_mask) +static bool check(const char *tablename, + const void *ip, + const struct xt_match *match, + void *matchinfo, + unsigned int hook_mask) { const struct xt_connbytes_info *sinfo = matchinfo; if (sinfo->what != XT_CONNBYTES_PKTS && sinfo->what != XT_CONNBYTES_BYTES && sinfo->what != XT_CONNBYTES_AVGPKT) - return 0; + return false; if (sinfo->direction != XT_CONNBYTES_DIR_ORIGINAL && sinfo->direction != XT_CONNBYTES_DIR_REPLY && sinfo->direction != XT_CONNBYTES_DIR_BOTH) - return 0; + return false; if (nf_ct_l3proto_try_module_get(match->family) < 0) { printk(KERN_WARNING "can't load conntrack support for " "proto=%d\n", match->family); - return 0; + return false; } - return 1; + return true; } static void @@ -128,7 +128,7 @@ destroy(const struct xt_match *match, void *matchinfo) nf_ct_l3proto_module_put(match->family); } -static struct xt_match xt_connbytes_match[] = { +static struct xt_match xt_connbytes_match[] __read_mostly = { { .name = "connbytes", .family = AF_INET, diff --git a/net/netfilter/xt_connmark.c b/net/netfilter/xt_connmark.c index e1803256c792..e73fa9b46cf7 100644 --- a/net/netfilter/xt_connmark.c +++ b/net/netfilter/xt_connmark.c @@ -30,7 +30,7 @@ MODULE_DESCRIPTION("IP tables connmark match module"); MODULE_LICENSE("GPL"); MODULE_ALIAS("ipt_connmark"); -static int +static bool match(const struct sk_buff *skb, const struct net_device *in, const struct net_device *out, @@ -38,38 +38,38 @@ match(const struct sk_buff *skb, const void *matchinfo, int offset, unsigned int protoff, - int *hotdrop) + bool *hotdrop) { const struct xt_connmark_info *info = matchinfo; - struct nf_conn *ct; + const struct nf_conn *ct; enum ip_conntrack_info ctinfo; ct = nf_ct_get(skb, &ctinfo); if (!ct) - return 0; + return false; - return (((ct->mark) & info->mask) == info->mark) ^ info->invert; + return ((ct->mark & info->mask) == info->mark) ^ info->invert; } -static int +static bool checkentry(const char *tablename, const void *ip, const struct xt_match *match, void *matchinfo, unsigned int hook_mask) { - struct xt_connmark_info *cm = matchinfo; + const struct xt_connmark_info *cm = matchinfo; if (cm->mark > 0xffffffff || cm->mask > 0xffffffff) { printk(KERN_WARNING "connmark: only support 32bit mark\n"); - return 0; + return false; } if (nf_ct_l3proto_try_module_get(match->family) < 0) { printk(KERN_WARNING "can't load conntrack support for " "proto=%d\n", match->family); - return 0; + return false; } - return 1; + return true; } static void @@ -88,7 +88,7 @@ struct compat_xt_connmark_info { static void compat_from_user(void *dst, void *src) { - struct compat_xt_connmark_info *cm = src; + const struct compat_xt_connmark_info *cm = src; struct xt_connmark_info m = { .mark = cm->mark, .mask = cm->mask, @@ -99,7 +99,7 @@ static void compat_from_user(void *dst, void *src) static int compat_to_user(void __user *dst, void *src) { - struct xt_connmark_info *m = src; + const struct xt_connmark_info *m = src; struct compat_xt_connmark_info cm = { .mark = m->mark, .mask = m->mask, @@ -109,7 +109,7 @@ static int compat_to_user(void __user *dst, void *src) } #endif /* CONFIG_COMPAT */ -static struct xt_match xt_connmark_match[] = { +static struct xt_match xt_connmark_match[] __read_mostly = { { .name = "connmark", .family = AF_INET, diff --git a/net/netfilter/xt_conntrack.c b/net/netfilter/xt_conntrack.c index 189ded5f378b..ca4b69f020a8 100644 --- a/net/netfilter/xt_conntrack.c +++ b/net/netfilter/xt_conntrack.c @@ -19,7 +19,7 @@ MODULE_AUTHOR("Marc Boucher <marc@mbsi.ca>"); MODULE_DESCRIPTION("iptables connection tracking match module"); MODULE_ALIAS("ipt_conntrack"); -static int +static bool match(const struct sk_buff *skb, const struct net_device *in, const struct net_device *out, @@ -27,14 +27,14 @@ match(const struct sk_buff *skb, const void *matchinfo, int offset, unsigned int protoff, - int *hotdrop) + bool *hotdrop) { const struct xt_conntrack_info *sinfo = matchinfo; - struct nf_conn *ct; + const struct nf_conn *ct; enum ip_conntrack_info ctinfo; unsigned int statebit; - ct = nf_ct_get((struct sk_buff *)skb, &ctinfo); + ct = nf_ct_get(skb, &ctinfo); #define FWINV(bool,invflg) ((bool) ^ !!(sinfo->invflags & invflg)) @@ -54,53 +54,53 @@ match(const struct sk_buff *skb, } if (FWINV((statebit & sinfo->statemask) == 0, XT_CONNTRACK_STATE)) - return 0; + return false; } if (ct == NULL) { if (sinfo->flags & ~XT_CONNTRACK_STATE) - return 0; - return 1; + return false; + return true; } if (sinfo->flags & XT_CONNTRACK_PROTO && FWINV(ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.dst.protonum != sinfo->tuple[IP_CT_DIR_ORIGINAL].dst.protonum, XT_CONNTRACK_PROTO)) - return 0; + return false; if (sinfo->flags & XT_CONNTRACK_ORIGSRC && FWINV((ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.src.u3.ip & sinfo->sipmsk[IP_CT_DIR_ORIGINAL].s_addr) != sinfo->tuple[IP_CT_DIR_ORIGINAL].src.ip, XT_CONNTRACK_ORIGSRC)) - return 0; + return false; if (sinfo->flags & XT_CONNTRACK_ORIGDST && FWINV((ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.dst.u3.ip & sinfo->dipmsk[IP_CT_DIR_ORIGINAL].s_addr) != sinfo->tuple[IP_CT_DIR_ORIGINAL].dst.ip, XT_CONNTRACK_ORIGDST)) - return 0; + return false; if (sinfo->flags & XT_CONNTRACK_REPLSRC && FWINV((ct->tuplehash[IP_CT_DIR_REPLY].tuple.src.u3.ip & sinfo->sipmsk[IP_CT_DIR_REPLY].s_addr) != sinfo->tuple[IP_CT_DIR_REPLY].src.ip, XT_CONNTRACK_REPLSRC)) - return 0; + return false; if (sinfo->flags & XT_CONNTRACK_REPLDST && FWINV((ct->tuplehash[IP_CT_DIR_REPLY].tuple.dst.u3.ip & sinfo->dipmsk[IP_CT_DIR_REPLY].s_addr) != sinfo->tuple[IP_CT_DIR_REPLY].dst.ip, XT_CONNTRACK_REPLDST)) - return 0; + return false; if (sinfo->flags & XT_CONNTRACK_STATUS && FWINV((ct->status & sinfo->statusmask) == 0, XT_CONNTRACK_STATUS)) - return 0; + return false; if(sinfo->flags & XT_CONNTRACK_EXPIRES) { unsigned long expires = timer_pending(&ct->timeout) ? @@ -109,12 +109,12 @@ match(const struct sk_buff *skb, if (FWINV(!(expires >= sinfo->expires_min && expires <= sinfo->expires_max), XT_CONNTRACK_EXPIRES)) - return 0; + return false; } - return 1; + return true; } -static int +static bool checkentry(const char *tablename, const void *ip, const struct xt_match *match, @@ -124,9 +124,9 @@ checkentry(const char *tablename, if (nf_ct_l3proto_try_module_get(match->family) < 0) { printk(KERN_WARNING "can't load conntrack support for " "proto=%d\n", match->family); - return 0; + return false; } - return 1; + return true; } static void destroy(const struct xt_match *match, void *matchinfo) @@ -150,7 +150,7 @@ struct compat_xt_conntrack_info static void compat_from_user(void *dst, void *src) { - struct compat_xt_conntrack_info *cm = src; + const struct compat_xt_conntrack_info *cm = src; struct xt_conntrack_info m = { .statemask = cm->statemask, .statusmask = cm->statusmask, @@ -167,7 +167,7 @@ static void compat_from_user(void *dst, void *src) static int compat_to_user(void __user *dst, void *src) { - struct xt_conntrack_info *m = src; + const struct xt_conntrack_info *m = src; struct compat_xt_conntrack_info cm = { .statemask = m->statemask, .statusmask = m->statusmask, @@ -183,7 +183,7 @@ static int compat_to_user(void __user *dst, void *src) } #endif -static struct xt_match conntrack_match = { +static struct xt_match conntrack_match __read_mostly = { .name = "conntrack", .match = match, .checkentry = checkentry, diff --git a/net/netfilter/xt_dccp.c b/net/netfilter/xt_dccp.c index 2c9c0dee8aaf..83224ec89cc0 100644 --- a/net/netfilter/xt_dccp.c +++ b/net/netfilter/xt_dccp.c @@ -31,40 +31,40 @@ MODULE_ALIAS("ipt_dccp"); static unsigned char *dccp_optbuf; static DEFINE_SPINLOCK(dccp_buflock); -static inline int +static inline bool dccp_find_option(u_int8_t option, const struct sk_buff *skb, unsigned int protoff, const struct dccp_hdr *dh, - int *hotdrop) + bool *hotdrop) { /* tcp.doff is only 4 bits, ie. max 15 * 4 bytes */ - unsigned char *op; + const unsigned char *op; unsigned int optoff = __dccp_hdr_len(dh); unsigned int optlen = dh->dccph_doff*4 - __dccp_hdr_len(dh); unsigned int i; if (dh->dccph_doff * 4 < __dccp_hdr_len(dh)) { - *hotdrop = 1; - return 0; + *hotdrop = true; + return false; } if (!optlen) - return 0; + return false; spin_lock_bh(&dccp_buflock); op = skb_header_pointer(skb, protoff + optoff, optlen, dccp_optbuf); if (op == NULL) { /* If we don't have the whole header, drop packet. */ spin_unlock_bh(&dccp_buflock); - *hotdrop = 1; - return 0; + *hotdrop = true; + return false; } for (i = 0; i < optlen; ) { if (op[i] == option) { spin_unlock_bh(&dccp_buflock); - return 1; + return true; } if (op[i] < 2) @@ -74,24 +74,24 @@ dccp_find_option(u_int8_t option, } spin_unlock_bh(&dccp_buflock); - return 0; + return false; } -static inline int +static inline bool match_types(const struct dccp_hdr *dh, u_int16_t typemask) { - return (typemask & (1 << dh->dccph_type)); + return typemask & (1 << dh->dccph_type); } -static inline int +static inline bool match_option(u_int8_t option, const struct sk_buff *skb, unsigned int protoff, - const struct dccp_hdr *dh, int *hotdrop) + const struct dccp_hdr *dh, bool *hotdrop) { return dccp_find_option(option, skb, protoff, dh, hotdrop); } -static int +static bool match(const struct sk_buff *skb, const struct net_device *in, const struct net_device *out, @@ -99,25 +99,25 @@ match(const struct sk_buff *skb, const void *matchinfo, int offset, unsigned int protoff, - int *hotdrop) + bool *hotdrop) { const struct xt_dccp_info *info = matchinfo; struct dccp_hdr _dh, *dh; if (offset) - return 0; + return false; dh = skb_header_pointer(skb, protoff, sizeof(_dh), &_dh); if (dh == NULL) { - *hotdrop = 1; - return 0; + *hotdrop = true; + return false; } - return DCCHECK(((ntohs(dh->dccph_sport) >= info->spts[0]) - && (ntohs(dh->dccph_sport) <= info->spts[1])), + return DCCHECK(ntohs(dh->dccph_sport) >= info->spts[0] + && ntohs(dh->dccph_sport) <= info->spts[1], XT_DCCP_SRC_PORTS, info->flags, info->invflags) - && DCCHECK(((ntohs(dh->dccph_dport) >= info->dpts[0]) - && (ntohs(dh->dccph_dport) <= info->dpts[1])), + && DCCHECK(ntohs(dh->dccph_dport) >= info->dpts[0] + && ntohs(dh->dccph_dport) <= info->dpts[1], XT_DCCP_DEST_PORTS, info->flags, info->invflags) && DCCHECK(match_types(dh, info->typemask), XT_DCCP_TYPE, info->flags, info->invflags) @@ -126,7 +126,7 @@ match(const struct sk_buff *skb, XT_DCCP_OPTION, info->flags, info->invflags); } -static int +static bool checkentry(const char *tablename, const void *inf, const struct xt_match *match, @@ -140,7 +140,7 @@ checkentry(const char *tablename, && !(info->invflags & ~info->flags); } -static struct xt_match xt_dccp_match[] = { +static struct xt_match xt_dccp_match[] __read_mostly = { { .name = "dccp", .family = AF_INET, diff --git a/net/netfilter/xt_dscp.c b/net/netfilter/xt_dscp.c index 56b247ecc283..dde6d66e0d33 100644 --- a/net/netfilter/xt_dscp.c +++ b/net/netfilter/xt_dscp.c @@ -22,14 +22,14 @@ MODULE_LICENSE("GPL"); MODULE_ALIAS("ipt_dscp"); MODULE_ALIAS("ip6t_dscp"); -static int match(const struct sk_buff *skb, - const struct net_device *in, - const struct net_device *out, - const struct xt_match *match, - const void *matchinfo, - int offset, - unsigned int protoff, - int *hotdrop) +static bool match(const struct sk_buff *skb, + const struct net_device *in, + const struct net_device *out, + const struct xt_match *match, + const void *matchinfo, + int offset, + unsigned int protoff, + bool *hotdrop) { const struct xt_dscp_info *info = matchinfo; u_int8_t dscp = ipv4_get_dsfield(ip_hdr(skb)) >> XT_DSCP_SHIFT; @@ -37,14 +37,14 @@ static int match(const struct sk_buff *skb, return (dscp == info->dscp) ^ !!info->invert; } -static int match6(const struct sk_buff *skb, - const struct net_device *in, - const struct net_device *out, - const struct xt_match *match, - const void *matchinfo, - int offset, - unsigned int protoff, - int *hotdrop) +static bool match6(const struct sk_buff *skb, + const struct net_device *in, + const struct net_device *out, + const struct xt_match *match, + const void *matchinfo, + int offset, + unsigned int protoff, + bool *hotdrop) { const struct xt_dscp_info *info = matchinfo; u_int8_t dscp = ipv6_get_dsfield(ipv6_hdr(skb)) >> XT_DSCP_SHIFT; @@ -52,23 +52,23 @@ static int match6(const struct sk_buff *skb, return (dscp == info->dscp) ^ !!info->invert; } -static int checkentry(const char *tablename, - const void *info, - const struct xt_match *match, - void *matchinfo, - unsigned int hook_mask) +static bool checkentry(const char *tablename, + const void *info, + const struct xt_match *match, + void *matchinfo, + unsigned int hook_mask) { const u_int8_t dscp = ((struct xt_dscp_info *)matchinfo)->dscp; if (dscp > XT_DSCP_MAX) { printk(KERN_ERR "xt_dscp: dscp %x out of range\n", dscp); - return 0; + return false; } - return 1; + return true; } -static struct xt_match xt_dscp_match[] = { +static struct xt_match xt_dscp_match[] __read_mostly = { { .name = "dscp", .family = AF_INET, diff --git a/net/netfilter/xt_esp.c b/net/netfilter/xt_esp.c index 7c95f149d942..b11378e001b6 100644 --- a/net/netfilter/xt_esp.c +++ b/net/netfilter/xt_esp.c @@ -31,10 +31,10 @@ MODULE_ALIAS("ip6t_esp"); #endif /* Returns 1 if the spi is matched by the range, 0 otherwise */ -static inline int -spi_match(u_int32_t min, u_int32_t max, u_int32_t spi, int invert) +static inline bool +spi_match(u_int32_t min, u_int32_t max, u_int32_t spi, bool invert) { - int r = 0; + bool r; duprintf("esp spi_match:%c 0x%x <= 0x%x <= 0x%x", invert ? '!' : ' ', min, spi, max); r = (spi >= min && spi <= max) ^ invert; @@ -42,7 +42,7 @@ spi_match(u_int32_t min, u_int32_t max, u_int32_t spi, int invert) return r; } -static int +static bool match(const struct sk_buff *skb, const struct net_device *in, const struct net_device *out, @@ -50,14 +50,14 @@ match(const struct sk_buff *skb, const void *matchinfo, int offset, unsigned int protoff, - int *hotdrop) + bool *hotdrop) { struct ip_esp_hdr _esp, *eh; const struct xt_esp *espinfo = matchinfo; /* Must not be a fragment. */ if (offset) - return 0; + return false; eh = skb_header_pointer(skb, protoff, sizeof(_esp), &_esp); if (eh == NULL) { @@ -65,8 +65,8 @@ match(const struct sk_buff *skb, * can't. Hence, no choice but to drop. */ duprintf("Dropping evil ESP tinygram.\n"); - *hotdrop = 1; - return 0; + *hotdrop = true; + return false; } return spi_match(espinfo->spis[0], espinfo->spis[1], ntohl(eh->spi), @@ -74,7 +74,7 @@ match(const struct sk_buff *skb, } /* Called when user tries to insert an entry of this type. */ -static int +static bool checkentry(const char *tablename, const void *ip_void, const struct xt_match *match, @@ -85,13 +85,13 @@ checkentry(const char *tablename, if (espinfo->invflags & ~XT_ESP_INV_MASK) { duprintf("xt_esp: unknown flags %X\n", espinfo->invflags); - return 0; + return false; } - return 1; + return true; } -static struct xt_match xt_esp_match[] = { +static struct xt_match xt_esp_match[] __read_mostly = { { .name = "esp", .family = AF_INET, diff --git a/net/netfilter/xt_hashlimit.c b/net/netfilter/xt_hashlimit.c index d3043fa32ebc..bd45f9d3f7d0 100644 --- a/net/netfilter/xt_hashlimit.c +++ b/net/netfilter/xt_hashlimit.c @@ -94,7 +94,8 @@ static DEFINE_MUTEX(hlimit_mutex); /* additional checkentry protection */ static HLIST_HEAD(hashlimit_htables); static struct kmem_cache *hashlimit_cachep __read_mostly; -static inline int dst_cmp(const struct dsthash_ent *ent, struct dsthash_dst *b) +static inline bool dst_cmp(const struct dsthash_ent *ent, + const struct dsthash_dst *b) { return !memcmp(&ent->dst, b, sizeof(ent->dst)); } @@ -106,7 +107,8 @@ hash_dst(const struct xt_hashlimit_htable *ht, const struct dsthash_dst *dst) } static struct dsthash_ent * -dsthash_find(const struct xt_hashlimit_htable *ht, struct dsthash_dst *dst) +dsthash_find(const struct xt_hashlimit_htable *ht, + const struct dsthash_dst *dst) { struct dsthash_ent *ent; struct hlist_node *pos; @@ -122,7 +124,8 @@ dsthash_find(const struct xt_hashlimit_htable *ht, struct dsthash_dst *dst) /* allocate dsthash_ent, initialize dst, put in htable and lock it */ static struct dsthash_ent * -dsthash_alloc_init(struct xt_hashlimit_htable *ht, struct dsthash_dst *dst) +dsthash_alloc_init(struct xt_hashlimit_htable *ht, + const struct dsthash_dst *dst) { struct dsthash_ent *ent; @@ -227,19 +230,21 @@ static int htable_create(struct xt_hashlimit_info *minfo, int family) return 0; } -static int select_all(struct xt_hashlimit_htable *ht, struct dsthash_ent *he) +static bool select_all(const struct xt_hashlimit_htable *ht, + const struct dsthash_ent *he) { return 1; } -static int select_gc(struct xt_hashlimit_htable *ht, struct dsthash_ent *he) +static bool select_gc(const struct xt_hashlimit_htable *ht, + const struct dsthash_ent *he) { - return (jiffies >= he->expires); + return jiffies >= he->expires; } static void htable_selective_cleanup(struct xt_hashlimit_htable *ht, - int (*select)(struct xt_hashlimit_htable *ht, - struct dsthash_ent *he)) + bool (*select)(const struct xt_hashlimit_htable *ht, + const struct dsthash_ent *he)) { unsigned int i; @@ -282,7 +287,8 @@ static void htable_destroy(struct xt_hashlimit_htable *hinfo) vfree(hinfo); } -static struct xt_hashlimit_htable *htable_find_get(char *name, int family) +static struct xt_hashlimit_htable *htable_find_get(const char *name, + int family) { struct xt_hashlimit_htable *hinfo; struct hlist_node *pos; @@ -367,7 +373,8 @@ static inline void rateinfo_recalc(struct dsthash_ent *dh, unsigned long now) } static int -hashlimit_init_dst(struct xt_hashlimit_htable *hinfo, struct dsthash_dst *dst, +hashlimit_init_dst(const struct xt_hashlimit_htable *hinfo, + struct dsthash_dst *dst, const struct sk_buff *skb, unsigned int protoff) { __be16 _ports[2], *ports; @@ -432,7 +439,7 @@ hashlimit_init_dst(struct xt_hashlimit_htable *hinfo, struct dsthash_dst *dst, return 0; } -static int +static bool hashlimit_match(const struct sk_buff *skb, const struct net_device *in, const struct net_device *out, @@ -440,10 +447,10 @@ hashlimit_match(const struct sk_buff *skb, const void *matchinfo, int offset, unsigned int protoff, - int *hotdrop) + bool *hotdrop) { - struct xt_hashlimit_info *r = - ((struct xt_hashlimit_info *)matchinfo)->u.master; + const struct xt_hashlimit_info *r = + ((const struct xt_hashlimit_info *)matchinfo)->u.master; struct xt_hashlimit_htable *hinfo = r->hinfo; unsigned long now = jiffies; struct dsthash_ent *dh; @@ -478,20 +485,20 @@ hashlimit_match(const struct sk_buff *skb, /* We're underlimit. */ dh->rateinfo.credit -= dh->rateinfo.cost; spin_unlock_bh(&hinfo->lock); - return 1; + return true; } spin_unlock_bh(&hinfo->lock); /* default case: we're overlimit, thus don't match */ - return 0; + return false; hotdrop: - *hotdrop = 1; - return 0; + *hotdrop = true; + return false; } -static int +static bool hashlimit_checkentry(const char *tablename, const void *inf, const struct xt_match *match, @@ -505,20 +512,20 @@ hashlimit_checkentry(const char *tablename, user2credits(r->cfg.avg * r->cfg.burst) < user2credits(r->cfg.avg)) { printk(KERN_ERR "xt_hashlimit: overflow, try lower: %u/%u\n", r->cfg.avg, r->cfg.burst); - return 0; + return false; } if (r->cfg.mode == 0 || r->cfg.mode > (XT_HASHLIMIT_HASH_DPT | XT_HASHLIMIT_HASH_DIP | XT_HASHLIMIT_HASH_SIP | XT_HASHLIMIT_HASH_SPT)) - return 0; + return false; if (!r->cfg.gc_interval) - return 0; + return false; if (!r->cfg.expire) - return 0; + return false; if (r->name[sizeof(r->name) - 1] != '\0') - return 0; + return false; /* This is the best we've got: We cannot release and re-grab lock, * since checkentry() is called before x_tables.c grabs xt_mutex. @@ -530,19 +537,19 @@ hashlimit_checkentry(const char *tablename, r->hinfo = htable_find_get(r->name, match->family); if (!r->hinfo && htable_create(r, match->family) != 0) { mutex_unlock(&hlimit_mutex); - return 0; + return false; } mutex_unlock(&hlimit_mutex); /* Ugly hack: For SMP, we only want to use one set */ r->u.master = r; - return 1; + return true; } static void hashlimit_destroy(const struct xt_match *match, void *matchinfo) { - struct xt_hashlimit_info *r = matchinfo; + const struct xt_hashlimit_info *r = matchinfo; htable_put(r->hinfo); } @@ -571,7 +578,7 @@ static int compat_to_user(void __user *dst, void *src) } #endif -static struct xt_match xt_hashlimit[] = { +static struct xt_match xt_hashlimit[] __read_mostly = { { .name = "hashlimit", .family = AF_INET, @@ -694,7 +701,7 @@ static int dl_seq_show(struct seq_file *s, void *v) return 0; } -static struct seq_operations dl_seq_ops = { +static const struct seq_operations dl_seq_ops = { .start = dl_seq_start, .next = dl_seq_next, .stop = dl_seq_stop, @@ -731,7 +738,7 @@ static int __init xt_hashlimit_init(void) err = -ENOMEM; hashlimit_cachep = kmem_cache_create("xt_hashlimit", sizeof(struct dsthash_ent), 0, 0, - NULL, NULL); + NULL); if (!hashlimit_cachep) { printk(KERN_ERR "xt_hashlimit: unable to create slab cache\n"); goto err2; diff --git a/net/netfilter/xt_helper.c b/net/netfilter/xt_helper.c index c139b2f43a10..0a1f4c6bcdef 100644 --- a/net/netfilter/xt_helper.c +++ b/net/netfilter/xt_helper.c @@ -22,13 +22,8 @@ MODULE_DESCRIPTION("iptables helper match module"); MODULE_ALIAS("ipt_helper"); MODULE_ALIAS("ip6t_helper"); -#if 0 -#define DEBUGP printk -#else -#define DEBUGP(format, args...) -#endif -static int +static bool match(const struct sk_buff *skb, const struct net_device *in, const struct net_device *out, @@ -36,61 +31,51 @@ match(const struct sk_buff *skb, const void *matchinfo, int offset, unsigned int protoff, - int *hotdrop) + bool *hotdrop) { const struct xt_helper_info *info = matchinfo; - struct nf_conn *ct; - struct nf_conn_help *master_help; + const struct nf_conn *ct; + const struct nf_conn_help *master_help; + const struct nf_conntrack_helper *helper; enum ip_conntrack_info ctinfo; - int ret = info->invert; + bool ret = info->invert; - ct = nf_ct_get((struct sk_buff *)skb, &ctinfo); - if (!ct) { - DEBUGP("xt_helper: Eek! invalid conntrack?\n"); + ct = nf_ct_get(skb, &ctinfo); + if (!ct || !ct->master) return ret; - } - - if (!ct->master) { - DEBUGP("xt_helper: conntrack %p has no master\n", ct); - return ret; - } - read_lock_bh(&nf_conntrack_lock); master_help = nfct_help(ct->master); - if (!master_help || !master_help->helper) { - DEBUGP("xt_helper: master ct %p has no helper\n", - exp->expectant); - goto out_unlock; - } + if (!master_help) + return ret; - DEBUGP("master's name = %s , info->name = %s\n", - ct->master->helper->name, info->name); + /* rcu_read_lock()ed by nf_hook_slow */ + helper = rcu_dereference(master_help->helper); + if (!helper) + return ret; if (info->name[0] == '\0') - ret ^= 1; + ret = !ret; else ret ^= !strncmp(master_help->helper->name, info->name, strlen(master_help->helper->name)); -out_unlock: - read_unlock_bh(&nf_conntrack_lock); return ret; } -static int check(const char *tablename, - const void *inf, - const struct xt_match *match, - void *matchinfo, - unsigned int hook_mask) +static bool check(const char *tablename, + const void *inf, + const struct xt_match *match, + void *matchinfo, + unsigned int hook_mask) { struct xt_helper_info *info = matchinfo; if (nf_ct_l3proto_try_module_get(match->family) < 0) { printk(KERN_WARNING "can't load conntrack support for " "proto=%d\n", match->family); - return 0; + return false; } info->name[29] = '\0'; - return 1; + return true; } static void @@ -99,7 +84,7 @@ destroy(const struct xt_match *match, void *matchinfo) nf_ct_l3proto_module_put(match->family); } -static struct xt_match xt_helper_match[] = { +static struct xt_match xt_helper_match[] __read_mostly = { { .name = "helper", .family = AF_INET, diff --git a/net/netfilter/xt_length.c b/net/netfilter/xt_length.c index 77288c5ada78..3dad173d9735 100644 --- a/net/netfilter/xt_length.c +++ b/net/netfilter/xt_length.c @@ -20,7 +20,7 @@ MODULE_LICENSE("GPL"); MODULE_ALIAS("ipt_length"); MODULE_ALIAS("ip6t_length"); -static int +static bool match(const struct sk_buff *skb, const struct net_device *in, const struct net_device *out, @@ -28,7 +28,7 @@ match(const struct sk_buff *skb, const void *matchinfo, int offset, unsigned int protoff, - int *hotdrop) + bool *hotdrop) { const struct xt_length_info *info = matchinfo; u_int16_t pktlen = ntohs(ip_hdr(skb)->tot_len); @@ -36,7 +36,7 @@ match(const struct sk_buff *skb, return (pktlen >= info->min && pktlen <= info->max) ^ info->invert; } -static int +static bool match6(const struct sk_buff *skb, const struct net_device *in, const struct net_device *out, @@ -44,16 +44,16 @@ match6(const struct sk_buff *skb, const void *matchinfo, int offset, unsigned int protoff, - int *hotdrop) + bool *hotdrop) { const struct xt_length_info *info = matchinfo; - const u_int16_t pktlen = (ntohs(ipv6_hdr(skb)->payload_len) + - sizeof(struct ipv6hdr)); + const u_int16_t pktlen = ntohs(ipv6_hdr(skb)->payload_len) + + sizeof(struct ipv6hdr); return (pktlen >= info->min && pktlen <= info->max) ^ info->invert; } -static struct xt_match xt_length_match[] = { +static struct xt_match xt_length_match[] __read_mostly = { { .name = "length", .family = AF_INET, diff --git a/net/netfilter/xt_limit.c b/net/netfilter/xt_limit.c index 571a72ab89ad..4fcca797150f 100644 --- a/net/netfilter/xt_limit.c +++ b/net/netfilter/xt_limit.c @@ -57,7 +57,7 @@ static DEFINE_SPINLOCK(limit_lock); #define CREDITS_PER_JIFFY POW2_BELOW32(MAX_CPJ) -static int +static bool ipt_limit_match(const struct sk_buff *skb, const struct net_device *in, const struct net_device *out, @@ -65,9 +65,10 @@ ipt_limit_match(const struct sk_buff *skb, const void *matchinfo, int offset, unsigned int protoff, - int *hotdrop) + bool *hotdrop) { - struct xt_rateinfo *r = ((struct xt_rateinfo *)matchinfo)->master; + struct xt_rateinfo *r = + ((const struct xt_rateinfo *)matchinfo)->master; unsigned long now = jiffies; spin_lock_bh(&limit_lock); @@ -79,11 +80,11 @@ ipt_limit_match(const struct sk_buff *skb, /* We're not limited. */ r->credit -= r->cost; spin_unlock_bh(&limit_lock); - return 1; + return true; } spin_unlock_bh(&limit_lock); - return 0; + return false; } /* Precision saver. */ @@ -98,7 +99,7 @@ user2credits(u_int32_t user) return (user * HZ * CREDITS_PER_JIFFY) / XT_LIMIT_SCALE; } -static int +static bool ipt_limit_checkentry(const char *tablename, const void *inf, const struct xt_match *match, @@ -112,7 +113,7 @@ ipt_limit_checkentry(const char *tablename, || user2credits(r->avg * r->burst) < user2credits(r->avg)) { printk("Overflow in xt_limit, try lower: %u/%u\n", r->avg, r->burst); - return 0; + return false; } /* For SMP, we only want to use one set of counters. */ @@ -125,7 +126,7 @@ ipt_limit_checkentry(const char *tablename, r->credit_cap = user2credits(r->avg * r->burst); /* Credits full. */ r->cost = user2credits(r->avg); } - return 1; + return true; } #ifdef CONFIG_COMPAT @@ -144,7 +145,7 @@ struct compat_xt_rateinfo { * master pointer, which does not need to be preserved. */ static void compat_from_user(void *dst, void *src) { - struct compat_xt_rateinfo *cm = src; + const struct compat_xt_rateinfo *cm = src; struct xt_rateinfo m = { .avg = cm->avg, .burst = cm->burst, @@ -158,7 +159,7 @@ static void compat_from_user(void *dst, void *src) static int compat_to_user(void __user *dst, void *src) { - struct xt_rateinfo *m = src; + const struct xt_rateinfo *m = src; struct compat_xt_rateinfo cm = { .avg = m->avg, .burst = m->burst, @@ -172,7 +173,7 @@ static int compat_to_user(void __user *dst, void *src) } #endif /* CONFIG_COMPAT */ -static struct xt_match xt_limit_match[] = { +static struct xt_match xt_limit_match[] __read_mostly = { { .name = "limit", .family = AF_INET, diff --git a/net/netfilter/xt_mac.c b/net/netfilter/xt_mac.c index 1d3a1d98b885..00490d777a0f 100644 --- a/net/netfilter/xt_mac.c +++ b/net/netfilter/xt_mac.c @@ -24,7 +24,7 @@ MODULE_DESCRIPTION("iptables mac matching module"); MODULE_ALIAS("ipt_mac"); MODULE_ALIAS("ip6t_mac"); -static int +static bool match(const struct sk_buff *skb, const struct net_device *in, const struct net_device *out, @@ -32,19 +32,19 @@ match(const struct sk_buff *skb, const void *matchinfo, int offset, unsigned int protoff, - int *hotdrop) + bool *hotdrop) { const struct xt_mac_info *info = matchinfo; /* Is mac pointer valid? */ - return (skb_mac_header(skb) >= skb->head && - (skb_mac_header(skb) + ETH_HLEN) <= skb->data - /* If so, compare... */ - && ((!compare_ether_addr(eth_hdr(skb)->h_source, info->srcaddr)) - ^ info->invert)); + return skb_mac_header(skb) >= skb->head && + skb_mac_header(skb) + ETH_HLEN <= skb->data + /* If so, compare... */ + && ((!compare_ether_addr(eth_hdr(skb)->h_source, info->srcaddr)) + ^ info->invert); } -static struct xt_match xt_mac_match[] = { +static struct xt_match xt_mac_match[] __read_mostly = { { .name = "mac", .family = AF_INET, diff --git a/net/netfilter/xt_mark.c b/net/netfilter/xt_mark.c index 39911dddb011..c02a7f8f3925 100644 --- a/net/netfilter/xt_mark.c +++ b/net/netfilter/xt_mark.c @@ -19,7 +19,7 @@ MODULE_DESCRIPTION("iptables mark matching module"); MODULE_ALIAS("ipt_mark"); MODULE_ALIAS("ip6t_mark"); -static int +static bool match(const struct sk_buff *skb, const struct net_device *in, const struct net_device *out, @@ -27,14 +27,14 @@ match(const struct sk_buff *skb, const void *matchinfo, int offset, unsigned int protoff, - int *hotdrop) + bool *hotdrop) { const struct xt_mark_info *info = matchinfo; return ((skb->mark & info->mask) == info->mark) ^ info->invert; } -static int +static bool checkentry(const char *tablename, const void *entry, const struct xt_match *match, @@ -45,9 +45,9 @@ checkentry(const char *tablename, if (minfo->mark > 0xffffffff || minfo->mask > 0xffffffff) { printk(KERN_WARNING "mark: only supports 32bit mark\n"); - return 0; + return false; } - return 1; + return true; } #ifdef CONFIG_COMPAT @@ -60,7 +60,7 @@ struct compat_xt_mark_info { static void compat_from_user(void *dst, void *src) { - struct compat_xt_mark_info *cm = src; + const struct compat_xt_mark_info *cm = src; struct xt_mark_info m = { .mark = cm->mark, .mask = cm->mask, @@ -71,7 +71,7 @@ static void compat_from_user(void *dst, void *src) static int compat_to_user(void __user *dst, void *src) { - struct xt_mark_info *m = src; + const struct xt_mark_info *m = src; struct compat_xt_mark_info cm = { .mark = m->mark, .mask = m->mask, @@ -81,7 +81,7 @@ static int compat_to_user(void __user *dst, void *src) } #endif /* CONFIG_COMPAT */ -static struct xt_match xt_mark_match[] = { +static struct xt_match xt_mark_match[] __read_mostly = { { .name = "mark", .family = AF_INET, diff --git a/net/netfilter/xt_multiport.c b/net/netfilter/xt_multiport.c index 4dce2a81702a..e8ae10284acd 100644 --- a/net/netfilter/xt_multiport.c +++ b/net/netfilter/xt_multiport.c @@ -33,24 +33,24 @@ MODULE_ALIAS("ip6t_multiport"); #endif /* Returns 1 if the port is matched by the test, 0 otherwise. */ -static inline int +static inline bool ports_match(const u_int16_t *portlist, enum xt_multiport_flags flags, u_int8_t count, u_int16_t src, u_int16_t dst) { unsigned int i; for (i = 0; i < count; i++) { if (flags != XT_MULTIPORT_DESTINATION && portlist[i] == src) - return 1; + return true; if (flags != XT_MULTIPORT_SOURCE && portlist[i] == dst) - return 1; + return true; } - return 0; + return false; } /* Returns 1 if the port is matched by the test, 0 otherwise. */ -static inline int +static inline bool ports_match_v1(const struct xt_multiport_v1 *minfo, u_int16_t src, u_int16_t dst) { @@ -67,34 +67,34 @@ ports_match_v1(const struct xt_multiport_v1 *minfo, if (minfo->flags == XT_MULTIPORT_SOURCE && src >= s && src <= e) - return 1 ^ minfo->invert; + return true ^ minfo->invert; if (minfo->flags == XT_MULTIPORT_DESTINATION && dst >= s && dst <= e) - return 1 ^ minfo->invert; + return true ^ minfo->invert; if (minfo->flags == XT_MULTIPORT_EITHER && ((dst >= s && dst <= e) || (src >= s && src <= e))) - return 1 ^ minfo->invert; + return true ^ minfo->invert; } else { /* exact port matching */ duprintf("src or dst matches with %d?\n", s); if (minfo->flags == XT_MULTIPORT_SOURCE && src == s) - return 1 ^ minfo->invert; + return true ^ minfo->invert; if (minfo->flags == XT_MULTIPORT_DESTINATION && dst == s) - return 1 ^ minfo->invert; + return true ^ minfo->invert; if (minfo->flags == XT_MULTIPORT_EITHER && (src == s || dst == s)) - return 1 ^ minfo->invert; + return true ^ minfo->invert; } } return minfo->invert; } -static int +static bool match(const struct sk_buff *skb, const struct net_device *in, const struct net_device *out, @@ -102,13 +102,13 @@ match(const struct sk_buff *skb, const void *matchinfo, int offset, unsigned int protoff, - int *hotdrop) + bool *hotdrop) { __be16 _ports[2], *pptr; const struct xt_multiport *multiinfo = matchinfo; if (offset) - return 0; + return false; pptr = skb_header_pointer(skb, protoff, sizeof(_ports), _ports); if (pptr == NULL) { @@ -116,8 +116,8 @@ match(const struct sk_buff *skb, * can't. Hence, no choice but to drop. */ duprintf("xt_multiport: Dropping evil offset=0 tinygram.\n"); - *hotdrop = 1; - return 0; + *hotdrop = true; + return false; } return ports_match(multiinfo->ports, @@ -125,7 +125,7 @@ match(const struct sk_buff *skb, ntohs(pptr[0]), ntohs(pptr[1])); } -static int +static bool match_v1(const struct sk_buff *skb, const struct net_device *in, const struct net_device *out, @@ -133,13 +133,13 @@ match_v1(const struct sk_buff *skb, const void *matchinfo, int offset, unsigned int protoff, - int *hotdrop) + bool *hotdrop) { __be16 _ports[2], *pptr; const struct xt_multiport_v1 *multiinfo = matchinfo; if (offset) - return 0; + return false; pptr = skb_header_pointer(skb, protoff, sizeof(_ports), _ports); if (pptr == NULL) { @@ -147,14 +147,14 @@ match_v1(const struct sk_buff *skb, * can't. Hence, no choice but to drop. */ duprintf("xt_multiport: Dropping evil offset=0 tinygram.\n"); - *hotdrop = 1; - return 0; + *hotdrop = true; + return false; } return ports_match_v1(multiinfo, ntohs(pptr[0]), ntohs(pptr[1])); } -static inline int +static inline bool check(u_int16_t proto, u_int8_t ip_invflags, u_int8_t match_flags, @@ -172,7 +172,7 @@ check(u_int16_t proto, } /* Called when user tries to insert an entry of this type. */ -static int +static bool checkentry(const char *tablename, const void *info, const struct xt_match *match, @@ -186,7 +186,7 @@ checkentry(const char *tablename, multiinfo->count); } -static int +static bool checkentry_v1(const char *tablename, const void *info, const struct xt_match *match, @@ -200,7 +200,7 @@ checkentry_v1(const char *tablename, multiinfo->count); } -static int +static bool checkentry6(const char *tablename, const void *info, const struct xt_match *match, @@ -214,7 +214,7 @@ checkentry6(const char *tablename, multiinfo->count); } -static int +static bool checkentry6_v1(const char *tablename, const void *info, const struct xt_match *match, @@ -228,7 +228,7 @@ checkentry6_v1(const char *tablename, multiinfo->count); } -static struct xt_match xt_multiport_match[] = { +static struct xt_match xt_multiport_match[] __read_mostly = { { .name = "multiport", .family = AF_INET, diff --git a/net/netfilter/xt_physdev.c b/net/netfilter/xt_physdev.c index 35a0fe200c39..a4bab043a6d1 100644 --- a/net/netfilter/xt_physdev.c +++ b/net/netfilter/xt_physdev.c @@ -13,9 +13,6 @@ #include <linux/netfilter_bridge.h> #include <linux/netfilter/xt_physdev.h> #include <linux/netfilter/x_tables.h> -#include <linux/netfilter_bridge.h> -#define MATCH 1 -#define NOMATCH 0 MODULE_LICENSE("GPL"); MODULE_AUTHOR("Bart De Schuymer <bdschuym@pandora.be>"); @@ -23,7 +20,7 @@ MODULE_DESCRIPTION("iptables bridge physical device match module"); MODULE_ALIAS("ipt_physdev"); MODULE_ALIAS("ip6t_physdev"); -static int +static bool match(const struct sk_buff *skb, const struct net_device *in, const struct net_device *out, @@ -31,14 +28,14 @@ match(const struct sk_buff *skb, const void *matchinfo, int offset, unsigned int protoff, - int *hotdrop) + bool *hotdrop) { int i; static const char nulldevname[IFNAMSIZ]; const struct xt_physdev_info *info = matchinfo; - unsigned int ret; + bool ret; const char *indev, *outdev; - struct nf_bridge_info *nf_bridge; + const struct nf_bridge_info *nf_bridge; /* Not a bridged IP packet or no info available yet: * LOCAL_OUT/mangle and LOCAL_OUT/nat don't know if @@ -47,61 +44,61 @@ match(const struct sk_buff *skb, /* Return MATCH if the invert flags of the used options are on */ if ((info->bitmask & XT_PHYSDEV_OP_BRIDGED) && !(info->invert & XT_PHYSDEV_OP_BRIDGED)) - return NOMATCH; + return false; if ((info->bitmask & XT_PHYSDEV_OP_ISIN) && !(info->invert & XT_PHYSDEV_OP_ISIN)) - return NOMATCH; + return false; if ((info->bitmask & XT_PHYSDEV_OP_ISOUT) && !(info->invert & XT_PHYSDEV_OP_ISOUT)) - return NOMATCH; + return false; if ((info->bitmask & XT_PHYSDEV_OP_IN) && !(info->invert & XT_PHYSDEV_OP_IN)) - return NOMATCH; + return false; if ((info->bitmask & XT_PHYSDEV_OP_OUT) && !(info->invert & XT_PHYSDEV_OP_OUT)) - return NOMATCH; - return MATCH; + return false; + return true; } /* This only makes sense in the FORWARD and POSTROUTING chains */ if ((info->bitmask & XT_PHYSDEV_OP_BRIDGED) && (!!(nf_bridge->mask & BRNF_BRIDGED) ^ !(info->invert & XT_PHYSDEV_OP_BRIDGED))) - return NOMATCH; + return false; if ((info->bitmask & XT_PHYSDEV_OP_ISIN && (!nf_bridge->physindev ^ !!(info->invert & XT_PHYSDEV_OP_ISIN))) || (info->bitmask & XT_PHYSDEV_OP_ISOUT && (!nf_bridge->physoutdev ^ !!(info->invert & XT_PHYSDEV_OP_ISOUT)))) - return NOMATCH; + return false; if (!(info->bitmask & XT_PHYSDEV_OP_IN)) goto match_outdev; indev = nf_bridge->physindev ? nf_bridge->physindev->name : nulldevname; - for (i = 0, ret = 0; i < IFNAMSIZ/sizeof(unsigned int); i++) { + for (i = 0, ret = false; i < IFNAMSIZ/sizeof(unsigned int); i++) { ret |= (((const unsigned int *)indev)[i] ^ ((const unsigned int *)info->physindev)[i]) & ((const unsigned int *)info->in_mask)[i]; } - if ((ret == 0) ^ !(info->invert & XT_PHYSDEV_OP_IN)) - return NOMATCH; + if (!ret ^ !(info->invert & XT_PHYSDEV_OP_IN)) + return false; match_outdev: if (!(info->bitmask & XT_PHYSDEV_OP_OUT)) - return MATCH; + return true; outdev = nf_bridge->physoutdev ? nf_bridge->physoutdev->name : nulldevname; - for (i = 0, ret = 0; i < IFNAMSIZ/sizeof(unsigned int); i++) { + for (i = 0, ret = false; i < IFNAMSIZ/sizeof(unsigned int); i++) { ret |= (((const unsigned int *)outdev)[i] ^ ((const unsigned int *)info->physoutdev)[i]) & ((const unsigned int *)info->out_mask)[i]; } - return (ret != 0) ^ !(info->invert & XT_PHYSDEV_OP_OUT); + return ret ^ !(info->invert & XT_PHYSDEV_OP_OUT); } -static int +static bool checkentry(const char *tablename, const void *ip, const struct xt_match *match, @@ -112,7 +109,7 @@ checkentry(const char *tablename, if (!(info->bitmask & XT_PHYSDEV_OP_MASK) || info->bitmask & ~XT_PHYSDEV_OP_MASK) - return 0; + return false; if (info->bitmask & XT_PHYSDEV_OP_OUT && (!(info->bitmask & XT_PHYSDEV_OP_BRIDGED) || info->invert & XT_PHYSDEV_OP_BRIDGED) && @@ -122,12 +119,12 @@ checkentry(const char *tablename, "OUTPUT, FORWARD and POSTROUTING chains for non-bridged " "traffic is not supported anymore.\n"); if (hook_mask & (1 << NF_IP_LOCAL_OUT)) - return 0; + return false; } - return 1; + return true; } -static struct xt_match xt_physdev_match[] = { +static struct xt_match xt_physdev_match[] __read_mostly = { { .name = "physdev", .family = AF_INET, diff --git a/net/netfilter/xt_pkttype.c b/net/netfilter/xt_pkttype.c index e1409fc5c288..a52925f12f35 100644 --- a/net/netfilter/xt_pkttype.c +++ b/net/netfilter/xt_pkttype.c @@ -21,29 +21,29 @@ MODULE_DESCRIPTION("IP tables match to match on linklayer packet type"); MODULE_ALIAS("ipt_pkttype"); MODULE_ALIAS("ip6t_pkttype"); -static int match(const struct sk_buff *skb, +static bool match(const struct sk_buff *skb, const struct net_device *in, const struct net_device *out, const struct xt_match *match, const void *matchinfo, int offset, unsigned int protoff, - int *hotdrop) + bool *hotdrop) { u_int8_t type; const struct xt_pkttype_info *info = matchinfo; if (skb->pkt_type == PACKET_LOOPBACK) - type = (MULTICAST(ip_hdr(skb)->daddr) + type = MULTICAST(ip_hdr(skb)->daddr) ? PACKET_MULTICAST - : PACKET_BROADCAST); + : PACKET_BROADCAST; else type = skb->pkt_type; return (type == info->pkttype) ^ info->invert; } -static struct xt_match xt_pkttype_match[] = { +static struct xt_match xt_pkttype_match[] __read_mostly = { { .name = "pkttype", .family = AF_INET, diff --git a/net/netfilter/xt_policy.c b/net/netfilter/xt_policy.c index 15b45a95ec13..6d6d3b7fcbb5 100644 --- a/net/netfilter/xt_policy.c +++ b/net/netfilter/xt_policy.c @@ -20,7 +20,7 @@ MODULE_AUTHOR("Patrick McHardy <kaber@trash.net>"); MODULE_DESCRIPTION("Xtables IPsec policy matching module"); MODULE_LICENSE("GPL"); -static inline int +static inline bool xt_addr_cmp(const union xt_policy_addr *a1, const union xt_policy_addr *m, const union xt_policy_addr *a2, unsigned short family) { @@ -30,11 +30,11 @@ xt_addr_cmp(const union xt_policy_addr *a1, const union xt_policy_addr *m, case AF_INET6: return !ipv6_masked_addr_cmp(&a1->a6, &m->a6, &a2->a6); } - return 0; + return false; } -static inline int -match_xfrm_state(struct xfrm_state *x, const struct xt_policy_elem *e, +static inline bool +match_xfrm_state(const struct xfrm_state *x, const struct xt_policy_elem *e, unsigned short family) { #define MATCH_ADDR(x,y,z) (!e->match.x || \ @@ -55,7 +55,7 @@ match_policy_in(const struct sk_buff *skb, const struct xt_policy_info *info, unsigned short family) { const struct xt_policy_elem *e; - struct sec_path *sp = skb->sp; + const struct sec_path *sp = skb->sp; int strict = info->flags & XT_POLICY_MATCH_STRICT; int i, pos; @@ -85,7 +85,7 @@ match_policy_out(const struct sk_buff *skb, const struct xt_policy_info *info, unsigned short family) { const struct xt_policy_elem *e; - struct dst_entry *dst = skb->dst; + const struct dst_entry *dst = skb->dst; int strict = info->flags & XT_POLICY_MATCH_STRICT; int i, pos; @@ -108,14 +108,14 @@ match_policy_out(const struct sk_buff *skb, const struct xt_policy_info *info, return strict ? i == info->len : 0; } -static int match(const struct sk_buff *skb, - const struct net_device *in, - const struct net_device *out, - const struct xt_match *match, - const void *matchinfo, - int offset, - unsigned int protoff, - int *hotdrop) +static bool match(const struct sk_buff *skb, + const struct net_device *in, + const struct net_device *out, + const struct xt_match *match, + const void *matchinfo, + int offset, + unsigned int protoff, + bool *hotdrop) { const struct xt_policy_info *info = matchinfo; int ret; @@ -126,45 +126,45 @@ static int match(const struct sk_buff *skb, ret = match_policy_out(skb, info, match->family); if (ret < 0) - ret = info->flags & XT_POLICY_MATCH_NONE ? 1 : 0; + ret = info->flags & XT_POLICY_MATCH_NONE ? true : false; else if (info->flags & XT_POLICY_MATCH_NONE) - ret = 0; + ret = false; return ret; } -static int checkentry(const char *tablename, const void *ip_void, - const struct xt_match *match, - void *matchinfo, unsigned int hook_mask) +static bool checkentry(const char *tablename, const void *ip_void, + const struct xt_match *match, + void *matchinfo, unsigned int hook_mask) { struct xt_policy_info *info = matchinfo; if (!(info->flags & (XT_POLICY_MATCH_IN|XT_POLICY_MATCH_OUT))) { printk(KERN_ERR "xt_policy: neither incoming nor " "outgoing policy selected\n"); - return 0; + return false; } /* hook values are equal for IPv4 and IPv6 */ if (hook_mask & (1 << NF_IP_PRE_ROUTING | 1 << NF_IP_LOCAL_IN) && info->flags & XT_POLICY_MATCH_OUT) { printk(KERN_ERR "xt_policy: output policy not valid in " "PRE_ROUTING and INPUT\n"); - return 0; + return false; } if (hook_mask & (1 << NF_IP_POST_ROUTING | 1 << NF_IP_LOCAL_OUT) && info->flags & XT_POLICY_MATCH_IN) { printk(KERN_ERR "xt_policy: input policy not valid in " "POST_ROUTING and OUTPUT\n"); - return 0; + return false; } if (info->len > XT_POLICY_MAX_ELEM) { printk(KERN_ERR "xt_policy: too many policy elements\n"); - return 0; + return false; } - return 1; + return true; } -static struct xt_match xt_policy_match[] = { +static struct xt_match xt_policy_match[] __read_mostly = { { .name = "policy", .family = AF_INET, diff --git a/net/netfilter/xt_quota.c b/net/netfilter/xt_quota.c index bfdde06ca0b7..dae97445b87b 100644 --- a/net/netfilter/xt_quota.c +++ b/net/netfilter/xt_quota.c @@ -16,19 +16,20 @@ MODULE_ALIAS("ip6t_quota"); static DEFINE_SPINLOCK(quota_lock); -static int +static bool match(const struct sk_buff *skb, const struct net_device *in, const struct net_device *out, const struct xt_match *match, const void *matchinfo, - int offset, unsigned int protoff, int *hotdrop) + int offset, unsigned int protoff, bool *hotdrop) { - struct xt_quota_info *q = ((struct xt_quota_info *)matchinfo)->master; - int ret = q->flags & XT_QUOTA_INVERT ? 1 : 0; + struct xt_quota_info *q = + ((const struct xt_quota_info *)matchinfo)->master; + bool ret = q->flags & XT_QUOTA_INVERT; spin_lock_bh("a_lock); if (q->quota >= skb->len) { q->quota -= skb->len; - ret ^= 1; + ret = !ret; } else { /* we do not allow even small packets from now on */ q->quota = 0; @@ -38,21 +39,21 @@ match(const struct sk_buff *skb, return ret; } -static int +static bool checkentry(const char *tablename, const void *entry, const struct xt_match *match, void *matchinfo, unsigned int hook_mask) { - struct xt_quota_info *q = (struct xt_quota_info *)matchinfo; + struct xt_quota_info *q = matchinfo; if (q->flags & ~XT_QUOTA_MASK) - return 0; + return false; /* For SMP, we only want to use one set of counters. */ q->master = q; - return 1; + return true; } -static struct xt_match xt_quota_match[] = { +static struct xt_match xt_quota_match[] __read_mostly = { { .name = "quota", .family = AF_INET, diff --git a/net/netfilter/xt_realm.c b/net/netfilter/xt_realm.c index c2017f8af9c4..cc3e76d77a99 100644 --- a/net/netfilter/xt_realm.c +++ b/net/netfilter/xt_realm.c @@ -21,7 +21,7 @@ MODULE_LICENSE("GPL"); MODULE_DESCRIPTION("X_tables realm match"); MODULE_ALIAS("ipt_realm"); -static int +static bool match(const struct sk_buff *skb, const struct net_device *in, const struct net_device *out, @@ -29,15 +29,15 @@ match(const struct sk_buff *skb, const void *matchinfo, int offset, unsigned int protoff, - int *hotdrop) + bool *hotdrop) { const struct xt_realm_info *info = matchinfo; - struct dst_entry *dst = skb->dst; + const struct dst_entry *dst = skb->dst; return (info->id == (dst->tclassid & info->mask)) ^ info->invert; } -static struct xt_match realm_match = { +static struct xt_match realm_match __read_mostly = { .name = "realm", .match = match, .matchsize = sizeof(struct xt_realm_info), diff --git a/net/netfilter/xt_sctp.c b/net/netfilter/xt_sctp.c index f86d8d769d47..c002153b80ab 100644 --- a/net/netfilter/xt_sctp.c +++ b/net/netfilter/xt_sctp.c @@ -23,7 +23,7 @@ MODULE_ALIAS("ipt_sctp"); #define SCCHECK(cond, option, flag, invflag) (!((flag) & (option)) \ || (!!((invflag) & (option)) ^ (cond))) -static int +static bool match_flags(const struct xt_sctp_flag_info *flag_info, const int flag_count, u_int8_t chunktype, @@ -31,23 +31,21 @@ match_flags(const struct xt_sctp_flag_info *flag_info, { int i; - for (i = 0; i < flag_count; i++) { - if (flag_info[i].chunktype == chunktype) { + for (i = 0; i < flag_count; i++) + if (flag_info[i].chunktype == chunktype) return (chunkflags & flag_info[i].flag_mask) == flag_info[i].flag; - } - } - return 1; + return true; } -static inline int +static inline bool match_packet(const struct sk_buff *skb, unsigned int offset, const u_int32_t *chunkmap, int chunk_match_type, const struct xt_sctp_flag_info *flag_info, const int flag_count, - int *hotdrop) + bool *hotdrop) { u_int32_t chunkmapcopy[256 / sizeof (u_int32_t)]; sctp_chunkhdr_t _sch, *sch; @@ -56,16 +54,15 @@ match_packet(const struct sk_buff *skb, int i = 0; #endif - if (chunk_match_type == SCTP_CHUNK_MATCH_ALL) { + if (chunk_match_type == SCTP_CHUNK_MATCH_ALL) SCTP_CHUNKMAP_COPY(chunkmapcopy, chunkmap); - } do { sch = skb_header_pointer(skb, offset, sizeof(_sch), &_sch); if (sch == NULL || sch->length == 0) { duprintf("Dropping invalid SCTP packet.\n"); - *hotdrop = 1; - return 0; + *hotdrop = true; + return false; } duprintf("Chunk num: %d\toffset: %d\ttype: %d\tlength: %d\tflags: %x\n", @@ -80,28 +77,26 @@ match_packet(const struct sk_buff *skb, case SCTP_CHUNK_MATCH_ANY: if (match_flags(flag_info, flag_count, sch->type, sch->flags)) { - return 1; + return true; } break; case SCTP_CHUNK_MATCH_ALL: if (match_flags(flag_info, flag_count, - sch->type, sch->flags)) { + sch->type, sch->flags)) SCTP_CHUNKMAP_CLEAR(chunkmapcopy, sch->type); - } break; case SCTP_CHUNK_MATCH_ONLY: if (!match_flags(flag_info, flag_count, - sch->type, sch->flags)) { - return 0; - } + sch->type, sch->flags)) + return false; break; } } else { switch (chunk_match_type) { case SCTP_CHUNK_MATCH_ONLY: - return 0; + return false; } } } while (offset < skb->len); @@ -110,16 +105,16 @@ match_packet(const struct sk_buff *skb, case SCTP_CHUNK_MATCH_ALL: return SCTP_CHUNKMAP_IS_CLEAR(chunkmap); case SCTP_CHUNK_MATCH_ANY: - return 0; + return false; case SCTP_CHUNK_MATCH_ONLY: - return 1; + return true; } /* This will never be reached, but required to stop compiler whine */ - return 0; + return false; } -static int +static bool match(const struct sk_buff *skb, const struct net_device *in, const struct net_device *out, @@ -127,29 +122,29 @@ match(const struct sk_buff *skb, const void *matchinfo, int offset, unsigned int protoff, - int *hotdrop) + bool *hotdrop) { const struct xt_sctp_info *info = matchinfo; sctp_sctphdr_t _sh, *sh; if (offset) { duprintf("Dropping non-first fragment.. FIXME\n"); - return 0; + return false; } sh = skb_header_pointer(skb, protoff, sizeof(_sh), &_sh); if (sh == NULL) { duprintf("Dropping evil TCP offset=0 tinygram.\n"); - *hotdrop = 1; - return 0; + *hotdrop = true; + return false; } duprintf("spt: %d\tdpt: %d\n", ntohs(sh->source), ntohs(sh->dest)); - return SCCHECK(((ntohs(sh->source) >= info->spts[0]) - && (ntohs(sh->source) <= info->spts[1])), + return SCCHECK(ntohs(sh->source) >= info->spts[0] + && ntohs(sh->source) <= info->spts[1], XT_SCTP_SRC_PORTS, info->flags, info->invflags) - && SCCHECK(((ntohs(sh->dest) >= info->dpts[0]) - && (ntohs(sh->dest) <= info->dpts[1])), + && SCCHECK(ntohs(sh->dest) >= info->dpts[0] + && ntohs(sh->dest) <= info->dpts[1], XT_SCTP_DEST_PORTS, info->flags, info->invflags) && SCCHECK(match_packet(skb, protoff + sizeof (sctp_sctphdr_t), info->chunkmap, info->chunk_match_type, @@ -158,7 +153,7 @@ match(const struct sk_buff *skb, XT_SCTP_CHUNK_TYPES, info->flags, info->invflags); } -static int +static bool checkentry(const char *tablename, const void *inf, const struct xt_match *match, @@ -177,7 +172,7 @@ checkentry(const char *tablename, | SCTP_CHUNK_MATCH_ONLY))); } -static struct xt_match xt_sctp_match[] = { +static struct xt_match xt_sctp_match[] __read_mostly = { { .name = "sctp", .family = AF_INET, diff --git a/net/netfilter/xt_state.c b/net/netfilter/xt_state.c index 149294f7df71..e0a528df19a7 100644 --- a/net/netfilter/xt_state.c +++ b/net/netfilter/xt_state.c @@ -20,7 +20,7 @@ MODULE_DESCRIPTION("ip[6]_tables connection tracking state match module"); MODULE_ALIAS("ipt_state"); MODULE_ALIAS("ip6t_state"); -static int +static bool match(const struct sk_buff *skb, const struct net_device *in, const struct net_device *out, @@ -28,7 +28,7 @@ match(const struct sk_buff *skb, const void *matchinfo, int offset, unsigned int protoff, - int *hotdrop) + bool *hotdrop) { const struct xt_state_info *sinfo = matchinfo; enum ip_conntrack_info ctinfo; @@ -44,18 +44,18 @@ match(const struct sk_buff *skb, return (sinfo->statemask & statebit); } -static int check(const char *tablename, - const void *inf, - const struct xt_match *match, - void *matchinfo, - unsigned int hook_mask) +static bool check(const char *tablename, + const void *inf, + const struct xt_match *match, + void *matchinfo, + unsigned int hook_mask) { if (nf_ct_l3proto_try_module_get(match->family) < 0) { printk(KERN_WARNING "can't load conntrack support for " "proto=%d\n", match->family); - return 0; + return false; } - return 1; + return true; } static void @@ -64,7 +64,7 @@ destroy(const struct xt_match *match, void *matchinfo) nf_ct_l3proto_module_put(match->family); } -static struct xt_match xt_state_match[] = { +static struct xt_match xt_state_match[] __read_mostly = { { .name = "state", .family = AF_INET, diff --git a/net/netfilter/xt_statistic.c b/net/netfilter/xt_statistic.c index 091a9f89f5d5..4089dae4e286 100644 --- a/net/netfilter/xt_statistic.c +++ b/net/netfilter/xt_statistic.c @@ -24,26 +24,26 @@ MODULE_ALIAS("ip6t_statistic"); static DEFINE_SPINLOCK(nth_lock); -static int +static bool match(const struct sk_buff *skb, const struct net_device *in, const struct net_device *out, const struct xt_match *match, const void *matchinfo, - int offset, unsigned int protoff, int *hotdrop) + int offset, unsigned int protoff, bool *hotdrop) { struct xt_statistic_info *info = (struct xt_statistic_info *)matchinfo; - int ret = info->flags & XT_STATISTIC_INVERT ? 1 : 0; + bool ret = info->flags & XT_STATISTIC_INVERT; switch (info->mode) { case XT_STATISTIC_MODE_RANDOM: if ((net_random() & 0x7FFFFFFF) < info->u.random.probability) - ret ^= 1; + ret = !ret; break; case XT_STATISTIC_MODE_NTH: info = info->master; spin_lock_bh(&nth_lock); if (info->u.nth.count++ == info->u.nth.every) { info->u.nth.count = 0; - ret ^= 1; + ret = !ret; } spin_unlock_bh(&nth_lock); break; @@ -52,21 +52,21 @@ match(const struct sk_buff *skb, return ret; } -static int +static bool checkentry(const char *tablename, const void *entry, const struct xt_match *match, void *matchinfo, unsigned int hook_mask) { - struct xt_statistic_info *info = (struct xt_statistic_info *)matchinfo; + struct xt_statistic_info *info = matchinfo; if (info->mode > XT_STATISTIC_MODE_MAX || info->flags & ~XT_STATISTIC_MASK) - return 0; + return false; info->master = info; - return 1; + return true; } -static struct xt_match xt_statistic_match[] = { +static struct xt_match xt_statistic_match[] __read_mostly = { { .name = "statistic", .family = AF_INET, diff --git a/net/netfilter/xt_string.c b/net/netfilter/xt_string.c index 999a005dbd0c..864133442cda 100644 --- a/net/netfilter/xt_string.c +++ b/net/netfilter/xt_string.c @@ -21,14 +21,14 @@ MODULE_LICENSE("GPL"); MODULE_ALIAS("ipt_string"); MODULE_ALIAS("ip6t_string"); -static int match(const struct sk_buff *skb, - const struct net_device *in, - const struct net_device *out, - const struct xt_match *match, - const void *matchinfo, - int offset, - unsigned int protoff, - int *hotdrop) +static bool match(const struct sk_buff *skb, + const struct net_device *in, + const struct net_device *out, + const struct xt_match *match, + const void *matchinfo, + int offset, + unsigned int protoff, + bool *hotdrop) { const struct xt_string_info *conf = matchinfo; struct ts_state state; @@ -42,30 +42,30 @@ static int match(const struct sk_buff *skb, #define STRING_TEXT_PRIV(m) ((struct xt_string_info *) m) -static int checkentry(const char *tablename, - const void *ip, - const struct xt_match *match, - void *matchinfo, - unsigned int hook_mask) +static bool checkentry(const char *tablename, + const void *ip, + const struct xt_match *match, + void *matchinfo, + unsigned int hook_mask) { struct xt_string_info *conf = matchinfo; struct ts_config *ts_conf; /* Damn, can't handle this case properly with iptables... */ if (conf->from_offset > conf->to_offset) - return 0; + return false; if (conf->algo[XT_STRING_MAX_ALGO_NAME_SIZE - 1] != '\0') - return 0; + return false; if (conf->patlen > XT_STRING_MAX_PATTERN_SIZE) - return 0; + return false; ts_conf = textsearch_prepare(conf->algo, conf->pattern, conf->patlen, GFP_KERNEL, TS_AUTOLOAD); if (IS_ERR(ts_conf)) - return 0; + return false; conf->config = ts_conf; - return 1; + return true; } static void destroy(const struct xt_match *match, void *matchinfo) @@ -73,7 +73,7 @@ static void destroy(const struct xt_match *match, void *matchinfo) textsearch_destroy(STRING_TEXT_PRIV(matchinfo)->config); } -static struct xt_match xt_string_match[] = { +static struct xt_match xt_string_match[] __read_mostly = { { .name = "string", .family = AF_INET, diff --git a/net/netfilter/xt_tcpmss.c b/net/netfilter/xt_tcpmss.c index 80571d0749f7..cd5f6d758c68 100644 --- a/net/netfilter/xt_tcpmss.c +++ b/net/netfilter/xt_tcpmss.c @@ -23,7 +23,7 @@ MODULE_AUTHOR("Marc Boucher <marc@mbsi.ca>"); MODULE_DESCRIPTION("iptables TCP MSS match module"); MODULE_ALIAS("ipt_tcpmss"); -static int +static bool match(const struct sk_buff *skb, const struct net_device *in, const struct net_device *out, @@ -31,7 +31,7 @@ match(const struct sk_buff *skb, const void *matchinfo, int offset, unsigned int protoff, - int *hotdrop) + bool *hotdrop) { const struct xt_tcpmss_match_info *info = matchinfo; struct tcphdr _tcph, *th; @@ -77,11 +77,11 @@ out: return info->invert; dropit: - *hotdrop = 1; - return 0; + *hotdrop = true; + return false; } -static struct xt_match xt_tcpmss_match[] = { +static struct xt_match xt_tcpmss_match[] __read_mostly = { { .name = "tcpmss", .family = AF_INET, diff --git a/net/netfilter/xt_tcpudp.c b/net/netfilter/xt_tcpudp.c index 46414b562a19..223f9bded672 100644 --- a/net/netfilter/xt_tcpudp.c +++ b/net/netfilter/xt_tcpudp.c @@ -27,22 +27,19 @@ MODULE_ALIAS("ip6t_tcp"); /* Returns 1 if the port is matched by the range, 0 otherwise */ -static inline int -port_match(u_int16_t min, u_int16_t max, u_int16_t port, int invert) +static inline bool +port_match(u_int16_t min, u_int16_t max, u_int16_t port, bool invert) { - int ret; - - ret = (port >= min && port <= max) ^ invert; - return ret; + return (port >= min && port <= max) ^ invert; } -static int +static bool tcp_find_option(u_int8_t option, const struct sk_buff *skb, unsigned int protoff, unsigned int optlen, - int invert, - int *hotdrop) + bool invert, + bool *hotdrop) { /* tcp.doff is only 4 bits, ie. max 15 * 4 bytes */ u_int8_t _opt[60 - sizeof(struct tcphdr)], *op; @@ -57,8 +54,8 @@ tcp_find_option(u_int8_t option, op = skb_header_pointer(skb, protoff + sizeof(struct tcphdr), optlen, _opt); if (op == NULL) { - *hotdrop = 1; - return 0; + *hotdrop = true; + return false; } for (i = 0; i < optlen; ) { @@ -70,7 +67,7 @@ tcp_find_option(u_int8_t option, return invert; } -static int +static bool tcp_match(const struct sk_buff *skb, const struct net_device *in, const struct net_device *out, @@ -78,7 +75,7 @@ tcp_match(const struct sk_buff *skb, const void *matchinfo, int offset, unsigned int protoff, - int *hotdrop) + bool *hotdrop) { struct tcphdr _tcph, *th; const struct xt_tcp *tcpinfo = matchinfo; @@ -92,51 +89,51 @@ tcp_match(const struct sk_buff *skb, */ if (offset == 1) { duprintf("Dropping evil TCP offset=1 frag.\n"); - *hotdrop = 1; + *hotdrop = true; } /* Must not be a fragment. */ - return 0; + return false; } -#define FWINVTCP(bool,invflg) ((bool) ^ !!(tcpinfo->invflags & invflg)) +#define FWINVTCP(bool, invflg) ((bool) ^ !!(tcpinfo->invflags & (invflg))) th = skb_header_pointer(skb, protoff, sizeof(_tcph), &_tcph); if (th == NULL) { /* We've been asked to examine this packet, and we can't. Hence, no choice but to drop. */ duprintf("Dropping evil TCP offset=0 tinygram.\n"); - *hotdrop = 1; - return 0; + *hotdrop = true; + return false; } if (!port_match(tcpinfo->spts[0], tcpinfo->spts[1], ntohs(th->source), !!(tcpinfo->invflags & XT_TCP_INV_SRCPT))) - return 0; + return false; if (!port_match(tcpinfo->dpts[0], tcpinfo->dpts[1], ntohs(th->dest), !!(tcpinfo->invflags & XT_TCP_INV_DSTPT))) - return 0; + return false; if (!FWINVTCP((((unsigned char *)th)[13] & tcpinfo->flg_mask) == tcpinfo->flg_cmp, XT_TCP_INV_FLAGS)) - return 0; + return false; if (tcpinfo->option) { if (th->doff * 4 < sizeof(_tcph)) { - *hotdrop = 1; - return 0; + *hotdrop = true; + return false; } if (!tcp_find_option(tcpinfo->option, skb, protoff, th->doff*4 - sizeof(_tcph), tcpinfo->invflags & XT_TCP_INV_OPTION, hotdrop)) - return 0; + return false; } - return 1; + return true; } /* Called when user tries to insert an entry of this type. */ -static int +static bool tcp_checkentry(const char *tablename, const void *info, const struct xt_match *match, @@ -149,7 +146,7 @@ tcp_checkentry(const char *tablename, return !(tcpinfo->invflags & ~XT_TCP_INV_MASK); } -static int +static bool udp_match(const struct sk_buff *skb, const struct net_device *in, const struct net_device *out, @@ -157,22 +154,22 @@ udp_match(const struct sk_buff *skb, const void *matchinfo, int offset, unsigned int protoff, - int *hotdrop) + bool *hotdrop) { struct udphdr _udph, *uh; const struct xt_udp *udpinfo = matchinfo; /* Must not be a fragment. */ if (offset) - return 0; + return false; uh = skb_header_pointer(skb, protoff, sizeof(_udph), &_udph); if (uh == NULL) { /* We've been asked to examine this packet, and we can't. Hence, no choice but to drop. */ duprintf("Dropping evil UDP tinygram.\n"); - *hotdrop = 1; - return 0; + *hotdrop = true; + return false; } return port_match(udpinfo->spts[0], udpinfo->spts[1], @@ -184,20 +181,20 @@ udp_match(const struct sk_buff *skb, } /* Called when user tries to insert an entry of this type. */ -static int +static bool udp_checkentry(const char *tablename, const void *info, const struct xt_match *match, void *matchinfo, unsigned int hook_mask) { - const struct xt_tcp *udpinfo = matchinfo; + const struct xt_udp *udpinfo = matchinfo; /* Must specify no unknown invflags */ return !(udpinfo->invflags & ~XT_UDP_INV_MASK); } -static struct xt_match xt_tcpudp_match[] = { +static struct xt_match xt_tcpudp_match[] __read_mostly = { { .name = "tcp", .family = AF_INET, diff --git a/net/netlabel/netlabel_cipso_v4.c b/net/netlabel/netlabel_cipso_v4.c index 24b660f16ce3..c060e3f991f1 100644 --- a/net/netlabel/netlabel_cipso_v4.c +++ b/net/netlabel/netlabel_cipso_v4.c @@ -41,6 +41,7 @@ #include "netlabel_user.h" #include "netlabel_cipso_v4.h" +#include "netlabel_mgmt.h" /* Argument struct for cipso_v4_doi_walk() */ struct netlbl_cipsov4_doiwalk_arg { @@ -419,6 +420,8 @@ static int netlbl_cipsov4_add(struct sk_buff *skb, struct genl_info *info) ret_val = netlbl_cipsov4_add_pass(info); break; } + if (ret_val == 0) + netlbl_mgmt_protocount_inc(); audit_buf = netlbl_audit_start_common(AUDIT_MAC_CIPSOV4_ADD, &audit_info); @@ -694,6 +697,8 @@ static int netlbl_cipsov4_remove(struct sk_buff *skb, struct genl_info *info) ret_val = cipso_v4_doi_remove(doi, &audit_info, netlbl_cipsov4_doi_free); + if (ret_val == 0) + netlbl_mgmt_protocount_dec(); audit_buf = netlbl_audit_start_common(AUDIT_MAC_CIPSOV4_DEL, &audit_info); diff --git a/net/netlabel/netlabel_domainhash.c b/net/netlabel/netlabel_domainhash.c index f46a0aeec44f..b6c844b7e1c1 100644 --- a/net/netlabel/netlabel_domainhash.c +++ b/net/netlabel/netlabel_domainhash.c @@ -126,7 +126,9 @@ static struct netlbl_dom_map *netlbl_domhsh_search(const char *domain, u32 def) if (domain != NULL) { bkt = netlbl_domhsh_hash(domain); - list_for_each_entry_rcu(iter, &netlbl_domhsh->tbl[bkt], list) + list_for_each_entry_rcu(iter, + &rcu_dereference(netlbl_domhsh)->tbl[bkt], + list) if (iter->valid && strcmp(iter->domain, domain) == 0) return iter; } @@ -227,7 +229,7 @@ int netlbl_domhsh_add(struct netlbl_dom_map *entry, spin_lock(&netlbl_domhsh_lock); if (netlbl_domhsh_search(entry->domain, 0) == NULL) list_add_tail_rcu(&entry->list, - &netlbl_domhsh->tbl[bkt]); + &rcu_dereference(netlbl_domhsh)->tbl[bkt]); else ret_val = -EEXIST; spin_unlock(&netlbl_domhsh_lock); @@ -423,8 +425,8 @@ int netlbl_domhsh_walk(u32 *skip_bkt, iter_bkt < rcu_dereference(netlbl_domhsh)->size; iter_bkt++, chain_cnt = 0) { list_for_each_entry_rcu(iter_entry, - &netlbl_domhsh->tbl[iter_bkt], - list) + &rcu_dereference(netlbl_domhsh)->tbl[iter_bkt], + list) if (iter_entry->valid) { if (chain_cnt++ < *skip_chain) continue; diff --git a/net/netlabel/netlabel_kapi.c b/net/netlabel/netlabel_kapi.c index b165712aaa70..4f50949722a9 100644 --- a/net/netlabel/netlabel_kapi.c +++ b/net/netlabel/netlabel_kapi.c @@ -38,6 +38,7 @@ #include "netlabel_domainhash.h" #include "netlabel_unlabeled.h" #include "netlabel_user.h" +#include "netlabel_mgmt.h" /* * Security Attribute Functions @@ -245,6 +246,26 @@ int netlbl_secattr_catmap_setrng(struct netlbl_lsm_secattr_catmap *catmap, */ /** + * netlbl_enabled - Determine if the NetLabel subsystem is enabled + * + * Description: + * The LSM can use this function to determine if it should use NetLabel + * security attributes in it's enforcement mechanism. Currently, NetLabel is + * considered to be enabled when it's configuration contains a valid setup for + * at least one labeled protocol (i.e. NetLabel can understand incoming + * labeled packets of at least one type); otherwise NetLabel is considered to + * be disabled. + * + */ +int netlbl_enabled(void) +{ + /* At some point we probably want to expose this mechanism to the user + * as well so that admins can toggle NetLabel regardless of the + * configuration */ + return (netlbl_mgmt_protocount_value() > 0 ? 1 : 0); +} + +/** * netlbl_socket_setattr - Label a socket using the correct protocol * @sk: the socket to label * @secattr: the security attributes diff --git a/net/netlabel/netlabel_mgmt.c b/net/netlabel/netlabel_mgmt.c index e00fc219c72b..5315dacc5222 100644 --- a/net/netlabel/netlabel_mgmt.c +++ b/net/netlabel/netlabel_mgmt.c @@ -42,6 +42,10 @@ #include "netlabel_user.h" #include "netlabel_mgmt.h" +/* NetLabel configured protocol count */ +static DEFINE_SPINLOCK(netlabel_mgmt_protocount_lock); +static u32 netlabel_mgmt_protocount = 0; + /* Argument struct for netlbl_domhsh_walk() */ struct netlbl_domhsh_walk_arg { struct netlink_callback *nl_cb; @@ -67,6 +71,67 @@ static const struct nla_policy netlbl_mgmt_genl_policy[NLBL_MGMT_A_MAX + 1] = { }; /* + * NetLabel Misc Managment Functions + */ + +/** + * netlbl_mgmt_protocount_inc - Increment the configured labeled protocol count + * + * Description: + * Increment the number of labeled protocol configurations in the current + * NetLabel configuration. Keep track of this for use in determining if + * NetLabel label enforcement should be active/enabled or not in the LSM. + * + */ +void netlbl_mgmt_protocount_inc(void) +{ + rcu_read_lock(); + spin_lock(&netlabel_mgmt_protocount_lock); + netlabel_mgmt_protocount++; + spin_unlock(&netlabel_mgmt_protocount_lock); + rcu_read_unlock(); +} + +/** + * netlbl_mgmt_protocount_dec - Decrement the configured labeled protocol count + * + * Description: + * Decrement the number of labeled protocol configurations in the current + * NetLabel configuration. Keep track of this for use in determining if + * NetLabel label enforcement should be active/enabled or not in the LSM. + * + */ +void netlbl_mgmt_protocount_dec(void) +{ + rcu_read_lock(); + spin_lock(&netlabel_mgmt_protocount_lock); + if (netlabel_mgmt_protocount > 0) + netlabel_mgmt_protocount--; + spin_unlock(&netlabel_mgmt_protocount_lock); + rcu_read_unlock(); +} + +/** + * netlbl_mgmt_protocount_value - Return the number of configured protocols + * + * Description: + * Return the number of labeled protocols in the current NetLabel + * configuration. This value is useful in determining if NetLabel label + * enforcement should be active/enabled or not in the LSM. + * + */ +u32 netlbl_mgmt_protocount_value(void) +{ + u32 val; + + rcu_read_lock(); + val = netlabel_mgmt_protocount; + rcu_read_unlock(); + + return val; +} + +/* * NetLabel Command Handlers */ diff --git a/net/netlabel/netlabel_mgmt.h b/net/netlabel/netlabel_mgmt.h index 3642d3bfc8eb..ccb2b3923591 100644 --- a/net/netlabel/netlabel_mgmt.h +++ b/net/netlabel/netlabel_mgmt.h @@ -168,4 +168,9 @@ enum { /* NetLabel protocol functions */ int netlbl_mgmt_genl_init(void); +/* NetLabel misc management functions */ +void netlbl_mgmt_protocount_inc(void); +void netlbl_mgmt_protocount_dec(void); +u32 netlbl_mgmt_protocount_value(void); + #endif diff --git a/net/netlabel/netlabel_user.c b/net/netlabel/netlabel_user.c index 42f12bd65964..85a96a3fddaa 100644 --- a/net/netlabel/netlabel_user.c +++ b/net/netlabel/netlabel_user.c @@ -46,10 +46,6 @@ #include "netlabel_cipso_v4.h" #include "netlabel_user.h" -/* do not do any auditing if audit_enabled == 0, see kernel/audit.c for - * details */ -extern int audit_enabled; - /* * NetLabel NETLINK Setup Functions */ @@ -117,8 +113,10 @@ struct audit_buffer *netlbl_audit_start_common(int type, if (audit_info->secid != 0 && security_secid_to_secctx(audit_info->secid, &secctx, - &secctx_len) == 0) + &secctx_len) == 0) { audit_log_format(audit_buf, " subj=%s", secctx); + security_release_secctx(secctx, secctx_len); + } return audit_buf; } diff --git a/net/netlink/af_netlink.c b/net/netlink/af_netlink.c index 1f15821c8da4..5681ce3aebca 100644 --- a/net/netlink/af_netlink.c +++ b/net/netlink/af_netlink.c @@ -62,6 +62,7 @@ #include <net/netlink.h> #define NLGRPSZ(x) (ALIGN(x, sizeof(unsigned long) * 8) / 8) +#define NLGRPLONGS(x) (NLGRPSZ(x)/sizeof(unsigned long)) struct netlink_sock { /* struct sock has to be the first member of netlink_sock */ @@ -314,10 +315,12 @@ netlink_update_listeners(struct sock *sk) unsigned long mask; unsigned int i; - for (i = 0; i < NLGRPSZ(tbl->groups)/sizeof(unsigned long); i++) { + for (i = 0; i < NLGRPLONGS(tbl->groups); i++) { mask = 0; - sk_for_each_bound(sk, node, &tbl->mc_list) - mask |= nlk_sk(sk)->groups[i]; + sk_for_each_bound(sk, node, &tbl->mc_list) { + if (i < NLGRPLONGS(nlk_sk(sk)->ngroups)) + mask |= nlk_sk(sk)->groups[i]; + } tbl->listeners[i] = mask; } /* this function is only called with the netlink table "grabbed", which @@ -555,26 +558,37 @@ netlink_update_subscriptions(struct sock *sk, unsigned int subscriptions) nlk->subscriptions = subscriptions; } -static int netlink_alloc_groups(struct sock *sk) +static int netlink_realloc_groups(struct sock *sk) { struct netlink_sock *nlk = nlk_sk(sk); unsigned int groups; + unsigned long *new_groups; int err = 0; - netlink_lock_table(); + netlink_table_grab(); + groups = nl_table[sk->sk_protocol].groups; - if (!nl_table[sk->sk_protocol].registered) + if (!nl_table[sk->sk_protocol].registered) { err = -ENOENT; - netlink_unlock_table(); + goto out_unlock; + } - if (err) - return err; + if (nlk->ngroups >= groups) + goto out_unlock; - nlk->groups = kzalloc(NLGRPSZ(groups), GFP_KERNEL); - if (nlk->groups == NULL) - return -ENOMEM; + new_groups = krealloc(nlk->groups, NLGRPSZ(groups), GFP_ATOMIC); + if (new_groups == NULL) { + err = -ENOMEM; + goto out_unlock; + } + memset((char*)new_groups + NLGRPSZ(nlk->ngroups), 0, + NLGRPSZ(groups) - NLGRPSZ(nlk->ngroups)); + + nlk->groups = new_groups; nlk->ngroups = groups; - return 0; + out_unlock: + netlink_table_ungrab(); + return err; } static int netlink_bind(struct socket *sock, struct sockaddr *addr, int addr_len) @@ -591,11 +605,9 @@ static int netlink_bind(struct socket *sock, struct sockaddr *addr, int addr_len if (nladdr->nl_groups) { if (!netlink_capable(sock, NL_NONROOT_RECV)) return -EPERM; - if (nlk->groups == NULL) { - err = netlink_alloc_groups(sk); - if (err) - return err; - } + err = netlink_realloc_groups(sk); + if (err) + return err; } if (nlk->pid) { @@ -839,10 +851,18 @@ retry: int netlink_has_listeners(struct sock *sk, unsigned int group) { int res = 0; + unsigned long *listeners; BUG_ON(!(nlk_sk(sk)->flags & NETLINK_KERNEL_SOCKET)); + + rcu_read_lock(); + listeners = rcu_dereference(nl_table[sk->sk_protocol].listeners); + if (group - 1 < nl_table[sk->sk_protocol].groups) - res = test_bit(group - 1, nl_table[sk->sk_protocol].listeners); + res = test_bit(group - 1, listeners); + + rcu_read_unlock(); + return res; } EXPORT_SYMBOL_GPL(netlink_has_listeners); @@ -1007,18 +1027,36 @@ void netlink_set_err(struct sock *ssk, u32 pid, u32 group, int code) read_unlock(&nl_table_lock); } +/* must be called with netlink table grabbed */ +static void netlink_update_socket_mc(struct netlink_sock *nlk, + unsigned int group, + int is_new) +{ + int old, new = !!is_new, subscriptions; + + old = test_bit(group - 1, nlk->groups); + subscriptions = nlk->subscriptions - old + new; + if (new) + __set_bit(group - 1, nlk->groups); + else + __clear_bit(group - 1, nlk->groups); + netlink_update_subscriptions(&nlk->sk, subscriptions); + netlink_update_listeners(&nlk->sk); +} + static int netlink_setsockopt(struct socket *sock, int level, int optname, char __user *optval, int optlen) { struct sock *sk = sock->sk; struct netlink_sock *nlk = nlk_sk(sk); - int val = 0, err; + unsigned int val = 0; + int err; if (level != SOL_NETLINK) return -ENOPROTOOPT; if (optlen >= sizeof(int) && - get_user(val, (int __user *)optval)) + get_user(val, (unsigned int __user *)optval)) return -EFAULT; switch (optname) { @@ -1031,27 +1069,16 @@ static int netlink_setsockopt(struct socket *sock, int level, int optname, break; case NETLINK_ADD_MEMBERSHIP: case NETLINK_DROP_MEMBERSHIP: { - unsigned int subscriptions; - int old, new = optname == NETLINK_ADD_MEMBERSHIP ? 1 : 0; - if (!netlink_capable(sock, NL_NONROOT_RECV)) return -EPERM; - if (nlk->groups == NULL) { - err = netlink_alloc_groups(sk); - if (err) - return err; - } + err = netlink_realloc_groups(sk); + if (err) + return err; if (!val || val - 1 >= nlk->ngroups) return -EINVAL; netlink_table_grab(); - old = test_bit(val - 1, nlk->groups); - subscriptions = nlk->subscriptions - old + new; - if (new) - __set_bit(val - 1, nlk->groups); - else - __clear_bit(val - 1, nlk->groups); - netlink_update_subscriptions(sk, subscriptions); - netlink_update_listeners(sk); + netlink_update_socket_mc(nlk, val, + optname == NETLINK_ADD_MEMBERSHIP); netlink_table_ungrab(); err = 0; break; @@ -1327,6 +1354,71 @@ out_sock_release: return NULL; } +/** + * netlink_change_ngroups - change number of multicast groups + * + * This changes the number of multicast groups that are available + * on a certain netlink family. Note that it is not possible to + * change the number of groups to below 32. Also note that it does + * not implicitly call netlink_clear_multicast_users() when the + * number of groups is reduced. + * + * @sk: The kernel netlink socket, as returned by netlink_kernel_create(). + * @groups: The new number of groups. + */ +int netlink_change_ngroups(struct sock *sk, unsigned int groups) +{ + unsigned long *listeners, *old = NULL; + struct netlink_table *tbl = &nl_table[sk->sk_protocol]; + int err = 0; + + if (groups < 32) + groups = 32; + + netlink_table_grab(); + if (NLGRPSZ(tbl->groups) < NLGRPSZ(groups)) { + listeners = kzalloc(NLGRPSZ(groups), GFP_ATOMIC); + if (!listeners) { + err = -ENOMEM; + goto out_ungrab; + } + old = tbl->listeners; + memcpy(listeners, old, NLGRPSZ(tbl->groups)); + rcu_assign_pointer(tbl->listeners, listeners); + } + tbl->groups = groups; + + out_ungrab: + netlink_table_ungrab(); + synchronize_rcu(); + kfree(old); + return err; +} +EXPORT_SYMBOL(netlink_change_ngroups); + +/** + * netlink_clear_multicast_users - kick off multicast listeners + * + * This function removes all listeners from the given group. + * @ksk: The kernel netlink socket, as returned by + * netlink_kernel_create(). + * @group: The multicast group to clear. + */ +void netlink_clear_multicast_users(struct sock *ksk, unsigned int group) +{ + struct sock *sk; + struct hlist_node *node; + struct netlink_table *tbl = &nl_table[ksk->sk_protocol]; + + netlink_table_grab(); + + sk_for_each_bound(sk, node, &tbl->mc_list) + netlink_update_socket_mc(nlk_sk(sk), group, 0); + + netlink_table_ungrab(); +} +EXPORT_SYMBOL(netlink_clear_multicast_users); + void netlink_set_nonroot(int protocol, unsigned int flags) { if ((unsigned int)protocol < MAX_LINKS) @@ -1713,7 +1805,7 @@ static int netlink_seq_show(struct seq_file *seq, void *v) return 0; } -static struct seq_operations netlink_seq_ops = { +static const struct seq_operations netlink_seq_ops = { .start = netlink_seq_start, .next = netlink_seq_next, .stop = netlink_seq_stop, diff --git a/net/netlink/attr.c b/net/netlink/attr.c index c591212793ee..e4d7bed99c2e 100644 --- a/net/netlink/attr.c +++ b/net/netlink/attr.c @@ -72,6 +72,17 @@ static int validate_nla(struct nlattr *nla, int maxtype, return -ERANGE; break; + case NLA_NESTED_COMPAT: + if (attrlen < pt->len) + return -ERANGE; + if (attrlen < NLA_ALIGN(pt->len)) + break; + if (attrlen < NLA_ALIGN(pt->len) + NLA_HDRLEN) + return -ERANGE; + nla = nla_data(nla) + NLA_ALIGN(pt->len); + if (attrlen < NLA_ALIGN(pt->len) + NLA_HDRLEN + nla_len(nla)) + return -ERANGE; + break; default: if (pt->len) minlen = pt->len; diff --git a/net/netlink/genetlink.c b/net/netlink/genetlink.c index b9ab62f938d0..8c11ca4a2121 100644 --- a/net/netlink/genetlink.c +++ b/net/netlink/genetlink.c @@ -3,6 +3,7 @@ * * Authors: Jamal Hadi Salim * Thomas Graf <tgraf@suug.ch> + * Johannes Berg <johannes@sipsolutions.net> */ #include <linux/module.h> @@ -13,6 +14,7 @@ #include <linux/string.h> #include <linux/skbuff.h> #include <linux/mutex.h> +#include <linux/bitmap.h> #include <net/sock.h> #include <net/genetlink.h> @@ -42,6 +44,16 @@ static void genl_unlock(void) #define GENL_FAM_TAB_MASK (GENL_FAM_TAB_SIZE - 1) static struct list_head family_ht[GENL_FAM_TAB_SIZE]; +/* + * Bitmap of multicast groups that are currently in use. + * + * To avoid an allocation at boot of just one unsigned long, + * declare it global instead. + * Bit 0 is marked as already used since group 0 is invalid. + */ +static unsigned long mc_group_start = 0x1; +static unsigned long *mc_groups = &mc_group_start; +static unsigned long mc_groups_longs = 1; static int genl_ctrl_event(int event, void *data); @@ -116,6 +128,122 @@ static inline u16 genl_generate_id(void) return id_gen_idx; } +static struct genl_multicast_group notify_grp; + +/** + * genl_register_mc_group - register a multicast group + * + * Registers the specified multicast group and notifies userspace + * about the new group. + * + * Returns 0 on success or a negative error code. + * + * @family: The generic netlink family the group shall be registered for. + * @grp: The group to register, must have a name. + */ +int genl_register_mc_group(struct genl_family *family, + struct genl_multicast_group *grp) +{ + int id; + unsigned long *new_groups; + int err; + + BUG_ON(grp->name[0] == '\0'); + + genl_lock(); + + /* special-case our own group */ + if (grp == ¬ify_grp) + id = GENL_ID_CTRL; + else + id = find_first_zero_bit(mc_groups, + mc_groups_longs * BITS_PER_LONG); + + + if (id >= mc_groups_longs * BITS_PER_LONG) { + size_t nlen = (mc_groups_longs + 1) * sizeof(unsigned long); + + if (mc_groups == &mc_group_start) { + new_groups = kzalloc(nlen, GFP_KERNEL); + if (!new_groups) { + err = -ENOMEM; + goto out; + } + mc_groups = new_groups; + *mc_groups = mc_group_start; + } else { + new_groups = krealloc(mc_groups, nlen, GFP_KERNEL); + if (!new_groups) { + err = -ENOMEM; + goto out; + } + mc_groups = new_groups; + mc_groups[mc_groups_longs] = 0; + } + mc_groups_longs++; + } + + err = netlink_change_ngroups(genl_sock, + mc_groups_longs * BITS_PER_LONG); + if (err) + goto out; + + grp->id = id; + set_bit(id, mc_groups); + list_add_tail(&grp->list, &family->mcast_groups); + grp->family = family; + + genl_ctrl_event(CTRL_CMD_NEWMCAST_GRP, grp); + out: + genl_unlock(); + return err; +} +EXPORT_SYMBOL(genl_register_mc_group); + +static void __genl_unregister_mc_group(struct genl_family *family, + struct genl_multicast_group *grp) +{ + BUG_ON(grp->family != family); + netlink_clear_multicast_users(genl_sock, grp->id); + clear_bit(grp->id, mc_groups); + list_del(&grp->list); + genl_ctrl_event(CTRL_CMD_DELMCAST_GRP, grp); + grp->id = 0; + grp->family = NULL; +} + +/** + * genl_unregister_mc_group - unregister a multicast group + * + * Unregisters the specified multicast group and notifies userspace + * about it. All current listeners on the group are removed. + * + * Note: It is not necessary to unregister all multicast groups before + * unregistering the family, unregistering the family will cause + * all assigned multicast groups to be unregistered automatically. + * + * @family: Generic netlink family the group belongs to. + * @grp: The group to unregister, must have been registered successfully + * previously. + */ +void genl_unregister_mc_group(struct genl_family *family, + struct genl_multicast_group *grp) +{ + genl_lock(); + __genl_unregister_mc_group(family, grp); + genl_unlock(); +} + +static void genl_unregister_mc_groups(struct genl_family *family) +{ + struct genl_multicast_group *grp, *tmp; + + genl_lock(); + list_for_each_entry_safe(grp, tmp, &family->mcast_groups, list) + __genl_unregister_mc_group(family, grp); + genl_unlock(); +} + /** * genl_register_ops - register generic netlink operations * @family: generic netlink family @@ -216,6 +344,7 @@ int genl_register_family(struct genl_family *family) goto errout; INIT_LIST_HEAD(&family->ops_list); + INIT_LIST_HEAD(&family->mcast_groups); genl_lock(); @@ -275,6 +404,8 @@ int genl_unregister_family(struct genl_family *family) { struct genl_family *rc; + genl_unregister_mc_groups(family); + genl_lock(); list_for_each_entry(rc, genl_family_chain(family->id), family_list) { @@ -410,6 +541,67 @@ static int ctrl_fill_info(struct genl_family *family, u32 pid, u32 seq, nla_nest_end(skb, nla_ops); } + if (!list_empty(&family->mcast_groups)) { + struct genl_multicast_group *grp; + struct nlattr *nla_grps; + int idx = 1; + + nla_grps = nla_nest_start(skb, CTRL_ATTR_MCAST_GROUPS); + if (nla_grps == NULL) + goto nla_put_failure; + + list_for_each_entry(grp, &family->mcast_groups, list) { + struct nlattr *nest; + + nest = nla_nest_start(skb, idx++); + if (nest == NULL) + goto nla_put_failure; + + NLA_PUT_U32(skb, CTRL_ATTR_MCAST_GRP_ID, grp->id); + NLA_PUT_STRING(skb, CTRL_ATTR_MCAST_GRP_NAME, + grp->name); + + nla_nest_end(skb, nest); + } + nla_nest_end(skb, nla_grps); + } + + return genlmsg_end(skb, hdr); + +nla_put_failure: + return genlmsg_cancel(skb, hdr); +} + +static int ctrl_fill_mcgrp_info(struct genl_multicast_group *grp, u32 pid, + u32 seq, u32 flags, struct sk_buff *skb, + u8 cmd) +{ + void *hdr; + struct nlattr *nla_grps; + struct nlattr *nest; + + hdr = genlmsg_put(skb, pid, seq, &genl_ctrl, flags, cmd); + if (hdr == NULL) + return -1; + + NLA_PUT_STRING(skb, CTRL_ATTR_FAMILY_NAME, grp->family->name); + NLA_PUT_U16(skb, CTRL_ATTR_FAMILY_ID, grp->family->id); + + nla_grps = nla_nest_start(skb, CTRL_ATTR_MCAST_GROUPS); + if (nla_grps == NULL) + goto nla_put_failure; + + nest = nla_nest_start(skb, 1); + if (nest == NULL) + goto nla_put_failure; + + NLA_PUT_U32(skb, CTRL_ATTR_MCAST_GRP_ID, grp->id); + NLA_PUT_STRING(skb, CTRL_ATTR_MCAST_GRP_NAME, + grp->name); + + nla_nest_end(skb, nest); + nla_nest_end(skb, nla_grps); + return genlmsg_end(skb, hdr); nla_put_failure: @@ -453,8 +645,8 @@ errout: return skb->len; } -static struct sk_buff *ctrl_build_msg(struct genl_family *family, u32 pid, - int seq, u8 cmd) +static struct sk_buff *ctrl_build_family_msg(struct genl_family *family, + u32 pid, int seq, u8 cmd) { struct sk_buff *skb; int err; @@ -472,6 +664,25 @@ static struct sk_buff *ctrl_build_msg(struct genl_family *family, u32 pid, return skb; } +static struct sk_buff *ctrl_build_mcgrp_msg(struct genl_multicast_group *grp, + u32 pid, int seq, u8 cmd) +{ + struct sk_buff *skb; + int err; + + skb = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL); + if (skb == NULL) + return ERR_PTR(-ENOBUFS); + + err = ctrl_fill_mcgrp_info(grp, pid, seq, 0, skb, cmd); + if (err < 0) { + nlmsg_free(skb); + return ERR_PTR(err); + } + + return skb; +} + static const struct nla_policy ctrl_policy[CTRL_ATTR_MAX+1] = { [CTRL_ATTR_FAMILY_ID] = { .type = NLA_U16 }, [CTRL_ATTR_FAMILY_NAME] = { .type = NLA_NUL_STRING, @@ -501,8 +712,8 @@ static int ctrl_getfamily(struct sk_buff *skb, struct genl_info *info) goto errout; } - msg = ctrl_build_msg(res, info->snd_pid, info->snd_seq, - CTRL_CMD_NEWFAMILY); + msg = ctrl_build_family_msg(res, info->snd_pid, info->snd_seq, + CTRL_CMD_NEWFAMILY); if (IS_ERR(msg)) { err = PTR_ERR(msg); goto errout; @@ -523,7 +734,15 @@ static int genl_ctrl_event(int event, void *data) switch (event) { case CTRL_CMD_NEWFAMILY: case CTRL_CMD_DELFAMILY: - msg = ctrl_build_msg(data, 0, 0, event); + msg = ctrl_build_family_msg(data, 0, 0, event); + if (IS_ERR(msg)) + return PTR_ERR(msg); + + genlmsg_multicast(msg, 0, GENL_ID_CTRL, GFP_KERNEL); + break; + case CTRL_CMD_NEWMCAST_GRP: + case CTRL_CMD_DELMCAST_GRP: + msg = ctrl_build_mcgrp_msg(data, 0, 0, event); if (IS_ERR(msg)) return PTR_ERR(msg); @@ -541,6 +760,10 @@ static struct genl_ops genl_ctrl_ops = { .policy = ctrl_policy, }; +static struct genl_multicast_group notify_grp = { + .name = "notify", +}; + static int __init genl_init(void) { int i, err; @@ -557,11 +780,17 @@ static int __init genl_init(void) goto errout_register; netlink_set_nonroot(NETLINK_GENERIC, NL_NONROOT_RECV); - genl_sock = netlink_kernel_create(NETLINK_GENERIC, GENL_MAX_ID, - genl_rcv, NULL, THIS_MODULE); + + /* we'll bump the group number right afterwards */ + genl_sock = netlink_kernel_create(NETLINK_GENERIC, 0, genl_rcv, + NULL, THIS_MODULE); if (genl_sock == NULL) panic("GENL: Cannot initialize generic netlink\n"); + err = genl_register_mc_group(&genl_ctrl, ¬ify_grp); + if (err < 0) + goto errout_register; + return 0; errout_register: diff --git a/net/netrom/af_netrom.c b/net/netrom/af_netrom.c index 5d4a26c2aa0c..dc9273295a38 100644 --- a/net/netrom/af_netrom.c +++ b/net/netrom/af_netrom.c @@ -720,7 +720,7 @@ static int nr_connect(struct socket *sock, struct sockaddr *uaddr, for (;;) { prepare_to_wait(sk->sk_sleep, &wait, - TASK_INTERRUPTIBLE); + TASK_INTERRUPTIBLE); if (sk->sk_state != TCP_SYN_SENT) break; if (!signal_pending(current)) { @@ -1328,7 +1328,7 @@ static int nr_info_show(struct seq_file *seq, void *v) return 0; } -static struct seq_operations nr_info_seqops = { +static const struct seq_operations nr_info_seqops = { .start = nr_info_start, .next = nr_info_next, .stop = nr_info_stop, diff --git a/net/netrom/nr_route.c b/net/netrom/nr_route.c index 2f76e062609d..24fe4a66d297 100644 --- a/net/netrom/nr_route.c +++ b/net/netrom/nr_route.c @@ -922,7 +922,7 @@ static int nr_node_show(struct seq_file *seq, void *v) return 0; } -static struct seq_operations nr_node_seqops = { +static const struct seq_operations nr_node_seqops = { .start = nr_node_start, .next = nr_node_next, .stop = nr_node_stop, @@ -1006,7 +1006,7 @@ static int nr_neigh_show(struct seq_file *seq, void *v) return 0; } -static struct seq_operations nr_neigh_seqops = { +static const struct seq_operations nr_neigh_seqops = { .start = nr_neigh_start, .next = nr_neigh_next, .stop = nr_neigh_stop, diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c index f8b83014ccca..1322d62b5d97 100644 --- a/net/packet/af_packet.c +++ b/net/packet/af_packet.c @@ -108,7 +108,7 @@ Outgoing, dev->hard_header!=NULL Incoming, dev->hard_header==NULL mac_header -> UNKNOWN position. It is very likely, that it points to ll header. PPP makes it, that is wrong, because introduce - assymetry between rx and tx paths. + assymetry between rx and tx paths. data -> data Outgoing, dev->hard_header==NULL @@ -1928,7 +1928,7 @@ static int packet_seq_show(struct seq_file *seq, void *v) return 0; } -static struct seq_operations packet_seq_ops = { +static const struct seq_operations packet_seq_ops = { .start = packet_seq_start, .next = packet_seq_next, .stop = packet_seq_stop, diff --git a/net/rfkill/rfkill-input.c b/net/rfkill/rfkill-input.c index 230e35c59786..9f746be58854 100644 --- a/net/rfkill/rfkill-input.c +++ b/net/rfkill/rfkill-input.c @@ -83,7 +83,7 @@ static DEFINE_RFKILL_TASK(rfkill_wlan, RFKILL_TYPE_WLAN); static DEFINE_RFKILL_TASK(rfkill_bt, RFKILL_TYPE_BLUETOOTH); static void rfkill_event(struct input_handle *handle, unsigned int type, - unsigned int code, int down) + unsigned int code, int down) { if (type == EV_KEY && down == 1) { switch (code) { diff --git a/net/rfkill/rfkill.c b/net/rfkill/rfkill.c index f3986d498b40..db3395bfbcfa 100644 --- a/net/rfkill/rfkill.c +++ b/net/rfkill/rfkill.c @@ -187,7 +187,7 @@ static ssize_t rfkill_claim_store(struct device *dev, static struct device_attribute rfkill_dev_attrs[] = { __ATTR(name, S_IRUGO, rfkill_name_show, NULL), __ATTR(type, S_IRUGO, rfkill_type_show, NULL), - __ATTR(state, S_IRUGO, rfkill_state_show, rfkill_state_store), + __ATTR(state, S_IRUGO|S_IWUSR, rfkill_state_show, rfkill_state_store), __ATTR(claim, S_IRUGO|S_IWUSR, rfkill_claim_show, rfkill_claim_store), __ATTR_NULL }; diff --git a/net/rose/af_rose.c b/net/rose/af_rose.c index d476c43d5216..976c3cc86a29 100644 --- a/net/rose/af_rose.c +++ b/net/rose/af_rose.c @@ -816,7 +816,7 @@ rose_try_next_neigh: for (;;) { prepare_to_wait(sk->sk_sleep, &wait, - TASK_INTERRUPTIBLE); + TASK_INTERRUPTIBLE); if (sk->sk_state != TCP_SYN_SENT) break; if (!signal_pending(current)) { @@ -1454,7 +1454,7 @@ static int rose_info_show(struct seq_file *seq, void *v) return 0; } -static struct seq_operations rose_info_seqops = { +static const struct seq_operations rose_info_seqops = { .start = rose_info_start, .next = rose_info_next, .stop = rose_info_stop, diff --git a/net/rose/rose_loopback.c b/net/rose/rose_loopback.c index cd01642f0491..114df6eec8c3 100644 --- a/net/rose/rose_loopback.c +++ b/net/rose/rose_loopback.c @@ -79,7 +79,7 @@ static void rose_loopback_timer(unsigned long param) skb_reset_transport_header(skb); - sk = rose_find_socket(lci_o, &rose_loopback_neigh); + sk = rose_find_socket(lci_o, rose_loopback_neigh); if (sk) { if (rose_process_rx_frame(sk, skb) == 0) kfree_skb(skb); @@ -88,7 +88,7 @@ static void rose_loopback_timer(unsigned long param) if (frametype == ROSE_CALL_REQUEST) { if ((dev = rose_dev_get(dest)) != NULL) { - if (rose_rx_call_request(skb, dev, &rose_loopback_neigh, lci_o) == 0) + if (rose_rx_call_request(skb, dev, rose_loopback_neigh, lci_o) == 0) kfree_skb(skb); } else { kfree_skb(skb); diff --git a/net/rose/rose_route.c b/net/rose/rose_route.c index 929a784a86d7..96f61a71b252 100644 --- a/net/rose/rose_route.c +++ b/net/rose/rose_route.c @@ -45,7 +45,7 @@ static DEFINE_SPINLOCK(rose_neigh_list_lock); static struct rose_route *rose_route_list; static DEFINE_SPINLOCK(rose_route_list_lock); -struct rose_neigh rose_loopback_neigh; +struct rose_neigh *rose_loopback_neigh; /* * Add a new route to a node, and in the process add the node and the @@ -362,7 +362,12 @@ out: */ void rose_add_loopback_neigh(void) { - struct rose_neigh *sn = &rose_loopback_neigh; + struct rose_neigh *sn; + + rose_loopback_neigh = kmalloc(sizeof(struct rose_neigh), GFP_KERNEL); + if (!rose_loopback_neigh) + return; + sn = rose_loopback_neigh; sn->callsign = null_ax25_address; sn->digipeat = NULL; @@ -417,13 +422,13 @@ int rose_add_loopback_node(rose_address *address) rose_node->mask = 10; rose_node->count = 1; rose_node->loopback = 1; - rose_node->neighbour[0] = &rose_loopback_neigh; + rose_node->neighbour[0] = rose_loopback_neigh; /* Insert at the head of list. Address is always mask=10 */ rose_node->next = rose_node_list; rose_node_list = rose_node; - rose_loopback_neigh.count++; + rose_loopback_neigh->count++; out: spin_unlock_bh(&rose_node_list_lock); @@ -454,7 +459,7 @@ void rose_del_loopback_node(rose_address *address) rose_remove_node(rose_node); - rose_loopback_neigh.count--; + rose_loopback_neigh->count--; out: spin_unlock_bh(&rose_node_list_lock); @@ -1118,7 +1123,7 @@ static int rose_node_show(struct seq_file *seq, void *v) return 0; } -static struct seq_operations rose_node_seqops = { +static const struct seq_operations rose_node_seqops = { .start = rose_node_start, .next = rose_node_next, .stop = rose_node_stop, @@ -1200,7 +1205,7 @@ static int rose_neigh_show(struct seq_file *seq, void *v) } -static struct seq_operations rose_neigh_seqops = { +static const struct seq_operations rose_neigh_seqops = { .start = rose_neigh_start, .next = rose_neigh_next, .stop = rose_neigh_stop, @@ -1284,7 +1289,7 @@ static int rose_route_show(struct seq_file *seq, void *v) return 0; } -static struct seq_operations rose_route_seqops = { +static const struct seq_operations rose_route_seqops = { .start = rose_route_start, .next = rose_route_next, .stop = rose_route_stop, diff --git a/net/rxrpc/af_rxrpc.c b/net/rxrpc/af_rxrpc.c index 2c57df9c131b..c58fa0d1be26 100644 --- a/net/rxrpc/af_rxrpc.c +++ b/net/rxrpc/af_rxrpc.c @@ -787,12 +787,12 @@ static int __init af_rxrpc_init(void) BUILD_BUG_ON(sizeof(struct rxrpc_skb_priv) > sizeof(dummy_skb->cb)); - rxrpc_epoch = htonl(xtime.tv_sec); + rxrpc_epoch = htonl(get_seconds()); ret = -ENOMEM; rxrpc_call_jar = kmem_cache_create( "rxrpc_call_jar", sizeof(struct rxrpc_call), 0, - SLAB_HWCACHE_ALIGN, NULL, NULL); + SLAB_HWCACHE_ALIGN, NULL); if (!rxrpc_call_jar) { printk(KERN_NOTICE "RxRPC: Failed to allocate call jar\n"); goto error_call_jar; @@ -805,26 +805,26 @@ static int __init af_rxrpc_init(void) } ret = proto_register(&rxrpc_proto, 1); - if (ret < 0) { - printk(KERN_CRIT "RxRPC: Cannot register protocol\n"); + if (ret < 0) { + printk(KERN_CRIT "RxRPC: Cannot register protocol\n"); goto error_proto; } ret = sock_register(&rxrpc_family_ops); if (ret < 0) { - printk(KERN_CRIT "RxRPC: Cannot register socket family\n"); + printk(KERN_CRIT "RxRPC: Cannot register socket family\n"); goto error_sock; } ret = register_key_type(&key_type_rxrpc); if (ret < 0) { - printk(KERN_CRIT "RxRPC: Cannot register client key type\n"); + printk(KERN_CRIT "RxRPC: Cannot register client key type\n"); goto error_key_type; } ret = register_key_type(&key_type_rxrpc_s); if (ret < 0) { - printk(KERN_CRIT "RxRPC: Cannot register server key type\n"); + printk(KERN_CRIT "RxRPC: Cannot register server key type\n"); goto error_key_type_s; } diff --git a/net/rxrpc/ar-connection.c b/net/rxrpc/ar-connection.c index 482750efc235..d6667f7bc85e 100644 --- a/net/rxrpc/ar-connection.c +++ b/net/rxrpc/ar-connection.c @@ -71,7 +71,7 @@ struct rxrpc_conn_bundle *rxrpc_get_bundle(struct rxrpc_sock *rx, struct rb_node *p, *parent, **pp; _enter("%p{%x},%x,%hx,", - rx, key_serial(key), trans->debug_id, ntohl(service_id)); + rx, key_serial(key), trans->debug_id, ntohs(service_id)); if (rx->trans == trans && rx->bundle) { atomic_inc(&rx->bundle->usage); @@ -791,7 +791,7 @@ void rxrpc_put_connection(struct rxrpc_connection *conn) ASSERTCMP(atomic_read(&conn->usage), >, 0); - conn->put_time = xtime.tv_sec; + conn->put_time = get_seconds(); if (atomic_dec_and_test(&conn->usage)) { _debug("zombie"); rxrpc_queue_delayed_work(&rxrpc_connection_reap, 0); @@ -835,7 +835,7 @@ void rxrpc_connection_reaper(struct work_struct *work) _enter(""); - now = xtime.tv_sec; + now = get_seconds(); earliest = ULONG_MAX; write_lock_bh(&rxrpc_connection_lock); diff --git a/net/rxrpc/ar-proc.c b/net/rxrpc/ar-proc.c index 1c0be0e77b16..2e83ce325d15 100644 --- a/net/rxrpc/ar-proc.c +++ b/net/rxrpc/ar-proc.c @@ -30,31 +30,13 @@ static const char *rxrpc_conn_states[] = { */ static void *rxrpc_call_seq_start(struct seq_file *seq, loff_t *_pos) { - struct list_head *_p; - loff_t pos = *_pos; - read_lock(&rxrpc_call_lock); - if (!pos) - return SEQ_START_TOKEN; - pos--; - - list_for_each(_p, &rxrpc_calls) - if (!pos--) - break; - - return _p != &rxrpc_calls ? _p : NULL; + return seq_list_start_head(&rxrpc_calls, *_pos); } static void *rxrpc_call_seq_next(struct seq_file *seq, void *v, loff_t *pos) { - struct list_head *_p; - - (*pos)++; - - _p = v; - _p = (v == SEQ_START_TOKEN) ? rxrpc_calls.next : _p->next; - - return _p != &rxrpc_calls ? _p : NULL; + return seq_list_next(v, &rxrpc_calls, pos); } static void rxrpc_call_seq_stop(struct seq_file *seq, void *v) @@ -68,7 +50,7 @@ static int rxrpc_call_seq_show(struct seq_file *seq, void *v) struct rxrpc_call *call; char lbuff[4 + 4 + 4 + 4 + 5 + 1], rbuff[4 + 4 + 4 + 4 + 5 + 1]; - if (v == SEQ_START_TOKEN) { + if (v == &rxrpc_calls) { seq_puts(seq, "Proto Local Remote " " SvID ConnID CallID End Use State Abort " @@ -104,7 +86,7 @@ static int rxrpc_call_seq_show(struct seq_file *seq, void *v) return 0; } -static struct seq_operations rxrpc_call_seq_ops = { +static const struct seq_operations rxrpc_call_seq_ops = { .start = rxrpc_call_seq_start, .next = rxrpc_call_seq_next, .stop = rxrpc_call_seq_stop, @@ -129,32 +111,14 @@ struct file_operations rxrpc_call_seq_fops = { */ static void *rxrpc_connection_seq_start(struct seq_file *seq, loff_t *_pos) { - struct list_head *_p; - loff_t pos = *_pos; - read_lock(&rxrpc_connection_lock); - if (!pos) - return SEQ_START_TOKEN; - pos--; - - list_for_each(_p, &rxrpc_connections) - if (!pos--) - break; - - return _p != &rxrpc_connections ? _p : NULL; + return seq_list_start_head(&rxrpc_connections, *_pos); } static void *rxrpc_connection_seq_next(struct seq_file *seq, void *v, loff_t *pos) { - struct list_head *_p; - - (*pos)++; - - _p = v; - _p = (v == SEQ_START_TOKEN) ? rxrpc_connections.next : _p->next; - - return _p != &rxrpc_connections ? _p : NULL; + return seq_list_next(v, &rxrpc_connections, pos); } static void rxrpc_connection_seq_stop(struct seq_file *seq, void *v) @@ -168,7 +132,7 @@ static int rxrpc_connection_seq_show(struct seq_file *seq, void *v) struct rxrpc_transport *trans; char lbuff[4 + 4 + 4 + 4 + 5 + 1], rbuff[4 + 4 + 4 + 4 + 5 + 1]; - if (v == SEQ_START_TOKEN) { + if (v == &rxrpc_connections) { seq_puts(seq, "Proto Local Remote " " SvID ConnID Calls End Use State Key " @@ -206,7 +170,7 @@ static int rxrpc_connection_seq_show(struct seq_file *seq, void *v) return 0; } -static struct seq_operations rxrpc_connection_seq_ops = { +static const struct seq_operations rxrpc_connection_seq_ops = { .start = rxrpc_connection_seq_start, .next = rxrpc_connection_seq_next, .stop = rxrpc_connection_seq_stop, diff --git a/net/rxrpc/ar-transport.c b/net/rxrpc/ar-transport.c index d43d78f19302..bb282a6a19f0 100644 --- a/net/rxrpc/ar-transport.c +++ b/net/rxrpc/ar-transport.c @@ -183,7 +183,7 @@ void rxrpc_put_transport(struct rxrpc_transport *trans) ASSERTCMP(atomic_read(&trans->usage), >, 0); - trans->put_time = xtime.tv_sec; + trans->put_time = get_seconds(); if (unlikely(atomic_dec_and_test(&trans->usage))) _debug("zombie"); /* let the reaper determine the timeout to avoid a race with @@ -219,7 +219,7 @@ static void rxrpc_transport_reaper(struct work_struct *work) _enter(""); - now = xtime.tv_sec; + now = get_seconds(); earliest = ULONG_MAX; /* extract all the transports that have been dead too long */ diff --git a/net/rxrpc/rxkad.c b/net/rxrpc/rxkad.c index 5ec705144e10..ac3cabdca78c 100644 --- a/net/rxrpc/rxkad.c +++ b/net/rxrpc/rxkad.c @@ -916,7 +916,7 @@ static int rxkad_decrypt_ticket(struct rxrpc_connection *conn, issue = be32_to_cpu(stamp); } p += 4; - now = xtime.tv_sec; + now = get_seconds(); _debug("KIV ISSUE: %lx [%lx]", issue, now); /* check the ticket is in date */ diff --git a/net/sched/Kconfig b/net/sched/Kconfig index 475df8449be9..8a74cac0be8c 100644 --- a/net/sched/Kconfig +++ b/net/sched/Kconfig @@ -97,7 +97,7 @@ config NET_SCH_ATM select classes of this queuing discipline. Each class maps the flow(s) it is handling to a given virtual circuit. - See the top of <file:net/sched/sch_atm.c>) for more details. + See the top of <file:net/sched/sch_atm.c> for more details. To compile this code as a module, choose M here: the module will be called sch_atm. @@ -111,6 +111,17 @@ config NET_SCH_PRIO To compile this code as a module, choose M here: the module will be called sch_prio. +config NET_SCH_RR + tristate "Multi Band Round Robin Queuing (RR)" + select NET_SCH_PRIO + ---help--- + Say Y here if you want to use an n-band round robin packet + scheduler. + + The module uses sch_prio for its framework and is aliased as + sch_rr, so it will load sch_prio, although it is referred + to using sch_rr. + config NET_SCH_RED tristate "Random Early Detection (RED)" ---help--- @@ -126,7 +137,7 @@ config NET_SCH_SFQ tristate "Stochastic Fairness Queueing (SFQ)" ---help--- Say Y here if you want to use the Stochastic Fairness Queueing (SFQ) - packet scheduling algorithm . + packet scheduling algorithm. See the top of <file:net/sched/sch_sfq.c> for more details. @@ -275,7 +286,6 @@ config CLS_U32_MARK config NET_CLS_RSVP tristate "IPv4 Resource Reservation Protocol (RSVP)" select NET_CLS - select NET_ESTIMATOR ---help--- The Resource Reservation Protocol (RSVP) permits end systems to request a minimum and maximum data flow rate for a connection; this @@ -290,14 +300,13 @@ config NET_CLS_RSVP config NET_CLS_RSVP6 tristate "IPv6 Resource Reservation Protocol (RSVP6)" select NET_CLS - select NET_ESTIMATOR ---help--- The Resource Reservation Protocol (RSVP) permits end systems to request a minimum and maximum data flow rate for a connection; this is important for real time data such as streaming sound or video. Say Y here if you want to be able to classify outgoing packets based - on their RSVP requests and you are using the IPv6. + on their RSVP requests and you are using the IPv6 protocol. To compile this code as a module, choose M here: the module will be called cls_rsvp6. @@ -382,7 +391,6 @@ config NET_EMATCH_TEXT config NET_CLS_ACT bool "Actions" - select NET_ESTIMATOR ---help--- Say Y here if you want to use traffic control actions. Actions get attached to classifiers and are invoked after a successful @@ -464,13 +472,12 @@ config NET_ACT_SIMP config NET_CLS_POLICE bool "Traffic Policing (obsolete)" - depends on NET_CLS_ACT!=y - select NET_ESTIMATOR + select NET_CLS_ACT + select NET_ACT_POLICE ---help--- Say Y here if you want to do traffic policing, i.e. strict - bandwidth limiting. This option is obsoleted by the traffic - policer implemented as action, it stays here for compatibility - reasons. + bandwidth limiting. This option is obsolete and just selects + the option replacing it. It will be removed in the future. config NET_CLS_IND bool "Incoming device classification" @@ -480,14 +487,6 @@ config NET_CLS_IND classification based on the incoming device. This option is likely to disappear in favour of the metadata ematch. -config NET_ESTIMATOR - bool "Rate estimator" - ---help--- - Say Y here to allow using rate estimators to estimate the current - rate-of-flow for network devices, queues, etc. This module is - automatically selected if needed but can be selected manually for - statistical purposes. - endif # NET_SCHED endmenu diff --git a/net/sched/Makefile b/net/sched/Makefile index 020767a204d4..b67c36f65cf2 100644 --- a/net/sched/Makefile +++ b/net/sched/Makefile @@ -8,7 +8,6 @@ obj-$(CONFIG_NET_SCHED) += sch_api.o sch_blackhole.o obj-$(CONFIG_NET_CLS) += cls_api.o obj-$(CONFIG_NET_CLS_ACT) += act_api.o obj-$(CONFIG_NET_ACT_POLICE) += act_police.o -obj-$(CONFIG_NET_CLS_POLICE) += act_police.o obj-$(CONFIG_NET_ACT_GACT) += act_gact.o obj-$(CONFIG_NET_ACT_MIRRED) += act_mirred.o obj-$(CONFIG_NET_ACT_IPT) += act_ipt.o diff --git a/net/sched/act_api.c b/net/sched/act_api.c index 711dd26c95c3..72cdb0fade20 100644 --- a/net/sched/act_api.c +++ b/net/sched/act_api.c @@ -11,23 +11,13 @@ * */ -#include <asm/uaccess.h> -#include <asm/system.h> -#include <linux/bitops.h> #include <linux/types.h> #include <linux/kernel.h> #include <linux/string.h> -#include <linux/mm.h> -#include <linux/socket.h> -#include <linux/sockios.h> -#include <linux/in.h> #include <linux/errno.h> -#include <linux/interrupt.h> -#include <linux/netdevice.h> #include <linux/skbuff.h> #include <linux/init.h> #include <linux/kmod.h> -#include <net/sock.h> #include <net/sch_generic.h> #include <net/act_api.h> #include <net/netlink.h> @@ -42,10 +32,8 @@ void tcf_hash_destroy(struct tcf_common *p, struct tcf_hashinfo *hinfo) write_lock_bh(hinfo->lock); *p1p = p->tcfc_next; write_unlock_bh(hinfo->lock); -#ifdef CONFIG_NET_ESTIMATOR gen_kill_estimator(&p->tcfc_bstats, &p->tcfc_rate_est); -#endif kfree(p); return; } @@ -80,7 +68,7 @@ static int tcf_dump_walker(struct sk_buff *skb, struct netlink_callback *cb, int err = 0, index = -1,i = 0, s_i = 0, n_i = 0; struct rtattr *r ; - read_lock(hinfo->lock); + read_lock_bh(hinfo->lock); s_i = cb->args[0]; @@ -108,7 +96,7 @@ static int tcf_dump_walker(struct sk_buff *skb, struct netlink_callback *cb, } } done: - read_unlock(hinfo->lock); + read_unlock_bh(hinfo->lock); if (n_i) cb->args[0] += n_i; return n_i; @@ -168,13 +156,13 @@ struct tcf_common *tcf_hash_lookup(u32 index, struct tcf_hashinfo *hinfo) { struct tcf_common *p; - read_lock(hinfo->lock); + read_lock_bh(hinfo->lock); for (p = hinfo->htab[tcf_hash(index, hinfo->hmask)]; p; p = p->tcfc_next) { if (p->tcfc_index == index) break; } - read_unlock(hinfo->lock); + read_unlock_bh(hinfo->lock); return p; } @@ -232,15 +220,12 @@ struct tcf_common *tcf_hash_create(u32 index, struct rtattr *est, struct tc_acti p->tcfc_bindcnt = 1; spin_lock_init(&p->tcfc_lock); - p->tcfc_stats_lock = &p->tcfc_lock; p->tcfc_index = index ? index : tcf_hash_new_index(idx_gen, hinfo); p->tcfc_tm.install = jiffies; p->tcfc_tm.lastuse = jiffies; -#ifdef CONFIG_NET_ESTIMATOR if (est) gen_new_estimator(&p->tcfc_bstats, &p->tcfc_rate_est, - p->tcfc_stats_lock, est); -#endif + &p->tcfc_lock, est); a->priv = (void *) p; return p; } @@ -599,12 +584,12 @@ int tcf_action_copy_stats(struct sk_buff *skb, struct tc_action *a, if (compat_mode) { if (a->type == TCA_OLD_COMPAT) err = gnet_stats_start_copy_compat(skb, 0, - TCA_STATS, TCA_XSTATS, h->tcf_stats_lock, &d); + TCA_STATS, TCA_XSTATS, &h->tcf_lock, &d); else return 0; } else err = gnet_stats_start_copy(skb, TCA_ACT_STATS, - h->tcf_stats_lock, &d); + &h->tcf_lock, &d); if (err < 0) goto errout; @@ -614,9 +599,7 @@ int tcf_action_copy_stats(struct sk_buff *skb, struct tc_action *a, goto errout; if (gnet_stats_copy_basic(&d, &h->tcf_bstats) < 0 || -#ifdef CONFIG_NET_ESTIMATOR gnet_stats_copy_rate_est(&d, &h->tcf_rate_est) < 0 || -#endif gnet_stats_copy_queue(&d, &h->tcf_qstats) < 0) goto errout; diff --git a/net/sched/act_gact.c b/net/sched/act_gact.c index 7517f3791541..a9631e426d91 100644 --- a/net/sched/act_gact.c +++ b/net/sched/act_gact.c @@ -10,26 +10,15 @@ * */ -#include <asm/uaccess.h> -#include <asm/system.h> -#include <linux/bitops.h> #include <linux/types.h> #include <linux/kernel.h> #include <linux/string.h> -#include <linux/mm.h> -#include <linux/socket.h> -#include <linux/sockios.h> -#include <linux/in.h> #include <linux/errno.h> -#include <linux/interrupt.h> -#include <linux/netdevice.h> #include <linux/skbuff.h> #include <linux/rtnetlink.h> #include <linux/module.h> #include <linux/init.h> -#include <linux/proc_fs.h> #include <net/netlink.h> -#include <net/sock.h> #include <net/pkt_sched.h> #include <linux/tc_act/tc_gact.h> #include <net/tc_act/tc_gact.h> diff --git a/net/sched/act_ipt.c b/net/sched/act_ipt.c index 00b05f422d45..6b407ece953c 100644 --- a/net/sched/act_ipt.c +++ b/net/sched/act_ipt.c @@ -11,27 +11,15 @@ * Copyright: Jamal Hadi Salim (2002-4) */ -#include <asm/uaccess.h> -#include <asm/system.h> -#include <asm/bitops.h> #include <linux/types.h> #include <linux/kernel.h> #include <linux/string.h> -#include <linux/mm.h> -#include <linux/socket.h> -#include <linux/sockios.h> -#include <linux/in.h> #include <linux/errno.h> -#include <linux/interrupt.h> -#include <linux/netdevice.h> #include <linux/skbuff.h> #include <linux/rtnetlink.h> #include <linux/module.h> #include <linux/init.h> -#include <linux/proc_fs.h> -#include <linux/kmod.h> #include <net/netlink.h> -#include <net/sock.h> #include <net/pkt_sched.h> #include <linux/tc_act/tc_ipt.h> #include <net/tc_act/tc_ipt.h> diff --git a/net/sched/act_mirred.c b/net/sched/act_mirred.c index de21c92faaa2..579578944ae7 100644 --- a/net/sched/act_mirred.c +++ b/net/sched/act_mirred.c @@ -12,31 +12,19 @@ * */ -#include <asm/uaccess.h> -#include <asm/system.h> -#include <asm/bitops.h> #include <linux/types.h> #include <linux/kernel.h> #include <linux/string.h> -#include <linux/mm.h> -#include <linux/socket.h> -#include <linux/sockios.h> -#include <linux/in.h> #include <linux/errno.h> -#include <linux/interrupt.h> -#include <linux/netdevice.h> #include <linux/skbuff.h> #include <linux/rtnetlink.h> #include <linux/module.h> #include <linux/init.h> -#include <linux/proc_fs.h> #include <net/netlink.h> -#include <net/sock.h> #include <net/pkt_sched.h> #include <linux/tc_act/tc_mirred.h> #include <net/tc_act/tc_mirred.h> -#include <linux/etherdevice.h> #include <linux/if_arp.h> #define MIRRED_TAB_MASK 7 diff --git a/net/sched/act_pedit.c b/net/sched/act_pedit.c index 6f8684b5617e..b46fab5fb323 100644 --- a/net/sched/act_pedit.c +++ b/net/sched/act_pedit.c @@ -9,26 +9,15 @@ * Authors: Jamal Hadi Salim (2002-4) */ -#include <asm/uaccess.h> -#include <asm/system.h> -#include <asm/bitops.h> #include <linux/types.h> #include <linux/kernel.h> #include <linux/string.h> -#include <linux/mm.h> -#include <linux/socket.h> -#include <linux/sockios.h> -#include <linux/in.h> #include <linux/errno.h> -#include <linux/interrupt.h> -#include <linux/netdevice.h> #include <linux/skbuff.h> #include <linux/rtnetlink.h> #include <linux/module.h> #include <linux/init.h> -#include <linux/proc_fs.h> #include <net/netlink.h> -#include <net/sock.h> #include <net/pkt_sched.h> #include <linux/tc_act/tc_pedit.h> #include <net/tc_act/tc_pedit.h> diff --git a/net/sched/act_police.c b/net/sched/act_police.c index 616f465f407e..17f6f27e28a2 100644 --- a/net/sched/act_police.c +++ b/net/sched/act_police.c @@ -10,25 +10,14 @@ * J Hadi Salim (action changes) */ -#include <asm/uaccess.h> -#include <asm/system.h> -#include <linux/bitops.h> #include <linux/module.h> #include <linux/types.h> #include <linux/kernel.h> #include <linux/string.h> -#include <linux/mm.h> -#include <linux/socket.h> -#include <linux/sockios.h> -#include <linux/in.h> #include <linux/errno.h> -#include <linux/interrupt.h> -#include <linux/netdevice.h> #include <linux/skbuff.h> -#include <linux/module.h> #include <linux/rtnetlink.h> #include <linux/init.h> -#include <net/sock.h> #include <net/act_api.h> #include <net/netlink.h> @@ -60,7 +49,6 @@ struct tc_police_compat /* Each policer is serialized by its individual spinlock */ -#ifdef CONFIG_NET_CLS_ACT static int tcf_act_police_walker(struct sk_buff *skb, struct netlink_callback *cb, int type, struct tc_action *a) { @@ -68,7 +56,7 @@ static int tcf_act_police_walker(struct sk_buff *skb, struct netlink_callback *c int err = 0, index = -1, i = 0, s_i = 0, n_i = 0; struct rtattr *r; - read_lock(&police_lock); + read_lock_bh(&police_lock); s_i = cb->args[0]; @@ -97,7 +85,7 @@ static int tcf_act_police_walker(struct sk_buff *skb, struct netlink_callback *c } } done: - read_unlock(&police_lock); + read_unlock_bh(&police_lock); if (n_i) cb->args[0] += n_i; return n_i; @@ -106,9 +94,8 @@ rtattr_failure: nlmsg_trim(skb, r); goto done; } -#endif -void tcf_police_destroy(struct tcf_police *p) +static void tcf_police_destroy(struct tcf_police *p) { unsigned int h = tcf_hash(p->tcf_index, POL_TAB_MASK); struct tcf_common **p1p; @@ -118,10 +105,8 @@ void tcf_police_destroy(struct tcf_police *p) write_lock_bh(&police_lock); *p1p = p->tcf_next; write_unlock_bh(&police_lock); -#ifdef CONFIG_NET_ESTIMATOR gen_kill_estimator(&p->tcf_bstats, &p->tcf_rate_est); -#endif if (p->tcfp_R_tab) qdisc_put_rtab(p->tcfp_R_tab); if (p->tcfp_P_tab) @@ -133,7 +118,6 @@ void tcf_police_destroy(struct tcf_police *p) BUG_TRAP(0); } -#ifdef CONFIG_NET_CLS_ACT static int tcf_act_police_locate(struct rtattr *rta, struct rtattr *est, struct tc_action *a, int ovr, int bind) { @@ -185,7 +169,6 @@ static int tcf_act_police_locate(struct rtattr *rta, struct rtattr *est, ret = ACT_P_CREATED; police->tcf_refcnt = 1; spin_lock_init(&police->tcf_lock); - police->tcf_stats_lock = &police->tcf_lock; if (bind) police->tcf_bindcnt = 1; override: @@ -227,15 +210,13 @@ override: police->tcfp_ptoks = L2T_P(police, police->tcfp_mtu); police->tcf_action = parm->action; -#ifdef CONFIG_NET_ESTIMATOR if (tb[TCA_POLICE_AVRATE-1]) police->tcfp_ewma_rate = *(u32*)RTA_DATA(tb[TCA_POLICE_AVRATE-1]); if (est) gen_replace_estimator(&police->tcf_bstats, &police->tcf_rate_est, - police->tcf_stats_lock, est); -#endif + &police->tcf_lock, est); spin_unlock_bh(&police->tcf_lock); if (ret != ACT_P_CREATED) @@ -262,10 +243,19 @@ failure: static int tcf_act_police_cleanup(struct tc_action *a, int bind) { struct tcf_police *p = a->priv; + int ret = 0; + + if (p != NULL) { + if (bind) + p->tcf_bindcnt--; - if (p != NULL) - return tcf_police_release(p, bind); - return 0; + p->tcf_refcnt--; + if (p->tcf_refcnt <= 0 && !p->tcf_bindcnt) { + tcf_police_destroy(p); + ret = 1; + } + } + return ret; } static int tcf_act_police(struct sk_buff *skb, struct tc_action *a, @@ -281,14 +271,12 @@ static int tcf_act_police(struct sk_buff *skb, struct tc_action *a, police->tcf_bstats.bytes += skb->len; police->tcf_bstats.packets++; -#ifdef CONFIG_NET_ESTIMATOR if (police->tcfp_ewma_rate && police->tcf_rate_est.bps >= police->tcfp_ewma_rate) { police->tcf_qstats.overlimits++; spin_unlock(&police->tcf_lock); return police->tcf_action; } -#endif if (skb->len <= police->tcfp_mtu) { if (police->tcfp_R_tab == NULL) { @@ -348,10 +336,8 @@ tcf_act_police_dump(struct sk_buff *skb, struct tc_action *a, int bind, int ref) if (police->tcfp_result) RTA_PUT(skb, TCA_POLICE_RESULT, sizeof(int), &police->tcfp_result); -#ifdef CONFIG_NET_ESTIMATOR if (police->tcfp_ewma_rate) RTA_PUT(skb, TCA_POLICE_AVRATE, 4, &police->tcfp_ewma_rate); -#endif return skb->len; rtattr_failure: @@ -391,240 +377,3 @@ police_cleanup_module(void) module_init(police_init_module); module_exit(police_cleanup_module); - -#else /* CONFIG_NET_CLS_ACT */ - -static struct tcf_common *tcf_police_lookup(u32 index) -{ - struct tcf_hashinfo *hinfo = &police_hash_info; - struct tcf_common *p; - - read_lock(hinfo->lock); - for (p = hinfo->htab[tcf_hash(index, hinfo->hmask)]; p; - p = p->tcfc_next) { - if (p->tcfc_index == index) - break; - } - read_unlock(hinfo->lock); - - return p; -} - -static u32 tcf_police_new_index(void) -{ - u32 *idx_gen = &police_idx_gen; - u32 val = *idx_gen; - - do { - if (++val == 0) - val = 1; - } while (tcf_police_lookup(val)); - - return (*idx_gen = val); -} - -struct tcf_police *tcf_police_locate(struct rtattr *rta, struct rtattr *est) -{ - unsigned int h; - struct tcf_police *police; - struct rtattr *tb[TCA_POLICE_MAX]; - struct tc_police *parm; - int size; - - if (rtattr_parse_nested(tb, TCA_POLICE_MAX, rta) < 0) - return NULL; - - if (tb[TCA_POLICE_TBF-1] == NULL) - return NULL; - size = RTA_PAYLOAD(tb[TCA_POLICE_TBF-1]); - if (size != sizeof(*parm) && size != sizeof(struct tc_police_compat)) - return NULL; - - parm = RTA_DATA(tb[TCA_POLICE_TBF-1]); - - if (parm->index) { - struct tcf_common *pc; - - pc = tcf_police_lookup(parm->index); - if (pc) { - police = to_police(pc); - police->tcf_refcnt++; - return police; - } - } - police = kzalloc(sizeof(*police), GFP_KERNEL); - if (unlikely(!police)) - return NULL; - - police->tcf_refcnt = 1; - spin_lock_init(&police->tcf_lock); - police->tcf_stats_lock = &police->tcf_lock; - if (parm->rate.rate) { - police->tcfp_R_tab = - qdisc_get_rtab(&parm->rate, tb[TCA_POLICE_RATE-1]); - if (police->tcfp_R_tab == NULL) - goto failure; - if (parm->peakrate.rate) { - police->tcfp_P_tab = - qdisc_get_rtab(&parm->peakrate, - tb[TCA_POLICE_PEAKRATE-1]); - if (police->tcfp_P_tab == NULL) - goto failure; - } - } - if (tb[TCA_POLICE_RESULT-1]) { - if (RTA_PAYLOAD(tb[TCA_POLICE_RESULT-1]) != sizeof(u32)) - goto failure; - police->tcfp_result = *(u32*)RTA_DATA(tb[TCA_POLICE_RESULT-1]); - } -#ifdef CONFIG_NET_ESTIMATOR - if (tb[TCA_POLICE_AVRATE-1]) { - if (RTA_PAYLOAD(tb[TCA_POLICE_AVRATE-1]) != sizeof(u32)) - goto failure; - police->tcfp_ewma_rate = - *(u32*)RTA_DATA(tb[TCA_POLICE_AVRATE-1]); - } -#endif - police->tcfp_toks = police->tcfp_burst = parm->burst; - police->tcfp_mtu = parm->mtu; - if (police->tcfp_mtu == 0) { - police->tcfp_mtu = ~0; - if (police->tcfp_R_tab) - police->tcfp_mtu = 255<<police->tcfp_R_tab->rate.cell_log; - } - if (police->tcfp_P_tab) - police->tcfp_ptoks = L2T_P(police, police->tcfp_mtu); - police->tcfp_t_c = psched_get_time(); - police->tcf_index = parm->index ? parm->index : - tcf_police_new_index(); - police->tcf_action = parm->action; -#ifdef CONFIG_NET_ESTIMATOR - if (est) - gen_new_estimator(&police->tcf_bstats, &police->tcf_rate_est, - police->tcf_stats_lock, est); -#endif - h = tcf_hash(police->tcf_index, POL_TAB_MASK); - write_lock_bh(&police_lock); - police->tcf_next = tcf_police_ht[h]; - tcf_police_ht[h] = &police->common; - write_unlock_bh(&police_lock); - return police; - -failure: - if (police->tcfp_R_tab) - qdisc_put_rtab(police->tcfp_R_tab); - kfree(police); - return NULL; -} - -int tcf_police(struct sk_buff *skb, struct tcf_police *police) -{ - psched_time_t now; - long toks; - long ptoks = 0; - - spin_lock(&police->tcf_lock); - - police->tcf_bstats.bytes += skb->len; - police->tcf_bstats.packets++; - -#ifdef CONFIG_NET_ESTIMATOR - if (police->tcfp_ewma_rate && - police->tcf_rate_est.bps >= police->tcfp_ewma_rate) { - police->tcf_qstats.overlimits++; - spin_unlock(&police->tcf_lock); - return police->tcf_action; - } -#endif - if (skb->len <= police->tcfp_mtu) { - if (police->tcfp_R_tab == NULL) { - spin_unlock(&police->tcf_lock); - return police->tcfp_result; - } - - now = psched_get_time(); - toks = psched_tdiff_bounded(now, police->tcfp_t_c, - police->tcfp_burst); - if (police->tcfp_P_tab) { - ptoks = toks + police->tcfp_ptoks; - if (ptoks > (long)L2T_P(police, police->tcfp_mtu)) - ptoks = (long)L2T_P(police, police->tcfp_mtu); - ptoks -= L2T_P(police, skb->len); - } - toks += police->tcfp_toks; - if (toks > (long)police->tcfp_burst) - toks = police->tcfp_burst; - toks -= L2T(police, skb->len); - if ((toks|ptoks) >= 0) { - police->tcfp_t_c = now; - police->tcfp_toks = toks; - police->tcfp_ptoks = ptoks; - spin_unlock(&police->tcf_lock); - return police->tcfp_result; - } - } - - police->tcf_qstats.overlimits++; - spin_unlock(&police->tcf_lock); - return police->tcf_action; -} -EXPORT_SYMBOL(tcf_police); - -int tcf_police_dump(struct sk_buff *skb, struct tcf_police *police) -{ - unsigned char *b = skb_tail_pointer(skb); - struct tc_police opt; - - opt.index = police->tcf_index; - opt.action = police->tcf_action; - opt.mtu = police->tcfp_mtu; - opt.burst = police->tcfp_burst; - if (police->tcfp_R_tab) - opt.rate = police->tcfp_R_tab->rate; - else - memset(&opt.rate, 0, sizeof(opt.rate)); - if (police->tcfp_P_tab) - opt.peakrate = police->tcfp_P_tab->rate; - else - memset(&opt.peakrate, 0, sizeof(opt.peakrate)); - RTA_PUT(skb, TCA_POLICE_TBF, sizeof(opt), &opt); - if (police->tcfp_result) - RTA_PUT(skb, TCA_POLICE_RESULT, sizeof(int), - &police->tcfp_result); -#ifdef CONFIG_NET_ESTIMATOR - if (police->tcfp_ewma_rate) - RTA_PUT(skb, TCA_POLICE_AVRATE, 4, &police->tcfp_ewma_rate); -#endif - return skb->len; - -rtattr_failure: - nlmsg_trim(skb, b); - return -1; -} - -int tcf_police_dump_stats(struct sk_buff *skb, struct tcf_police *police) -{ - struct gnet_dump d; - - if (gnet_stats_start_copy_compat(skb, TCA_STATS2, TCA_STATS, - TCA_XSTATS, police->tcf_stats_lock, - &d) < 0) - goto errout; - - if (gnet_stats_copy_basic(&d, &police->tcf_bstats) < 0 || -#ifdef CONFIG_NET_ESTIMATOR - gnet_stats_copy_rate_est(&d, &police->tcf_rate_est) < 0 || -#endif - gnet_stats_copy_queue(&d, &police->tcf_qstats) < 0) - goto errout; - - if (gnet_stats_finish_copy(&d) < 0) - goto errout; - - return 0; - -errout: - return -1; -} - -#endif /* CONFIG_NET_CLS_ACT */ diff --git a/net/sched/act_simple.c b/net/sched/act_simple.c index 36e1edad5990..fb84ef33d14f 100644 --- a/net/sched/act_simple.c +++ b/net/sched/act_simple.c @@ -13,7 +13,6 @@ #include <linux/module.h> #include <linux/init.h> #include <linux/kernel.h> -#include <linux/netdevice.h> #include <linux/skbuff.h> #include <linux/rtnetlink.h> #include <net/netlink.h> diff --git a/net/sched/cls_api.c b/net/sched/cls_api.c index ebf94edf0478..5f0fbca7393f 100644 --- a/net/sched/cls_api.c +++ b/net/sched/cls_api.c @@ -14,26 +14,16 @@ * */ -#include <asm/uaccess.h> -#include <asm/system.h> -#include <linux/bitops.h> #include <linux/module.h> #include <linux/types.h> #include <linux/kernel.h> #include <linux/string.h> -#include <linux/mm.h> -#include <linux/socket.h> -#include <linux/sockios.h> -#include <linux/in.h> #include <linux/errno.h> -#include <linux/interrupt.h> -#include <linux/netdevice.h> #include <linux/skbuff.h> #include <linux/init.h> #include <linux/kmod.h> #include <linux/netlink.h> #include <net/netlink.h> -#include <net/sock.h> #include <net/pkt_sched.h> #include <net/pkt_cls.h> @@ -468,11 +458,6 @@ tcf_exts_destroy(struct tcf_proto *tp, struct tcf_exts *exts) tcf_action_destroy(exts->action, TCA_ACT_UNBIND); exts->action = NULL; } -#elif defined CONFIG_NET_CLS_POLICE - if (exts->police) { - tcf_police_release(exts->police, TCA_ACT_UNBIND); - exts->police = NULL; - } #endif } @@ -506,17 +491,6 @@ tcf_exts_validate(struct tcf_proto *tp, struct rtattr **tb, exts->action = act; } } -#elif defined CONFIG_NET_CLS_POLICE - if (map->police && tb[map->police-1]) { - struct tcf_police *p; - - p = tcf_police_locate(tb[map->police-1], rate_tlv); - if (p == NULL) - return -EINVAL; - - exts->police = p; - } else if (map->action && tb[map->action-1]) - return -EOPNOTSUPP; #else if ((map->action && tb[map->action-1]) || (map->police && tb[map->police-1])) @@ -539,15 +513,6 @@ tcf_exts_change(struct tcf_proto *tp, struct tcf_exts *dst, if (act) tcf_action_destroy(act, TCA_ACT_UNBIND); } -#elif defined CONFIG_NET_CLS_POLICE - if (src->police) { - struct tcf_police *p; - tcf_tree_lock(tp); - p = xchg(&dst->police, src->police); - tcf_tree_unlock(tp); - if (p) - tcf_police_release(p, TCA_ACT_UNBIND); - } #endif } @@ -576,17 +541,6 @@ tcf_exts_dump(struct sk_buff *skb, struct tcf_exts *exts, p_rta->rta_len = skb_tail_pointer(skb) - (u8 *)p_rta; } } -#elif defined CONFIG_NET_CLS_POLICE - if (map->police && exts->police) { - struct rtattr *p_rta = (struct rtattr *)skb_tail_pointer(skb); - - RTA_PUT(skb, map->police, 0, NULL); - - if (tcf_police_dump(skb, exts->police) < 0) - goto rtattr_failure; - - p_rta->rta_len = skb_tail_pointer(skb) - (u8 *)p_rta; - } #endif return 0; rtattr_failure: __attribute__ ((unused)) @@ -601,10 +555,6 @@ tcf_exts_dump_stats(struct sk_buff *skb, struct tcf_exts *exts, if (exts->action) if (tcf_action_copy_stats(skb, exts->action, 1) < 0) goto rtattr_failure; -#elif defined CONFIG_NET_CLS_POLICE - if (exts->police) - if (tcf_police_dump_stats(skb, exts->police) < 0) - goto rtattr_failure; #endif return 0; rtattr_failure: __attribute__ ((unused)) diff --git a/net/sched/cls_basic.c b/net/sched/cls_basic.c index c885412d79d5..8dbcf2771a46 100644 --- a/net/sched/cls_basic.c +++ b/net/sched/cls_basic.c @@ -13,7 +13,6 @@ #include <linux/types.h> #include <linux/kernel.h> #include <linux/string.h> -#include <linux/mm.h> #include <linux/errno.h> #include <linux/rtnetlink.h> #include <linux/skbuff.h> diff --git a/net/sched/cls_fw.c b/net/sched/cls_fw.c index bbec4a0d4dcb..8adbd6a37d14 100644 --- a/net/sched/cls_fw.c +++ b/net/sched/cls_fw.c @@ -19,29 +19,12 @@ */ #include <linux/module.h> -#include <asm/uaccess.h> -#include <asm/system.h> -#include <linux/bitops.h> #include <linux/types.h> #include <linux/kernel.h> #include <linux/string.h> -#include <linux/mm.h> -#include <linux/socket.h> -#include <linux/sockios.h> -#include <linux/in.h> #include <linux/errno.h> -#include <linux/interrupt.h> -#include <linux/if_ether.h> -#include <linux/inet.h> -#include <linux/netdevice.h> -#include <linux/etherdevice.h> -#include <linux/notifier.h> -#include <linux/netfilter.h> -#include <net/ip.h> -#include <net/netlink.h> -#include <net/route.h> #include <linux/skbuff.h> -#include <net/sock.h> +#include <net/netlink.h> #include <net/act_api.h> #include <net/pkt_cls.h> diff --git a/net/sched/cls_route.c b/net/sched/cls_route.c index cc941d0ee3a5..0a8409c1d28a 100644 --- a/net/sched/cls_route.c +++ b/net/sched/cls_route.c @@ -10,28 +10,14 @@ */ #include <linux/module.h> -#include <asm/uaccess.h> -#include <asm/system.h> -#include <linux/bitops.h> #include <linux/types.h> #include <linux/kernel.h> #include <linux/string.h> -#include <linux/mm.h> -#include <linux/socket.h> -#include <linux/sockios.h> -#include <linux/in.h> #include <linux/errno.h> -#include <linux/interrupt.h> -#include <linux/if_ether.h> -#include <linux/inet.h> -#include <linux/netdevice.h> -#include <linux/etherdevice.h> -#include <linux/notifier.h> -#include <net/ip.h> -#include <net/netlink.h> -#include <net/route.h> #include <linux/skbuff.h> -#include <net/sock.h> +#include <net/dst.h> +#include <net/route.h> +#include <net/netlink.h> #include <net/act_api.h> #include <net/pkt_cls.h> diff --git a/net/sched/cls_rsvp.c b/net/sched/cls_rsvp.c index 0a683c07c648..cbb5e0d600f3 100644 --- a/net/sched/cls_rsvp.c +++ b/net/sched/cls_rsvp.c @@ -10,27 +10,12 @@ */ #include <linux/module.h> -#include <asm/uaccess.h> -#include <asm/system.h> -#include <linux/bitops.h> #include <linux/types.h> #include <linux/kernel.h> #include <linux/string.h> -#include <linux/mm.h> -#include <linux/socket.h> -#include <linux/sockios.h> -#include <linux/in.h> #include <linux/errno.h> -#include <linux/interrupt.h> -#include <linux/if_ether.h> -#include <linux/inet.h> -#include <linux/netdevice.h> -#include <linux/etherdevice.h> -#include <linux/notifier.h> -#include <net/ip.h> -#include <net/route.h> #include <linux/skbuff.h> -#include <net/sock.h> +#include <net/ip.h> #include <net/netlink.h> #include <net/act_api.h> #include <net/pkt_cls.h> diff --git a/net/sched/cls_rsvp6.c b/net/sched/cls_rsvp6.c index 93b6abed57db..dd08aea2aee5 100644 --- a/net/sched/cls_rsvp6.c +++ b/net/sched/cls_rsvp6.c @@ -10,28 +10,12 @@ */ #include <linux/module.h> -#include <asm/uaccess.h> -#include <asm/system.h> -#include <linux/bitops.h> #include <linux/types.h> #include <linux/kernel.h> #include <linux/string.h> -#include <linux/mm.h> -#include <linux/socket.h> -#include <linux/sockios.h> -#include <linux/in.h> #include <linux/errno.h> -#include <linux/interrupt.h> -#include <linux/if_ether.h> -#include <linux/inet.h> -#include <linux/netdevice.h> -#include <linux/etherdevice.h> -#include <linux/notifier.h> -#include <net/ip.h> #include <linux/ipv6.h> -#include <net/route.h> #include <linux/skbuff.h> -#include <net/sock.h> #include <net/act_api.h> #include <net/pkt_cls.h> #include <net/netlink.h> diff --git a/net/sched/cls_tcindex.c b/net/sched/cls_tcindex.c index 47ac0c556429..2314820a080a 100644 --- a/net/sched/cls_tcindex.c +++ b/net/sched/cls_tcindex.c @@ -9,12 +9,9 @@ #include <linux/kernel.h> #include <linux/skbuff.h> #include <linux/errno.h> -#include <linux/netdevice.h> -#include <net/ip.h> #include <net/act_api.h> #include <net/netlink.h> #include <net/pkt_cls.h> -#include <net/route.h> /* diff --git a/net/sched/cls_u32.c b/net/sched/cls_u32.c index c7a347bd6d70..d4d5d2f271d2 100644 --- a/net/sched/cls_u32.c +++ b/net/sched/cls_u32.c @@ -30,30 +30,14 @@ * nfmark match added by Catalin(ux aka Dino) BOIE <catab at umbrella.ro> */ -#include <asm/uaccess.h> -#include <asm/system.h> -#include <linux/bitops.h> #include <linux/module.h> #include <linux/types.h> #include <linux/kernel.h> #include <linux/string.h> -#include <linux/mm.h> -#include <linux/socket.h> -#include <linux/sockios.h> -#include <linux/in.h> #include <linux/errno.h> -#include <linux/interrupt.h> -#include <linux/if_ether.h> -#include <linux/inet.h> -#include <linux/netdevice.h> -#include <linux/etherdevice.h> -#include <linux/notifier.h> #include <linux/rtnetlink.h> -#include <net/ip.h> -#include <net/netlink.h> -#include <net/route.h> #include <linux/skbuff.h> -#include <net/sock.h> +#include <net/netlink.h> #include <net/act_api.h> #include <net/pkt_cls.h> @@ -518,7 +502,7 @@ static int u32_set_parms(struct tcf_proto *tp, unsigned long base, #ifdef CONFIG_NET_CLS_IND if (tb[TCA_U32_INDEV-1]) { - int err = tcf_change_indev(tp, n->indev, tb[TCA_U32_INDEV-1]); + err = tcf_change_indev(tp, n->indev, tb[TCA_U32_INDEV-1]); if (err < 0) goto errout; } @@ -798,9 +782,6 @@ static int __init init_u32(void) #ifdef CONFIG_CLS_U32_PERF printk(" Performance counters on\n"); #endif -#ifdef CONFIG_NET_CLS_POLICE - printk(" OLD policer on \n"); -#endif #ifdef CONFIG_NET_CLS_IND printk(" input device check on \n"); #endif diff --git a/net/sched/em_cmp.c b/net/sched/em_cmp.c index 8d6dacd81900..cc49c932641d 100644 --- a/net/sched/em_cmp.c +++ b/net/sched/em_cmp.c @@ -98,3 +98,4 @@ MODULE_LICENSE("GPL"); module_init(init_em_cmp); module_exit(exit_em_cmp); +MODULE_ALIAS_TCF_EMATCH(TCF_EM_CMP); diff --git a/net/sched/em_meta.c b/net/sched/em_meta.c index 60acf8cdb27b..650f09c8bd6a 100644 --- a/net/sched/em_meta.c +++ b/net/sched/em_meta.c @@ -848,3 +848,5 @@ MODULE_LICENSE("GPL"); module_init(init_em_meta); module_exit(exit_em_meta); + +MODULE_ALIAS_TCF_EMATCH(TCF_EM_META); diff --git a/net/sched/em_nbyte.c b/net/sched/em_nbyte.c index b4b36efce292..370a1b2ea317 100644 --- a/net/sched/em_nbyte.c +++ b/net/sched/em_nbyte.c @@ -76,3 +76,5 @@ MODULE_LICENSE("GPL"); module_init(init_em_nbyte); module_exit(exit_em_nbyte); + +MODULE_ALIAS_TCF_EMATCH(TCF_EM_NBYTE); diff --git a/net/sched/em_text.c b/net/sched/em_text.c index e8f46169449d..d5cd86efb7d0 100644 --- a/net/sched/em_text.c +++ b/net/sched/em_text.c @@ -150,3 +150,5 @@ MODULE_LICENSE("GPL"); module_init(init_em_text); module_exit(exit_em_text); + +MODULE_ALIAS_TCF_EMATCH(TCF_EM_TEXT); diff --git a/net/sched/em_u32.c b/net/sched/em_u32.c index 0a2a7fe08de3..112796e4a7c4 100644 --- a/net/sched/em_u32.c +++ b/net/sched/em_u32.c @@ -60,3 +60,5 @@ MODULE_LICENSE("GPL"); module_init(init_em_u32); module_exit(exit_em_u32); + +MODULE_ALIAS_TCF_EMATCH(TCF_EM_U32); diff --git a/net/sched/ematch.c b/net/sched/ematch.c index 63146d339d81..f3a104e323bd 100644 --- a/net/sched/ematch.c +++ b/net/sched/ematch.c @@ -84,9 +84,7 @@ #include <linux/module.h> #include <linux/types.h> #include <linux/kernel.h> -#include <linux/mm.h> #include <linux/errno.h> -#include <linux/interrupt.h> #include <linux/rtnetlink.h> #include <linux/skbuff.h> #include <net/pkt_cls.h> @@ -224,6 +222,19 @@ static int tcf_em_validate(struct tcf_proto *tp, if (em->ops == NULL) { err = -ENOENT; +#ifdef CONFIG_KMOD + __rtnl_unlock(); + request_module("ematch-kind-%u", em_hdr->kind); + rtnl_lock(); + em->ops = tcf_em_lookup(em_hdr->kind); + if (em->ops) { + /* We dropped the RTNL mutex in order to + * perform the module load. Tell the caller + * to replay the request. */ + module_put(em->ops->owner); + err = -EAGAIN; + } +#endif goto errout; } diff --git a/net/sched/sch_api.c b/net/sched/sch_api.c index 7a6b0b7cc68a..dee0d5fb39c5 100644 --- a/net/sched/sch_api.c +++ b/net/sched/sch_api.c @@ -19,30 +19,18 @@ #include <linux/types.h> #include <linux/kernel.h> #include <linux/string.h> -#include <linux/mm.h> -#include <linux/socket.h> -#include <linux/sockios.h> -#include <linux/in.h> #include <linux/errno.h> -#include <linux/interrupt.h> -#include <linux/netdevice.h> #include <linux/skbuff.h> #include <linux/init.h> #include <linux/proc_fs.h> #include <linux/seq_file.h> #include <linux/kmod.h> #include <linux/list.h> -#include <linux/bitops.h> #include <linux/hrtimer.h> #include <net/netlink.h> -#include <net/sock.h> #include <net/pkt_sched.h> -#include <asm/processor.h> -#include <asm/uaccess.h> -#include <asm/system.h> - static int qdisc_notify(struct sk_buff *oskb, struct nlmsghdr *n, u32 clid, struct Qdisc *old, struct Qdisc *new); static int tclass_notify(struct sk_buff *oskb, struct nlmsghdr *n, @@ -392,6 +380,10 @@ void qdisc_tree_decrease_qlen(struct Qdisc *sch, unsigned int n) return; while ((parentid = sch->parent)) { sch = qdisc_lookup(sch->dev, TC_H_MAJ(parentid)); + if (sch == NULL) { + WARN_ON(parentid != TC_H_ROOT); + return; + } cops = sch->ops->cl_ops; if (cops->qlen_notify) { cl = cops->get(sch, parentid); @@ -432,8 +424,6 @@ static int qdisc_graft(struct net_device *dev, struct Qdisc *parent, unsigned long cl = cops->get(parent, classid); if (cl) { err = cops->graft(parent, cl, new, old); - if (new) - new->parent = classid; cops->put(parent, cl); } } @@ -448,7 +438,8 @@ static int qdisc_graft(struct net_device *dev, struct Qdisc *parent, */ static struct Qdisc * -qdisc_create(struct net_device *dev, u32 handle, struct rtattr **tca, int *errp) +qdisc_create(struct net_device *dev, u32 parent, u32 handle, + struct rtattr **tca, int *errp) { int err; struct rtattr *kind = tca[TCA_KIND-1]; @@ -494,6 +485,8 @@ qdisc_create(struct net_device *dev, u32 handle, struct rtattr **tca, int *errp) goto err_out2; } + sch->parent = parent; + if (handle == TC_H_INGRESS) { sch->flags |= TCQ_F_INGRESS; sch->stats_lock = &dev->ingress_lock; @@ -511,7 +504,6 @@ qdisc_create(struct net_device *dev, u32 handle, struct rtattr **tca, int *errp) sch->handle = handle; if (!ops->init || (err = ops->init(sch, tca[TCA_OPTIONS-1])) == 0) { -#ifdef CONFIG_NET_ESTIMATOR if (tca[TCA_RATE-1]) { err = gen_new_estimator(&sch->bstats, &sch->rate_est, sch->stats_lock, @@ -527,7 +519,6 @@ qdisc_create(struct net_device *dev, u32 handle, struct rtattr **tca, int *errp) goto err_out3; } } -#endif qdisc_lock_tree(dev); list_add_tail(&sch->list, &dev->qdisc_list); qdisc_unlock_tree(dev); @@ -555,11 +546,9 @@ static int qdisc_change(struct Qdisc *sch, struct rtattr **tca) if (err) return err; } -#ifdef CONFIG_NET_ESTIMATOR if (tca[TCA_RATE-1]) gen_replace_estimator(&sch->bstats, &sch->rate_est, sch->stats_lock, tca[TCA_RATE-1]); -#endif return 0; } @@ -774,9 +763,11 @@ create_n_graft: if (!(n->nlmsg_flags&NLM_F_CREATE)) return -ENOENT; if (clid == TC_H_INGRESS) - q = qdisc_create(dev, tcm->tcm_parent, tca, &err); + q = qdisc_create(dev, tcm->tcm_parent, tcm->tcm_parent, + tca, &err); else - q = qdisc_create(dev, tcm->tcm_handle, tca, &err); + q = qdisc_create(dev, tcm->tcm_parent, tcm->tcm_handle, + tca, &err); if (q == NULL) { if (err == -EAGAIN) goto replay; @@ -835,9 +826,7 @@ static int tc_fill_qdisc(struct sk_buff *skb, struct Qdisc *q, u32 clid, goto rtattr_failure; if (gnet_stats_copy_basic(&d, &q->bstats) < 0 || -#ifdef CONFIG_NET_ESTIMATOR gnet_stats_copy_rate_est(&d, &q->rate_est) < 0 || -#endif gnet_stats_copy_queue(&d, &q->qstats) < 0) goto rtattr_failure; @@ -1163,47 +1152,57 @@ static int tc_dump_tclass(struct sk_buff *skb, struct netlink_callback *cb) to this qdisc, (optionally) tests for protocol and asks specific classifiers. */ +int tc_classify_compat(struct sk_buff *skb, struct tcf_proto *tp, + struct tcf_result *res) +{ + __be16 protocol = skb->protocol; + int err = 0; + + for (; tp; tp = tp->next) { + if ((tp->protocol == protocol || + tp->protocol == htons(ETH_P_ALL)) && + (err = tp->classify(skb, tp, res)) >= 0) { +#ifdef CONFIG_NET_CLS_ACT + if (err != TC_ACT_RECLASSIFY && skb->tc_verd) + skb->tc_verd = SET_TC_VERD(skb->tc_verd, 0); +#endif + return err; + } + } + return -1; +} +EXPORT_SYMBOL(tc_classify_compat); + int tc_classify(struct sk_buff *skb, struct tcf_proto *tp, - struct tcf_result *res) + struct tcf_result *res) { int err = 0; - __be16 protocol = skb->protocol; + __be16 protocol; #ifdef CONFIG_NET_CLS_ACT struct tcf_proto *otp = tp; reclassify: #endif protocol = skb->protocol; - for ( ; tp; tp = tp->next) { - if ((tp->protocol == protocol || - tp->protocol == htons(ETH_P_ALL)) && - (err = tp->classify(skb, tp, res)) >= 0) { + err = tc_classify_compat(skb, tp, res); #ifdef CONFIG_NET_CLS_ACT - if ( TC_ACT_RECLASSIFY == err) { - __u32 verd = (__u32) G_TC_VERD(skb->tc_verd); - tp = otp; - - if (MAX_REC_LOOP < verd++) { - printk("rule prio %d protocol %02x reclassify is buggy packet dropped\n", - tp->prio&0xffff, ntohs(tp->protocol)); - return TC_ACT_SHOT; - } - skb->tc_verd = SET_TC_VERD(skb->tc_verd,verd); - goto reclassify; - } else { - if (skb->tc_verd) - skb->tc_verd = SET_TC_VERD(skb->tc_verd,0); - return err; - } -#else - - return err; -#endif + if (err == TC_ACT_RECLASSIFY) { + u32 verd = G_TC_VERD(skb->tc_verd); + tp = otp; + + if (verd++ >= MAX_REC_LOOP) { + printk("rule prio %u protocol %02x reclassify loop, " + "packet dropped\n", + tp->prio&0xffff, ntohs(tp->protocol)); + return TC_ACT_SHOT; } - + skb->tc_verd = SET_TC_VERD(skb->tc_verd, verd); + goto reclassify; } - return -1; +#endif + return err; } +EXPORT_SYMBOL(tc_classify); void tcf_destroy(struct tcf_proto *tp) { @@ -1270,4 +1269,3 @@ EXPORT_SYMBOL(qdisc_get_rtab); EXPORT_SYMBOL(qdisc_put_rtab); EXPORT_SYMBOL(register_qdisc); EXPORT_SYMBOL(unregister_qdisc); -EXPORT_SYMBOL(tc_classify); diff --git a/net/sched/sch_atm.c b/net/sched/sch_atm.c index d1c383fca82c..ddc4f2c54379 100644 --- a/net/sched/sch_atm.c +++ b/net/sched/sch_atm.c @@ -2,24 +2,19 @@ /* Written 1998-2000 by Werner Almesberger, EPFL ICA */ - #include <linux/module.h> #include <linux/init.h> #include <linux/string.h> #include <linux/errno.h> #include <linux/skbuff.h> -#include <linux/interrupt.h> #include <linux/atmdev.h> #include <linux/atmclip.h> -#include <linux/netdevice.h> #include <linux/rtnetlink.h> -#include <linux/file.h> /* for fput */ +#include <linux/file.h> /* for fput */ #include <net/netlink.h> #include <net/pkt_sched.h> -#include <net/sock.h> - -extern struct socket *sockfd_lookup(int fd, int *err); /* @@@ fix this */ +extern struct socket *sockfd_lookup(int fd, int *err); /* @@@ fix this */ #if 0 /* control */ #define DPRINTK(format,args...) printk(KERN_DEBUG format,##args) @@ -33,7 +28,6 @@ extern struct socket *sockfd_lookup(int fd, int *err); /* @@@ fix this */ #define D2PRINTK(format,args...) #endif - /* * The ATM queuing discipline provides a framework for invoking classifiers * (aka "filters"), which in turn select classes of this queuing discipline. @@ -55,23 +49,21 @@ extern struct socket *sockfd_lookup(int fd, int *err); /* @@@ fix this */ * - should lock the flow while there is data in the queue (?) */ - #define PRIV(sch) qdisc_priv(sch) #define VCC2FLOW(vcc) ((struct atm_flow_data *) ((vcc)->user_back)) - struct atm_flow_data { - struct Qdisc *q; /* FIFO, TBF, etc. */ + struct Qdisc *q; /* FIFO, TBF, etc. */ struct tcf_proto *filter_list; - struct atm_vcc *vcc; /* VCC; NULL if VCC is closed */ - void (*old_pop)(struct atm_vcc *vcc,struct sk_buff *skb); /* chaining */ + struct atm_vcc *vcc; /* VCC; NULL if VCC is closed */ + void (*old_pop)(struct atm_vcc *vcc, + struct sk_buff * skb); /* chaining */ struct atm_qdisc_data *parent; /* parent qdisc */ struct socket *sock; /* for closing */ u32 classid; /* x:y type ID */ int ref; /* reference count */ struct gnet_stats_basic bstats; struct gnet_stats_queue qstats; - spinlock_t *stats_lock; struct atm_flow_data *next; struct atm_flow_data *excess; /* flow for excess traffic; NULL to set CLP instead */ @@ -86,76 +78,74 @@ struct atm_qdisc_data { struct tasklet_struct task; /* requeue tasklet */ }; - /* ------------------------- Class/flow operations ------------------------- */ - -static int find_flow(struct atm_qdisc_data *qdisc,struct atm_flow_data *flow) +static int find_flow(struct atm_qdisc_data *qdisc, struct atm_flow_data *flow) { struct atm_flow_data *walk; - DPRINTK("find_flow(qdisc %p,flow %p)\n",qdisc,flow); + DPRINTK("find_flow(qdisc %p,flow %p)\n", qdisc, flow); for (walk = qdisc->flows; walk; walk = walk->next) - if (walk == flow) return 1; + if (walk == flow) + return 1; DPRINTK("find_flow: not found\n"); return 0; } - -static __inline__ struct atm_flow_data *lookup_flow(struct Qdisc *sch, - u32 classid) +static inline struct atm_flow_data *lookup_flow(struct Qdisc *sch, u32 classid) { struct atm_qdisc_data *p = PRIV(sch); struct atm_flow_data *flow; for (flow = p->flows; flow; flow = flow->next) - if (flow->classid == classid) break; + if (flow->classid == classid) + break; return flow; } - -static int atm_tc_graft(struct Qdisc *sch,unsigned long arg, - struct Qdisc *new,struct Qdisc **old) +static int atm_tc_graft(struct Qdisc *sch, unsigned long arg, + struct Qdisc *new, struct Qdisc **old) { struct atm_qdisc_data *p = PRIV(sch); - struct atm_flow_data *flow = (struct atm_flow_data *) arg; - - DPRINTK("atm_tc_graft(sch %p,[qdisc %p],flow %p,new %p,old %p)\n",sch, - p,flow,new,old); - if (!find_flow(p,flow)) return -EINVAL; - if (!new) new = &noop_qdisc; - *old = xchg(&flow->q,new); - if (*old) qdisc_reset(*old); + struct atm_flow_data *flow = (struct atm_flow_data *)arg; + + DPRINTK("atm_tc_graft(sch %p,[qdisc %p],flow %p,new %p,old %p)\n", + sch, p, flow, new, old); + if (!find_flow(p, flow)) + return -EINVAL; + if (!new) + new = &noop_qdisc; + *old = xchg(&flow->q, new); + if (*old) + qdisc_reset(*old); return 0; } - -static struct Qdisc *atm_tc_leaf(struct Qdisc *sch,unsigned long cl) +static struct Qdisc *atm_tc_leaf(struct Qdisc *sch, unsigned long cl) { - struct atm_flow_data *flow = (struct atm_flow_data *) cl; + struct atm_flow_data *flow = (struct atm_flow_data *)cl; - DPRINTK("atm_tc_leaf(sch %p,flow %p)\n",sch,flow); + DPRINTK("atm_tc_leaf(sch %p,flow %p)\n", sch, flow); return flow ? flow->q : NULL; } - -static unsigned long atm_tc_get(struct Qdisc *sch,u32 classid) +static unsigned long atm_tc_get(struct Qdisc *sch, u32 classid) { - struct atm_qdisc_data *p __attribute__((unused)) = PRIV(sch); + struct atm_qdisc_data *p __maybe_unused = PRIV(sch); struct atm_flow_data *flow; - DPRINTK("atm_tc_get(sch %p,[qdisc %p],classid %x)\n",sch,p,classid); - flow = lookup_flow(sch,classid); - if (flow) flow->ref++; - DPRINTK("atm_tc_get: flow %p\n",flow); - return (unsigned long) flow; + DPRINTK("atm_tc_get(sch %p,[qdisc %p],classid %x)\n", sch, p, classid); + flow = lookup_flow(sch, classid); + if (flow) + flow->ref++; + DPRINTK("atm_tc_get: flow %p\n", flow); + return (unsigned long)flow; } - static unsigned long atm_tc_bind_filter(struct Qdisc *sch, - unsigned long parent, u32 classid) + unsigned long parent, u32 classid) { - return atm_tc_get(sch,classid); + return atm_tc_get(sch, classid); } /* @@ -163,72 +153,75 @@ static unsigned long atm_tc_bind_filter(struct Qdisc *sch, * requested (atm_tc_destroy, etc.). The assumption here is that we never drop * anything that still seems to be in use. */ - static void atm_tc_put(struct Qdisc *sch, unsigned long cl) { struct atm_qdisc_data *p = PRIV(sch); - struct atm_flow_data *flow = (struct atm_flow_data *) cl; + struct atm_flow_data *flow = (struct atm_flow_data *)cl; struct atm_flow_data **prev; - DPRINTK("atm_tc_put(sch %p,[qdisc %p],flow %p)\n",sch,p,flow); - if (--flow->ref) return; + DPRINTK("atm_tc_put(sch %p,[qdisc %p],flow %p)\n", sch, p, flow); + if (--flow->ref) + return; DPRINTK("atm_tc_put: destroying\n"); for (prev = &p->flows; *prev; prev = &(*prev)->next) - if (*prev == flow) break; + if (*prev == flow) + break; if (!*prev) { - printk(KERN_CRIT "atm_tc_put: class %p not found\n",flow); + printk(KERN_CRIT "atm_tc_put: class %p not found\n", flow); return; } *prev = flow->next; - DPRINTK("atm_tc_put: qdisc %p\n",flow->q); + DPRINTK("atm_tc_put: qdisc %p\n", flow->q); qdisc_destroy(flow->q); tcf_destroy_chain(flow->filter_list); if (flow->sock) { DPRINTK("atm_tc_put: f_count %d\n", - file_count(flow->sock->file)); + file_count(flow->sock->file)); flow->vcc->pop = flow->old_pop; sockfd_put(flow->sock); } - if (flow->excess) atm_tc_put(sch,(unsigned long) flow->excess); - if (flow != &p->link) kfree(flow); + if (flow->excess) + atm_tc_put(sch, (unsigned long)flow->excess); + if (flow != &p->link) + kfree(flow); /* * If flow == &p->link, the qdisc no longer works at this point and * needs to be removed. (By the caller of atm_tc_put.) */ } - -static void sch_atm_pop(struct atm_vcc *vcc,struct sk_buff *skb) +static void sch_atm_pop(struct atm_vcc *vcc, struct sk_buff *skb) { struct atm_qdisc_data *p = VCC2FLOW(vcc)->parent; - D2PRINTK("sch_atm_pop(vcc %p,skb %p,[qdisc %p])\n",vcc,skb,p); - VCC2FLOW(vcc)->old_pop(vcc,skb); + D2PRINTK("sch_atm_pop(vcc %p,skb %p,[qdisc %p])\n", vcc, skb, p); + VCC2FLOW(vcc)->old_pop(vcc, skb); tasklet_schedule(&p->task); } static const u8 llc_oui_ip[] = { - 0xaa, /* DSAP: non-ISO */ - 0xaa, /* SSAP: non-ISO */ - 0x03, /* Ctrl: Unnumbered Information Command PDU */ - 0x00, /* OUI: EtherType */ + 0xaa, /* DSAP: non-ISO */ + 0xaa, /* SSAP: non-ISO */ + 0x03, /* Ctrl: Unnumbered Information Command PDU */ + 0x00, /* OUI: EtherType */ 0x00, 0x00, - 0x08, 0x00 }; /* Ethertype IP (0800) */ + 0x08, 0x00 +}; /* Ethertype IP (0800) */ static int atm_tc_change(struct Qdisc *sch, u32 classid, u32 parent, - struct rtattr **tca, unsigned long *arg) + struct rtattr **tca, unsigned long *arg) { struct atm_qdisc_data *p = PRIV(sch); - struct atm_flow_data *flow = (struct atm_flow_data *) *arg; + struct atm_flow_data *flow = (struct atm_flow_data *)*arg; struct atm_flow_data *excess = NULL; - struct rtattr *opt = tca[TCA_OPTIONS-1]; + struct rtattr *opt = tca[TCA_OPTIONS - 1]; struct rtattr *tb[TCA_ATM_MAX]; struct socket *sock; - int fd,error,hdr_len; + int fd, error, hdr_len; void *hdr; DPRINTK("atm_tc_change(sch %p,[qdisc %p],classid %x,parent %x," - "flow %p,opt %p)\n",sch,p,classid,parent,flow,opt); + "flow %p,opt %p)\n", sch, p, classid, parent, flow, opt); /* * The concept of parents doesn't apply for this qdisc. */ @@ -241,33 +234,36 @@ static int atm_tc_change(struct Qdisc *sch, u32 classid, u32 parent, * class needs to be removed and a new one added. (This may be changed * later.) */ - if (flow) return -EBUSY; + if (flow) + return -EBUSY; if (opt == NULL || rtattr_parse_nested(tb, TCA_ATM_MAX, opt)) return -EINVAL; - if (!tb[TCA_ATM_FD-1] || RTA_PAYLOAD(tb[TCA_ATM_FD-1]) < sizeof(fd)) + if (!tb[TCA_ATM_FD - 1] || RTA_PAYLOAD(tb[TCA_ATM_FD - 1]) < sizeof(fd)) return -EINVAL; - fd = *(int *) RTA_DATA(tb[TCA_ATM_FD-1]); - DPRINTK("atm_tc_change: fd %d\n",fd); - if (tb[TCA_ATM_HDR-1]) { - hdr_len = RTA_PAYLOAD(tb[TCA_ATM_HDR-1]); - hdr = RTA_DATA(tb[TCA_ATM_HDR-1]); - } - else { + fd = *(int *)RTA_DATA(tb[TCA_ATM_FD - 1]); + DPRINTK("atm_tc_change: fd %d\n", fd); + if (tb[TCA_ATM_HDR - 1]) { + hdr_len = RTA_PAYLOAD(tb[TCA_ATM_HDR - 1]); + hdr = RTA_DATA(tb[TCA_ATM_HDR - 1]); + } else { hdr_len = RFC1483LLC_LEN; - hdr = NULL; /* default LLC/SNAP for IP */ + hdr = NULL; /* default LLC/SNAP for IP */ } - if (!tb[TCA_ATM_EXCESS-1]) excess = NULL; + if (!tb[TCA_ATM_EXCESS - 1]) + excess = NULL; else { - if (RTA_PAYLOAD(tb[TCA_ATM_EXCESS-1]) != sizeof(u32)) + if (RTA_PAYLOAD(tb[TCA_ATM_EXCESS - 1]) != sizeof(u32)) return -EINVAL; - excess = (struct atm_flow_data *) atm_tc_get(sch, - *(u32 *) RTA_DATA(tb[TCA_ATM_EXCESS-1])); - if (!excess) return -ENOENT; + excess = (struct atm_flow_data *) + atm_tc_get(sch, *(u32 *)RTA_DATA(tb[TCA_ATM_EXCESS - 1])); + if (!excess) + return -ENOENT; } DPRINTK("atm_tc_change: type %d, payload %d, hdr_len %d\n", - opt->rta_type,RTA_PAYLOAD(opt),hdr_len); - if (!(sock = sockfd_lookup(fd,&error))) return error; /* f_count++ */ - DPRINTK("atm_tc_change: f_count %d\n",file_count(sock->file)); + opt->rta_type, RTA_PAYLOAD(opt), hdr_len); + if (!(sock = sockfd_lookup(fd, &error))) + return error; /* f_count++ */ + DPRINTK("atm_tc_change: f_count %d\n", file_count(sock->file)); if (sock->ops->family != PF_ATMSVC && sock->ops->family != PF_ATMPVC) { error = -EPROTOTYPE; goto err_out; @@ -280,37 +276,36 @@ static int atm_tc_change(struct Qdisc *sch, u32 classid, u32 parent, error = -EINVAL; goto err_out; } - if (find_flow(p,flow)) { + if (find_flow(p, flow)) { error = -EEXIST; goto err_out; } - } - else { + } else { int i; unsigned long cl; for (i = 1; i < 0x8000; i++) { - classid = TC_H_MAKE(sch->handle,0x8000 | i); - if (!(cl = atm_tc_get(sch,classid))) break; - atm_tc_put(sch,cl); + classid = TC_H_MAKE(sch->handle, 0x8000 | i); + if (!(cl = atm_tc_get(sch, classid))) + break; + atm_tc_put(sch, cl); } } - DPRINTK("atm_tc_change: new id %x\n",classid); - flow = kmalloc(sizeof(struct atm_flow_data)+hdr_len,GFP_KERNEL); - DPRINTK("atm_tc_change: flow %p\n",flow); + DPRINTK("atm_tc_change: new id %x\n", classid); + flow = kzalloc(sizeof(struct atm_flow_data) + hdr_len, GFP_KERNEL); + DPRINTK("atm_tc_change: flow %p\n", flow); if (!flow) { error = -ENOBUFS; goto err_out; } - memset(flow,0,sizeof(*flow)); flow->filter_list = NULL; - if (!(flow->q = qdisc_create_dflt(sch->dev,&pfifo_qdisc_ops,classid))) + if (!(flow->q = qdisc_create_dflt(sch->dev, &pfifo_qdisc_ops, classid))) flow->q = &noop_qdisc; - DPRINTK("atm_tc_change: qdisc %p\n",flow->q); + DPRINTK("atm_tc_change: qdisc %p\n", flow->q); flow->sock = sock; - flow->vcc = ATM_SD(sock); /* speedup */ + flow->vcc = ATM_SD(sock); /* speedup */ flow->vcc->user_back = flow; - DPRINTK("atm_tc_change: vcc %p\n",flow->vcc); + DPRINTK("atm_tc_change: vcc %p\n", flow->vcc); flow->old_pop = flow->vcc->pop; flow->parent = p; flow->vcc->pop = sch_atm_pop; @@ -321,50 +316,53 @@ static int atm_tc_change(struct Qdisc *sch, u32 classid, u32 parent, p->link.next = flow; flow->hdr_len = hdr_len; if (hdr) - memcpy(flow->hdr,hdr,hdr_len); + memcpy(flow->hdr, hdr, hdr_len); else - memcpy(flow->hdr,llc_oui_ip,sizeof(llc_oui_ip)); - *arg = (unsigned long) flow; + memcpy(flow->hdr, llc_oui_ip, sizeof(llc_oui_ip)); + *arg = (unsigned long)flow; return 0; err_out: - if (excess) atm_tc_put(sch,(unsigned long) excess); + if (excess) + atm_tc_put(sch, (unsigned long)excess); sockfd_put(sock); return error; } - -static int atm_tc_delete(struct Qdisc *sch,unsigned long arg) +static int atm_tc_delete(struct Qdisc *sch, unsigned long arg) { struct atm_qdisc_data *p = PRIV(sch); - struct atm_flow_data *flow = (struct atm_flow_data *) arg; + struct atm_flow_data *flow = (struct atm_flow_data *)arg; - DPRINTK("atm_tc_delete(sch %p,[qdisc %p],flow %p)\n",sch,p,flow); - if (!find_flow(PRIV(sch),flow)) return -EINVAL; - if (flow->filter_list || flow == &p->link) return -EBUSY; + DPRINTK("atm_tc_delete(sch %p,[qdisc %p],flow %p)\n", sch, p, flow); + if (!find_flow(PRIV(sch), flow)) + return -EINVAL; + if (flow->filter_list || flow == &p->link) + return -EBUSY; /* * Reference count must be 2: one for "keepalive" (set at class * creation), and one for the reference held when calling delete. */ if (flow->ref < 2) { - printk(KERN_ERR "atm_tc_delete: flow->ref == %d\n",flow->ref); + printk(KERN_ERR "atm_tc_delete: flow->ref == %d\n", flow->ref); return -EINVAL; } - if (flow->ref > 2) return -EBUSY; /* catch references via excess, etc.*/ - atm_tc_put(sch,arg); + if (flow->ref > 2) + return -EBUSY; /* catch references via excess, etc. */ + atm_tc_put(sch, arg); return 0; } - -static void atm_tc_walk(struct Qdisc *sch,struct qdisc_walker *walker) +static void atm_tc_walk(struct Qdisc *sch, struct qdisc_walker *walker) { struct atm_qdisc_data *p = PRIV(sch); struct atm_flow_data *flow; - DPRINTK("atm_tc_walk(sch %p,[qdisc %p],walker %p)\n",sch,p,walker); - if (walker->stop) return; + DPRINTK("atm_tc_walk(sch %p,[qdisc %p],walker %p)\n", sch, p, walker); + if (walker->stop) + return; for (flow = p->flows; flow; flow = flow->next) { if (walker->count >= walker->skip) - if (walker->fn(sch,(unsigned long) flow,walker) < 0) { + if (walker->fn(sch, (unsigned long)flow, walker) < 0) { walker->stop = 1; break; } @@ -372,73 +370,71 @@ static void atm_tc_walk(struct Qdisc *sch,struct qdisc_walker *walker) } } - -static struct tcf_proto **atm_tc_find_tcf(struct Qdisc *sch,unsigned long cl) +static struct tcf_proto **atm_tc_find_tcf(struct Qdisc *sch, unsigned long cl) { struct atm_qdisc_data *p = PRIV(sch); - struct atm_flow_data *flow = (struct atm_flow_data *) cl; + struct atm_flow_data *flow = (struct atm_flow_data *)cl; - DPRINTK("atm_tc_find_tcf(sch %p,[qdisc %p],flow %p)\n",sch,p,flow); + DPRINTK("atm_tc_find_tcf(sch %p,[qdisc %p],flow %p)\n", sch, p, flow); return flow ? &flow->filter_list : &p->link.filter_list; } - /* --------------------------- Qdisc operations ---------------------------- */ - -static int atm_tc_enqueue(struct sk_buff *skb,struct Qdisc *sch) +static int atm_tc_enqueue(struct sk_buff *skb, struct Qdisc *sch) { struct atm_qdisc_data *p = PRIV(sch); - struct atm_flow_data *flow = NULL ; /* @@@ */ + struct atm_flow_data *flow = NULL; /* @@@ */ struct tcf_result res; int result; int ret = NET_XMIT_POLICED; - D2PRINTK("atm_tc_enqueue(skb %p,sch %p,[qdisc %p])\n",skb,sch,p); - result = TC_POLICE_OK; /* be nice to gcc */ + D2PRINTK("atm_tc_enqueue(skb %p,sch %p,[qdisc %p])\n", skb, sch, p); + result = TC_POLICE_OK; /* be nice to gcc */ if (TC_H_MAJ(skb->priority) != sch->handle || - !(flow = (struct atm_flow_data *) atm_tc_get(sch,skb->priority))) + !(flow = (struct atm_flow_data *)atm_tc_get(sch, skb->priority))) for (flow = p->flows; flow; flow = flow->next) if (flow->filter_list) { - result = tc_classify(skb,flow->filter_list, - &res); - if (result < 0) continue; - flow = (struct atm_flow_data *) res.class; - if (!flow) flow = lookup_flow(sch,res.classid); + result = tc_classify_compat(skb, + flow->filter_list, + &res); + if (result < 0) + continue; + flow = (struct atm_flow_data *)res.class; + if (!flow) + flow = lookup_flow(sch, res.classid); break; } - if (!flow) flow = &p->link; + if (!flow) + flow = &p->link; else { if (flow->vcc) ATM_SKB(skb)->atm_options = flow->vcc->atm_options; - /*@@@ looks good ... but it's not supposed to work :-)*/ -#ifdef CONFIG_NET_CLS_POLICE + /*@@@ looks good ... but it's not supposed to work :-) */ +#ifdef CONFIG_NET_CLS_ACT switch (result) { - case TC_POLICE_SHOT: - kfree_skb(skb); - break; - case TC_POLICE_RECLASSIFY: - if (flow->excess) flow = flow->excess; - else { - ATM_SKB(skb)->atm_options |= - ATM_ATMOPT_CLP; - break; - } - /* fall through */ - case TC_POLICE_OK: - /* fall through */ - default: - break; + case TC_ACT_QUEUED: + case TC_ACT_STOLEN: + kfree_skb(skb); + return NET_XMIT_SUCCESS; + case TC_ACT_SHOT: + kfree_skb(skb); + goto drop; + case TC_POLICE_RECLASSIFY: + if (flow->excess) + flow = flow->excess; + else + ATM_SKB(skb)->atm_options |= ATM_ATMOPT_CLP; + break; } #endif } - if ( -#ifdef CONFIG_NET_CLS_POLICE - result == TC_POLICE_SHOT || -#endif - (ret = flow->q->enqueue(skb,flow->q)) != 0) { + + if ((ret = flow->q->enqueue(skb, flow->q)) != 0) { +drop: __maybe_unused sch->qstats.drops++; - if (flow) flow->qstats.drops++; + if (flow) + flow->qstats.drops++; return ret; } sch->bstats.bytes += skb->len; @@ -462,7 +458,6 @@ static int atm_tc_enqueue(struct sk_buff *skb,struct Qdisc *sch) return NET_XMIT_BYPASS; } - /* * Dequeue packets and send them over ATM. Note that we quite deliberately * avoid checking net_device's flow control here, simply because sch_atm @@ -470,167 +465,163 @@ static int atm_tc_enqueue(struct sk_buff *skb,struct Qdisc *sch) * non-ATM interfaces. */ - static void sch_atm_dequeue(unsigned long data) { - struct Qdisc *sch = (struct Qdisc *) data; + struct Qdisc *sch = (struct Qdisc *)data; struct atm_qdisc_data *p = PRIV(sch); struct atm_flow_data *flow; struct sk_buff *skb; - D2PRINTK("sch_atm_dequeue(sch %p,[qdisc %p])\n",sch,p); + D2PRINTK("sch_atm_dequeue(sch %p,[qdisc %p])\n", sch, p); for (flow = p->link.next; flow; flow = flow->next) /* * If traffic is properly shaped, this won't generate nasty * little bursts. Otherwise, it may ... (but that's okay) */ while ((skb = flow->q->dequeue(flow->q))) { - if (!atm_may_send(flow->vcc,skb->truesize)) { - (void) flow->q->ops->requeue(skb,flow->q); + if (!atm_may_send(flow->vcc, skb->truesize)) { + (void)flow->q->ops->requeue(skb, flow->q); break; } - D2PRINTK("atm_tc_dequeue: sending on class %p\n",flow); + D2PRINTK("atm_tc_dequeue: sending on class %p\n", flow); /* remove any LL header somebody else has attached */ skb_pull(skb, skb_network_offset(skb)); if (skb_headroom(skb) < flow->hdr_len) { struct sk_buff *new; - new = skb_realloc_headroom(skb,flow->hdr_len); + new = skb_realloc_headroom(skb, flow->hdr_len); dev_kfree_skb(skb); - if (!new) continue; + if (!new) + continue; skb = new; } D2PRINTK("sch_atm_dequeue: ip %p, data %p\n", skb_network_header(skb), skb->data); ATM_SKB(skb)->vcc = flow->vcc; - memcpy(skb_push(skb,flow->hdr_len),flow->hdr, - flow->hdr_len); + memcpy(skb_push(skb, flow->hdr_len), flow->hdr, + flow->hdr_len); atomic_add(skb->truesize, &sk_atm(flow->vcc)->sk_wmem_alloc); /* atm.atm_options are already set by atm_tc_enqueue */ - (void) flow->vcc->send(flow->vcc,skb); + flow->vcc->send(flow->vcc, skb); } } - static struct sk_buff *atm_tc_dequeue(struct Qdisc *sch) { struct atm_qdisc_data *p = PRIV(sch); struct sk_buff *skb; - D2PRINTK("atm_tc_dequeue(sch %p,[qdisc %p])\n",sch,p); + D2PRINTK("atm_tc_dequeue(sch %p,[qdisc %p])\n", sch, p); tasklet_schedule(&p->task); skb = p->link.q->dequeue(p->link.q); - if (skb) sch->q.qlen--; + if (skb) + sch->q.qlen--; return skb; } - -static int atm_tc_requeue(struct sk_buff *skb,struct Qdisc *sch) +static int atm_tc_requeue(struct sk_buff *skb, struct Qdisc *sch) { struct atm_qdisc_data *p = PRIV(sch); int ret; - D2PRINTK("atm_tc_requeue(skb %p,sch %p,[qdisc %p])\n",skb,sch,p); - ret = p->link.q->ops->requeue(skb,p->link.q); + D2PRINTK("atm_tc_requeue(skb %p,sch %p,[qdisc %p])\n", skb, sch, p); + ret = p->link.q->ops->requeue(skb, p->link.q); if (!ret) { - sch->q.qlen++; - sch->qstats.requeues++; - } else { + sch->q.qlen++; + sch->qstats.requeues++; + } else { sch->qstats.drops++; p->link.qstats.drops++; } return ret; } - static unsigned int atm_tc_drop(struct Qdisc *sch) { struct atm_qdisc_data *p = PRIV(sch); struct atm_flow_data *flow; unsigned int len; - DPRINTK("atm_tc_drop(sch %p,[qdisc %p])\n",sch,p); + DPRINTK("atm_tc_drop(sch %p,[qdisc %p])\n", sch, p); for (flow = p->flows; flow; flow = flow->next) if (flow->q->ops->drop && (len = flow->q->ops->drop(flow->q))) return len; return 0; } - -static int atm_tc_init(struct Qdisc *sch,struct rtattr *opt) +static int atm_tc_init(struct Qdisc *sch, struct rtattr *opt) { struct atm_qdisc_data *p = PRIV(sch); - DPRINTK("atm_tc_init(sch %p,[qdisc %p],opt %p)\n",sch,p,opt); + DPRINTK("atm_tc_init(sch %p,[qdisc %p],opt %p)\n", sch, p, opt); p->flows = &p->link; - if(!(p->link.q = qdisc_create_dflt(sch->dev,&pfifo_qdisc_ops, - sch->handle))) + if (!(p->link.q = qdisc_create_dflt(sch->dev, &pfifo_qdisc_ops, + sch->handle))) p->link.q = &noop_qdisc; - DPRINTK("atm_tc_init: link (%p) qdisc %p\n",&p->link,p->link.q); + DPRINTK("atm_tc_init: link (%p) qdisc %p\n", &p->link, p->link.q); p->link.filter_list = NULL; p->link.vcc = NULL; p->link.sock = NULL; p->link.classid = sch->handle; p->link.ref = 1; p->link.next = NULL; - tasklet_init(&p->task,sch_atm_dequeue,(unsigned long) sch); + tasklet_init(&p->task, sch_atm_dequeue, (unsigned long)sch); return 0; } - static void atm_tc_reset(struct Qdisc *sch) { struct atm_qdisc_data *p = PRIV(sch); struct atm_flow_data *flow; - DPRINTK("atm_tc_reset(sch %p,[qdisc %p])\n",sch,p); - for (flow = p->flows; flow; flow = flow->next) qdisc_reset(flow->q); + DPRINTK("atm_tc_reset(sch %p,[qdisc %p])\n", sch, p); + for (flow = p->flows; flow; flow = flow->next) + qdisc_reset(flow->q); sch->q.qlen = 0; } - static void atm_tc_destroy(struct Qdisc *sch) { struct atm_qdisc_data *p = PRIV(sch); struct atm_flow_data *flow; - DPRINTK("atm_tc_destroy(sch %p,[qdisc %p])\n",sch,p); + DPRINTK("atm_tc_destroy(sch %p,[qdisc %p])\n", sch, p); /* races ? */ while ((flow = p->flows)) { tcf_destroy_chain(flow->filter_list); flow->filter_list = NULL; if (flow->ref > 1) - printk(KERN_ERR "atm_destroy: %p->ref = %d\n",flow, - flow->ref); - atm_tc_put(sch,(unsigned long) flow); + printk(KERN_ERR "atm_destroy: %p->ref = %d\n", flow, + flow->ref); + atm_tc_put(sch, (unsigned long)flow); if (p->flows == flow) { printk(KERN_ERR "atm_destroy: putting flow %p didn't " - "kill it\n",flow); - p->flows = flow->next; /* brute force */ + "kill it\n", flow); + p->flows = flow->next; /* brute force */ break; } } tasklet_kill(&p->task); } - static int atm_tc_dump_class(struct Qdisc *sch, unsigned long cl, - struct sk_buff *skb, struct tcmsg *tcm) + struct sk_buff *skb, struct tcmsg *tcm) { struct atm_qdisc_data *p = PRIV(sch); - struct atm_flow_data *flow = (struct atm_flow_data *) cl; + struct atm_flow_data *flow = (struct atm_flow_data *)cl; unsigned char *b = skb_tail_pointer(skb); struct rtattr *rta; DPRINTK("atm_tc_dump_class(sch %p,[qdisc %p],flow %p,skb %p,tcm %p)\n", - sch,p,flow,skb,tcm); - if (!find_flow(p,flow)) return -EINVAL; + sch, p, flow, skb, tcm); + if (!find_flow(p, flow)) + return -EINVAL; tcm->tcm_handle = flow->classid; tcm->tcm_info = flow->q->handle; - rta = (struct rtattr *) b; - RTA_PUT(skb,TCA_OPTIONS,0,NULL); - RTA_PUT(skb,TCA_ATM_HDR,flow->hdr_len,flow->hdr); + rta = (struct rtattr *)b; + RTA_PUT(skb, TCA_OPTIONS, 0, NULL); + RTA_PUT(skb, TCA_ATM_HDR, flow->hdr_len, flow->hdr); if (flow->vcc) { struct sockaddr_atmpvc pvc; int state; @@ -639,16 +630,16 @@ static int atm_tc_dump_class(struct Qdisc *sch, unsigned long cl, pvc.sap_addr.itf = flow->vcc->dev ? flow->vcc->dev->number : -1; pvc.sap_addr.vpi = flow->vcc->vpi; pvc.sap_addr.vci = flow->vcc->vci; - RTA_PUT(skb,TCA_ATM_ADDR,sizeof(pvc),&pvc); + RTA_PUT(skb, TCA_ATM_ADDR, sizeof(pvc), &pvc); state = ATM_VF2VS(flow->vcc->flags); - RTA_PUT(skb,TCA_ATM_STATE,sizeof(state),&state); + RTA_PUT(skb, TCA_ATM_STATE, sizeof(state), &state); } if (flow->excess) - RTA_PUT(skb,TCA_ATM_EXCESS,sizeof(u32),&flow->classid); + RTA_PUT(skb, TCA_ATM_EXCESS, sizeof(u32), &flow->classid); else { static u32 zero; - RTA_PUT(skb,TCA_ATM_EXCESS,sizeof(zero),&zero); + RTA_PUT(skb, TCA_ATM_EXCESS, sizeof(zero), &zero); } rta->rta_len = skb_tail_pointer(skb) - b; return skb->len; @@ -659,9 +650,9 @@ rtattr_failure: } static int atm_tc_dump_class_stats(struct Qdisc *sch, unsigned long arg, - struct gnet_dump *d) + struct gnet_dump *d) { - struct atm_flow_data *flow = (struct atm_flow_data *) arg; + struct atm_flow_data *flow = (struct atm_flow_data *)arg; flow->qstats.qlen = flow->q->q.qlen; @@ -678,38 +669,35 @@ static int atm_tc_dump(struct Qdisc *sch, struct sk_buff *skb) } static struct Qdisc_class_ops atm_class_ops = { - .graft = atm_tc_graft, - .leaf = atm_tc_leaf, - .get = atm_tc_get, - .put = atm_tc_put, - .change = atm_tc_change, - .delete = atm_tc_delete, - .walk = atm_tc_walk, - .tcf_chain = atm_tc_find_tcf, - .bind_tcf = atm_tc_bind_filter, - .unbind_tcf = atm_tc_put, - .dump = atm_tc_dump_class, - .dump_stats = atm_tc_dump_class_stats, + .graft = atm_tc_graft, + .leaf = atm_tc_leaf, + .get = atm_tc_get, + .put = atm_tc_put, + .change = atm_tc_change, + .delete = atm_tc_delete, + .walk = atm_tc_walk, + .tcf_chain = atm_tc_find_tcf, + .bind_tcf = atm_tc_bind_filter, + .unbind_tcf = atm_tc_put, + .dump = atm_tc_dump_class, + .dump_stats = atm_tc_dump_class_stats, }; static struct Qdisc_ops atm_qdisc_ops = { - .next = NULL, - .cl_ops = &atm_class_ops, - .id = "atm", - .priv_size = sizeof(struct atm_qdisc_data), - .enqueue = atm_tc_enqueue, - .dequeue = atm_tc_dequeue, - .requeue = atm_tc_requeue, - .drop = atm_tc_drop, - .init = atm_tc_init, - .reset = atm_tc_reset, - .destroy = atm_tc_destroy, - .change = NULL, - .dump = atm_tc_dump, - .owner = THIS_MODULE, + .cl_ops = &atm_class_ops, + .id = "atm", + .priv_size = sizeof(struct atm_qdisc_data), + .enqueue = atm_tc_enqueue, + .dequeue = atm_tc_dequeue, + .requeue = atm_tc_requeue, + .drop = atm_tc_drop, + .init = atm_tc_init, + .reset = atm_tc_reset, + .destroy = atm_tc_destroy, + .dump = atm_tc_dump, + .owner = THIS_MODULE, }; - static int __init atm_init(void) { return register_qdisc(&atm_qdisc_ops); diff --git a/net/sched/sch_blackhole.c b/net/sched/sch_blackhole.c index cb0c456aa349..f914fc43a124 100644 --- a/net/sched/sch_blackhole.c +++ b/net/sched/sch_blackhole.c @@ -14,7 +14,6 @@ #include <linux/module.h> #include <linux/types.h> #include <linux/kernel.h> -#include <linux/netdevice.h> #include <linux/skbuff.h> #include <net/pkt_sched.h> diff --git a/net/sched/sch_cbq.c b/net/sched/sch_cbq.c index ee2d5967d109..cbef3bbfc20f 100644 --- a/net/sched/sch_cbq.c +++ b/net/sched/sch_cbq.c @@ -11,28 +11,12 @@ */ #include <linux/module.h> -#include <asm/uaccess.h> -#include <asm/system.h> -#include <linux/bitops.h> #include <linux/types.h> #include <linux/kernel.h> #include <linux/string.h> -#include <linux/mm.h> -#include <linux/socket.h> -#include <linux/sockios.h> -#include <linux/in.h> #include <linux/errno.h> -#include <linux/interrupt.h> -#include <linux/if_ether.h> -#include <linux/inet.h> -#include <linux/netdevice.h> -#include <linux/etherdevice.h> -#include <linux/notifier.h> -#include <net/ip.h> -#include <net/netlink.h> -#include <net/route.h> #include <linux/skbuff.h> -#include <net/sock.h> +#include <net/netlink.h> #include <net/pkt_sched.h> @@ -98,7 +82,7 @@ struct cbq_class unsigned char priority2; /* priority to be used after overlimit */ unsigned char ewma_log; /* time constant for idle time calculation */ unsigned char ovl_strategy; -#ifdef CONFIG_NET_CLS_POLICE +#ifdef CONFIG_NET_CLS_ACT unsigned char police; #endif @@ -148,7 +132,6 @@ struct cbq_class struct gnet_stats_basic bstats; struct gnet_stats_queue qstats; struct gnet_stats_rate_est rate_est; - spinlock_t *stats_lock; struct tc_cbq_xstats xstats; struct tcf_proto *filter_list; @@ -171,7 +154,7 @@ struct cbq_sched_data struct cbq_class *active[TC_CBQ_MAXPRIO+1]; /* List of all classes with backlog */ -#ifdef CONFIG_NET_CLS_POLICE +#ifdef CONFIG_NET_CLS_ACT struct cbq_class *rx_class; #endif struct cbq_class *tx_class; @@ -213,7 +196,7 @@ cbq_class_lookup(struct cbq_sched_data *q, u32 classid) return NULL; } -#ifdef CONFIG_NET_CLS_POLICE +#ifdef CONFIG_NET_CLS_ACT static struct cbq_class * cbq_reclassify(struct sk_buff *skb, struct cbq_class *this) @@ -264,7 +247,8 @@ cbq_classify(struct sk_buff *skb, struct Qdisc *sch, int *qerr) /* * Step 2+n. Apply classifier. */ - if (!head->filter_list || (result = tc_classify(skb, head->filter_list, &res)) < 0) + if (!head->filter_list || + (result = tc_classify_compat(skb, head->filter_list, &res)) < 0) goto fallback; if ((cl = (void*)res.class) == NULL) { @@ -284,15 +268,8 @@ cbq_classify(struct sk_buff *skb, struct Qdisc *sch, int *qerr) *qerr = NET_XMIT_SUCCESS; case TC_ACT_SHOT: return NULL; - } -#elif defined(CONFIG_NET_CLS_POLICE) - switch (result) { - case TC_POLICE_RECLASSIFY: + case TC_ACT_RECLASSIFY: return cbq_reclassify(skb, cl); - case TC_POLICE_SHOT: - return NULL; - default: - break; } #endif if (cl->level == 0) @@ -403,10 +380,10 @@ cbq_enqueue(struct sk_buff *skb, struct Qdisc *sch) { struct cbq_sched_data *q = qdisc_priv(sch); int len = skb->len; - int ret; + int uninitialized_var(ret); struct cbq_class *cl = cbq_classify(skb, sch, &ret); -#ifdef CONFIG_NET_CLS_POLICE +#ifdef CONFIG_NET_CLS_ACT q->rx_class = cl; #endif if (cl == NULL) { @@ -416,7 +393,7 @@ cbq_enqueue(struct sk_buff *skb, struct Qdisc *sch) return ret; } -#ifdef CONFIG_NET_CLS_POLICE +#ifdef CONFIG_NET_CLS_ACT cl->q->__parent = sch; #endif if ((ret = cl->q->enqueue(skb, cl->q)) == NET_XMIT_SUCCESS) { @@ -451,7 +428,7 @@ cbq_requeue(struct sk_buff *skb, struct Qdisc *sch) cbq_mark_toplevel(q, cl); -#ifdef CONFIG_NET_CLS_POLICE +#ifdef CONFIG_NET_CLS_ACT q->rx_class = cl; cl->q->__parent = sch; #endif @@ -686,9 +663,7 @@ static enum hrtimer_restart cbq_undelay(struct hrtimer *timer) return HRTIMER_NORESTART; } - -#ifdef CONFIG_NET_CLS_POLICE - +#ifdef CONFIG_NET_CLS_ACT static int cbq_reshape_fail(struct sk_buff *skb, struct Qdisc *child) { int len = skb->len; @@ -1381,7 +1356,7 @@ static int cbq_set_overlimit(struct cbq_class *cl, struct tc_cbq_ovl *ovl) return 0; } -#ifdef CONFIG_NET_CLS_POLICE +#ifdef CONFIG_NET_CLS_ACT static int cbq_set_police(struct cbq_class *cl, struct tc_cbq_police *p) { cl->police = p->police; @@ -1442,7 +1417,6 @@ static int cbq_init(struct Qdisc *sch, struct rtattr *opt) q->link.ewma_log = TC_CBQ_DEF_EWMA; q->link.avpkt = q->link.allot/2; q->link.minidle = -0x7FFFFFFF; - q->link.stats_lock = &sch->dev->queue_lock; qdisc_watchdog_init(&q->watchdog, sch); hrtimer_init(&q->delay_timer, CLOCK_MONOTONIC, HRTIMER_MODE_ABS); @@ -1550,7 +1524,7 @@ rtattr_failure: return -1; } -#ifdef CONFIG_NET_CLS_POLICE +#ifdef CONFIG_NET_CLS_ACT static __inline__ int cbq_dump_police(struct sk_buff *skb, struct cbq_class *cl) { unsigned char *b = skb_tail_pointer(skb); @@ -1576,7 +1550,7 @@ static int cbq_dump_attr(struct sk_buff *skb, struct cbq_class *cl) cbq_dump_rate(skb, cl) < 0 || cbq_dump_wrr(skb, cl) < 0 || cbq_dump_ovl(skb, cl) < 0 || -#ifdef CONFIG_NET_CLS_POLICE +#ifdef CONFIG_NET_CLS_ACT cbq_dump_police(skb, cl) < 0 || #endif cbq_dump_fopt(skb, cl) < 0) @@ -1653,9 +1627,7 @@ cbq_dump_class_stats(struct Qdisc *sch, unsigned long arg, cl->xstats.undertime = cl->undertime - q->now; if (gnet_stats_copy_basic(d, &cl->bstats) < 0 || -#ifdef CONFIG_NET_ESTIMATOR gnet_stats_copy_rate_est(d, &cl->rate_est) < 0 || -#endif gnet_stats_copy_queue(d, &cl->qstats) < 0) return -1; @@ -1673,7 +1645,7 @@ static int cbq_graft(struct Qdisc *sch, unsigned long arg, struct Qdisc *new, cl->classid)) == NULL) return -ENOBUFS; } else { -#ifdef CONFIG_NET_CLS_POLICE +#ifdef CONFIG_NET_CLS_ACT if (cl->police == TC_POLICE_RECLASSIFY) new->reshape_fail = cbq_reshape_fail; #endif @@ -1726,9 +1698,7 @@ static void cbq_destroy_class(struct Qdisc *sch, struct cbq_class *cl) tcf_destroy_chain(cl->filter_list); qdisc_destroy(cl->q); qdisc_put_rtab(cl->R_tab); -#ifdef CONFIG_NET_ESTIMATOR gen_kill_estimator(&cl->bstats, &cl->rate_est); -#endif if (cl != &q->link) kfree(cl); } @@ -1740,7 +1710,7 @@ cbq_destroy(struct Qdisc* sch) struct cbq_class *cl; unsigned h; -#ifdef CONFIG_NET_CLS_POLICE +#ifdef CONFIG_NET_CLS_ACT q->rx_class = NULL; #endif /* @@ -1769,7 +1739,7 @@ static void cbq_put(struct Qdisc *sch, unsigned long arg) struct cbq_class *cl = (struct cbq_class*)arg; if (--cl->refcnt == 0) { -#ifdef CONFIG_NET_CLS_POLICE +#ifdef CONFIG_NET_CLS_ACT struct cbq_sched_data *q = qdisc_priv(sch); spin_lock_bh(&sch->dev->queue_lock); @@ -1817,7 +1787,7 @@ cbq_change_class(struct Qdisc *sch, u32 classid, u32 parentid, struct rtattr **t RTA_PAYLOAD(tb[TCA_CBQ_WRROPT-1]) < sizeof(struct tc_cbq_wrropt)) return -EINVAL; -#ifdef CONFIG_NET_CLS_POLICE +#ifdef CONFIG_NET_CLS_ACT if (tb[TCA_CBQ_POLICE-1] && RTA_PAYLOAD(tb[TCA_CBQ_POLICE-1]) < sizeof(struct tc_cbq_police)) return -EINVAL; @@ -1860,7 +1830,7 @@ cbq_change_class(struct Qdisc *sch, u32 classid, u32 parentid, struct rtattr **t if (tb[TCA_CBQ_OVL_STRATEGY-1]) cbq_set_overlimit(cl, RTA_DATA(tb[TCA_CBQ_OVL_STRATEGY-1])); -#ifdef CONFIG_NET_CLS_POLICE +#ifdef CONFIG_NET_CLS_ACT if (tb[TCA_CBQ_POLICE-1]) cbq_set_police(cl, RTA_DATA(tb[TCA_CBQ_POLICE-1])); #endif @@ -1873,11 +1843,10 @@ cbq_change_class(struct Qdisc *sch, u32 classid, u32 parentid, struct rtattr **t sch_tree_unlock(sch); -#ifdef CONFIG_NET_ESTIMATOR if (tca[TCA_RATE-1]) gen_replace_estimator(&cl->bstats, &cl->rate_est, - cl->stats_lock, tca[TCA_RATE-1]); -#endif + &sch->dev->queue_lock, + tca[TCA_RATE-1]); return 0; } @@ -1935,7 +1904,6 @@ cbq_change_class(struct Qdisc *sch, u32 classid, u32 parentid, struct rtattr **t cl->allot = parent->allot; cl->quantum = cl->allot; cl->weight = cl->R_tab->rate.rate; - cl->stats_lock = &sch->dev->queue_lock; sch_tree_lock(sch); cbq_link_class(cl); @@ -1955,7 +1923,7 @@ cbq_change_class(struct Qdisc *sch, u32 classid, u32 parentid, struct rtattr **t cl->overlimit = cbq_ovl_classic; if (tb[TCA_CBQ_OVL_STRATEGY-1]) cbq_set_overlimit(cl, RTA_DATA(tb[TCA_CBQ_OVL_STRATEGY-1])); -#ifdef CONFIG_NET_CLS_POLICE +#ifdef CONFIG_NET_CLS_ACT if (tb[TCA_CBQ_POLICE-1]) cbq_set_police(cl, RTA_DATA(tb[TCA_CBQ_POLICE-1])); #endif @@ -1963,11 +1931,9 @@ cbq_change_class(struct Qdisc *sch, u32 classid, u32 parentid, struct rtattr **t cbq_set_fopt(cl, RTA_DATA(tb[TCA_CBQ_FOPT-1])); sch_tree_unlock(sch); -#ifdef CONFIG_NET_ESTIMATOR if (tca[TCA_RATE-1]) gen_new_estimator(&cl->bstats, &cl->rate_est, - cl->stats_lock, tca[TCA_RATE-1]); -#endif + &sch->dev->queue_lock, tca[TCA_RATE-1]); *arg = (unsigned long)cl; return 0; @@ -2001,7 +1967,7 @@ static int cbq_delete(struct Qdisc *sch, unsigned long arg) q->tx_class = NULL; q->tx_borrowed = NULL; } -#ifdef CONFIG_NET_CLS_POLICE +#ifdef CONFIG_NET_CLS_ACT if (q->rx_class == cl) q->rx_class = NULL; #endif diff --git a/net/sched/sch_dsmark.c b/net/sched/sch_dsmark.c index 3c6fd181263f..60f89199e3da 100644 --- a/net/sched/sch_dsmark.c +++ b/net/sched/sch_dsmark.c @@ -9,7 +9,6 @@ #include <linux/string.h> #include <linux/errno.h> #include <linux/skbuff.h> -#include <linux/netdevice.h> /* for pkt_sched */ #include <linux/rtnetlink.h> #include <net/pkt_sched.h> #include <net/dsfield.h> @@ -238,25 +237,23 @@ static int dsmark_enqueue(struct sk_buff *skb,struct Qdisc *sch) D2PRINTK("result %d class 0x%04x\n", result, res.classid); switch (result) { -#ifdef CONFIG_NET_CLS_POLICE - case TC_POLICE_SHOT: - kfree_skb(skb); - sch->qstats.drops++; - return NET_XMIT_POLICED; -#if 0 - case TC_POLICE_RECLASSIFY: - /* FIXME: what to do here ??? */ +#ifdef CONFIG_NET_CLS_ACT + case TC_ACT_QUEUED: + case TC_ACT_STOLEN: + kfree_skb(skb); + return NET_XMIT_SUCCESS; + case TC_ACT_SHOT: + kfree_skb(skb); + sch->qstats.drops++; + return NET_XMIT_BYPASS; #endif -#endif - case TC_POLICE_OK: - skb->tc_index = TC_H_MIN(res.classid); - break; - case TC_POLICE_UNSPEC: - /* fall through */ - default: - if (p->default_index != NO_DEFAULT_INDEX) - skb->tc_index = p->default_index; - break; + case TC_ACT_OK: + skb->tc_index = TC_H_MIN(res.classid); + break; + default: + if (p->default_index != NO_DEFAULT_INDEX) + skb->tc_index = p->default_index; + break; } } diff --git a/net/sched/sch_fifo.c b/net/sched/sch_fifo.c index c2689f4ba8de..c264308f17c1 100644 --- a/net/sched/sch_fifo.c +++ b/net/sched/sch_fifo.c @@ -13,7 +13,6 @@ #include <linux/types.h> #include <linux/kernel.h> #include <linux/errno.h> -#include <linux/netdevice.h> #include <linux/skbuff.h> #include <net/pkt_sched.h> diff --git a/net/sched/sch_generic.c b/net/sched/sch_generic.c index f4d34480a093..c81649cf0b9e 100644 --- a/net/sched/sch_generic.c +++ b/net/sched/sch_generic.c @@ -11,27 +11,19 @@ * - Ingress support */ -#include <asm/uaccess.h> -#include <asm/system.h> #include <linux/bitops.h> #include <linux/module.h> #include <linux/types.h> #include <linux/kernel.h> #include <linux/sched.h> #include <linux/string.h> -#include <linux/mm.h> -#include <linux/socket.h> -#include <linux/sockios.h> -#include <linux/in.h> #include <linux/errno.h> -#include <linux/interrupt.h> #include <linux/netdevice.h> #include <linux/skbuff.h> #include <linux/rtnetlink.h> #include <linux/init.h> #include <linux/rcupdate.h> #include <linux/list.h> -#include <net/sock.h> #include <net/pkt_sched.h> /* Main transmission queue. */ @@ -59,122 +51,143 @@ void qdisc_unlock_tree(struct net_device *dev) spin_unlock_bh(&dev->queue_lock); } -/* - dev->queue_lock serializes queue accesses for this device - AND dev->qdisc pointer itself. +static inline int qdisc_qlen(struct Qdisc *q) +{ + return q->q.qlen; +} - netif_tx_lock serializes accesses to device driver. +static inline int dev_requeue_skb(struct sk_buff *skb, struct net_device *dev, + struct Qdisc *q) +{ + if (unlikely(skb->next)) + dev->gso_skb = skb; + else + q->ops->requeue(skb, q); - dev->queue_lock and netif_tx_lock are mutually exclusive, - if one is grabbed, another must be free. - */ + netif_schedule(dev); + return 0; +} +static inline struct sk_buff *dev_dequeue_skb(struct net_device *dev, + struct Qdisc *q) +{ + struct sk_buff *skb; -/* Kick device. + if ((skb = dev->gso_skb)) + dev->gso_skb = NULL; + else + skb = q->dequeue(q); - Returns: 0 - queue is empty or throttled. - >0 - queue is not empty. + return skb; +} - NOTE: Called under dev->queue_lock with locally disabled BH. -*/ +static inline int handle_dev_cpu_collision(struct sk_buff *skb, + struct net_device *dev, + struct Qdisc *q) +{ + int ret; + if (unlikely(dev->xmit_lock_owner == smp_processor_id())) { + /* + * Same CPU holding the lock. It may be a transient + * configuration error, when hard_start_xmit() recurses. We + * detect it by checking xmit owner and drop the packet when + * deadloop is detected. Return OK to try the next skb. + */ + kfree_skb(skb); + if (net_ratelimit()) + printk(KERN_WARNING "Dead loop on netdevice %s, " + "fix it urgently!\n", dev->name); + ret = qdisc_qlen(q); + } else { + /* + * Another cpu is holding lock, requeue & delay xmits for + * some time. + */ + __get_cpu_var(netdev_rx_stat).cpu_collision++; + ret = dev_requeue_skb(skb, dev, q); + } + + return ret; +} + +/* + * NOTE: Called under dev->queue_lock with locally disabled BH. + * + * __LINK_STATE_QDISC_RUNNING guarantees only one CPU can process this + * device at a time. dev->queue_lock serializes queue accesses for + * this device AND dev->qdisc pointer itself. + * + * netif_tx_lock serializes accesses to device driver. + * + * dev->queue_lock and netif_tx_lock are mutually exclusive, + * if one is grabbed, another must be free. + * + * Note, that this procedure can be called by a watchdog timer + * + * Returns to the caller: + * 0 - queue is empty or throttled. + * >0 - queue is not empty. + * + */ static inline int qdisc_restart(struct net_device *dev) { struct Qdisc *q = dev->qdisc; struct sk_buff *skb; + unsigned lockless; + int ret; /* Dequeue packet */ - if (((skb = dev->gso_skb)) || ((skb = q->dequeue(q)))) { - unsigned nolock = (dev->features & NETIF_F_LLTX); + if (unlikely((skb = dev_dequeue_skb(dev, q)) == NULL)) + return 0; + + /* + * When the driver has LLTX set, it does its own locking in + * start_xmit. These checks are worth it because even uncongested + * locks can be quite expensive. The driver can do a trylock, as + * is being done here; in case of lock contention it should return + * NETDEV_TX_LOCKED and the packet will be requeued. + */ + lockless = (dev->features & NETIF_F_LLTX); - dev->gso_skb = NULL; + if (!lockless && !netif_tx_trylock(dev)) { + /* Another CPU grabbed the driver tx lock */ + return handle_dev_cpu_collision(skb, dev, q); + } - /* - * When the driver has LLTX set it does its own locking - * in start_xmit. No need to add additional overhead by - * locking again. These checks are worth it because - * even uncongested locks can be quite expensive. - * The driver can do trylock like here too, in case - * of lock congestion it should return -1 and the packet - * will be requeued. - */ - if (!nolock) { - if (!netif_tx_trylock(dev)) { - collision: - /* So, someone grabbed the driver. */ - - /* It may be transient configuration error, - when hard_start_xmit() recurses. We detect - it by checking xmit owner and drop the - packet when deadloop is detected. - */ - if (dev->xmit_lock_owner == smp_processor_id()) { - kfree_skb(skb); - if (net_ratelimit()) - printk(KERN_DEBUG "Dead loop on netdevice %s, fix it urgently!\n", dev->name); - goto out; - } - __get_cpu_var(netdev_rx_stat).cpu_collision++; - goto requeue; - } - } + /* And release queue */ + spin_unlock(&dev->queue_lock); - { - /* And release queue */ - spin_unlock(&dev->queue_lock); - - if (!netif_queue_stopped(dev)) { - int ret; - - ret = dev_hard_start_xmit(skb, dev); - if (ret == NETDEV_TX_OK) { - if (!nolock) { - netif_tx_unlock(dev); - } - spin_lock(&dev->queue_lock); - q = dev->qdisc; - goto out; - } - if (ret == NETDEV_TX_LOCKED && nolock) { - spin_lock(&dev->queue_lock); - q = dev->qdisc; - goto collision; - } - } + ret = dev_hard_start_xmit(skb, dev); - /* NETDEV_TX_BUSY - we need to requeue */ - /* Release the driver */ - if (!nolock) { - netif_tx_unlock(dev); - } - spin_lock(&dev->queue_lock); - q = dev->qdisc; - } + if (!lockless) + netif_tx_unlock(dev); - /* Device kicked us out :( - This is possible in three cases: + spin_lock(&dev->queue_lock); + q = dev->qdisc; - 0. driver is locked - 1. fastroute is enabled - 2. device cannot determine busy state - before start of transmission (f.e. dialout) - 3. device is buggy (ppp) - */ + switch (ret) { + case NETDEV_TX_OK: + /* Driver sent out skb successfully */ + ret = qdisc_qlen(q); + break; -requeue: - if (unlikely(q == &noop_qdisc)) - kfree_skb(skb); - else if (skb->next) - dev->gso_skb = skb; - else - q->ops->requeue(skb, q); - netif_schedule(dev); + case NETDEV_TX_LOCKED: + /* Driver try lock failed */ + ret = handle_dev_cpu_collision(skb, dev, q); + break; + + default: + /* Driver returned NETDEV_TX_BUSY - requeue skb */ + if (unlikely (ret != NETDEV_TX_BUSY && net_ratelimit())) + printk(KERN_WARNING "BUG %s code %d qlen %d\n", + dev->name, ret, q->q.qlen); + + ret = dev_requeue_skb(skb, dev, q); + break; } - return 0; -out: - BUG_ON((int) q->q.qlen < 0); - return q->q.qlen; + return ret; } void __qdisc_run(struct net_device *dev) @@ -493,9 +506,7 @@ void qdisc_destroy(struct Qdisc *qdisc) return; list_del(&qdisc->list); -#ifdef CONFIG_NET_ESTIMATOR gen_kill_estimator(&qdisc->bstats, &qdisc->rate_est); -#endif if (ops->reset) ops->reset(qdisc); if (ops->destroy) diff --git a/net/sched/sch_gred.c b/net/sched/sch_gred.c index fa1b4fe7a5fd..3cc6dda02e2e 100644 --- a/net/sched/sch_gred.c +++ b/net/sched/sch_gred.c @@ -21,7 +21,6 @@ #include <linux/module.h> #include <linux/types.h> #include <linux/kernel.h> -#include <linux/netdevice.h> #include <linux/skbuff.h> #include <net/pkt_sched.h> #include <net/red.h> diff --git a/net/sched/sch_hfsc.c b/net/sched/sch_hfsc.c index 9d124c4ee3a7..55e7e4530f43 100644 --- a/net/sched/sch_hfsc.c +++ b/net/sched/sch_hfsc.c @@ -53,7 +53,6 @@ #include <linux/module.h> #include <linux/types.h> #include <linux/errno.h> -#include <linux/jiffies.h> #include <linux/compiler.h> #include <linux/spinlock.h> #include <linux/skbuff.h> @@ -62,13 +61,11 @@ #include <linux/list.h> #include <linux/rbtree.h> #include <linux/init.h> -#include <linux/netdevice.h> #include <linux/rtnetlink.h> #include <linux/pkt_sched.h> #include <net/netlink.h> #include <net/pkt_sched.h> #include <net/pkt_cls.h> -#include <asm/system.h> #include <asm/div64.h> /* @@ -122,7 +119,6 @@ struct hfsc_class struct gnet_stats_basic bstats; struct gnet_stats_queue qstats; struct gnet_stats_rate_est rate_est; - spinlock_t *stats_lock; unsigned int level; /* class level in hierarchy */ struct tcf_proto *filter_list; /* filter list */ unsigned int filter_cnt; /* filter count */ @@ -1054,11 +1050,10 @@ hfsc_change_class(struct Qdisc *sch, u32 classid, u32 parentid, } sch_tree_unlock(sch); -#ifdef CONFIG_NET_ESTIMATOR if (tca[TCA_RATE-1]) gen_replace_estimator(&cl->bstats, &cl->rate_est, - cl->stats_lock, tca[TCA_RATE-1]); -#endif + &sch->dev->queue_lock, + tca[TCA_RATE-1]); return 0; } @@ -1098,7 +1093,6 @@ hfsc_change_class(struct Qdisc *sch, u32 classid, u32 parentid, cl->qdisc = qdisc_create_dflt(sch->dev, &pfifo_qdisc_ops, classid); if (cl->qdisc == NULL) cl->qdisc = &noop_qdisc; - cl->stats_lock = &sch->dev->queue_lock; INIT_LIST_HEAD(&cl->children); cl->vt_tree = RB_ROOT; cl->cf_tree = RB_ROOT; @@ -1112,11 +1106,9 @@ hfsc_change_class(struct Qdisc *sch, u32 classid, u32 parentid, cl->cl_pcvtoff = parent->cl_cvtoff; sch_tree_unlock(sch); -#ifdef CONFIG_NET_ESTIMATOR if (tca[TCA_RATE-1]) gen_new_estimator(&cl->bstats, &cl->rate_est, - cl->stats_lock, tca[TCA_RATE-1]); -#endif + &sch->dev->queue_lock, tca[TCA_RATE-1]); *arg = (unsigned long)cl; return 0; } @@ -1128,9 +1120,7 @@ hfsc_destroy_class(struct Qdisc *sch, struct hfsc_class *cl) tcf_destroy_chain(cl->filter_list); qdisc_destroy(cl->qdisc); -#ifdef CONFIG_NET_ESTIMATOR gen_kill_estimator(&cl->bstats, &cl->rate_est); -#endif if (cl != &q->root) kfree(cl); } @@ -1184,9 +1174,6 @@ hfsc_classify(struct sk_buff *skb, struct Qdisc *sch, int *qerr) case TC_ACT_SHOT: return NULL; } -#elif defined(CONFIG_NET_CLS_POLICE) - if (result == TC_POLICE_SHOT) - return NULL; #endif if ((cl = (struct hfsc_class *)res.class) == NULL) { if ((cl = hfsc_find_class(res.classid, sch)) == NULL) @@ -1384,9 +1371,7 @@ hfsc_dump_class_stats(struct Qdisc *sch, unsigned long arg, xstats.rtwork = cl->cl_cumul; if (gnet_stats_copy_basic(d, &cl->bstats) < 0 || -#ifdef CONFIG_NET_ESTIMATOR gnet_stats_copy_rate_est(d, &cl->rate_est) < 0 || -#endif gnet_stats_copy_queue(d, &cl->qstats) < 0) return -1; @@ -1448,8 +1433,6 @@ hfsc_init_qdisc(struct Qdisc *sch, struct rtattr *opt) return -EINVAL; qopt = RTA_DATA(opt); - sch->stats_lock = &sch->dev->queue_lock; - q->defcls = qopt->defcls; for (i = 0; i < HFSC_HSIZE; i++) INIT_LIST_HEAD(&q->clhash[i]); @@ -1464,7 +1447,6 @@ hfsc_init_qdisc(struct Qdisc *sch, struct rtattr *opt) sch->handle); if (q->root.qdisc == NULL) q->root.qdisc = &noop_qdisc; - q->root.stats_lock = &sch->dev->queue_lock; INIT_LIST_HEAD(&q->root.children); q->root.vt_tree = RB_ROOT; q->root.cf_tree = RB_ROOT; diff --git a/net/sched/sch_htb.c b/net/sched/sch_htb.c index 035788c5b7f8..246a2f9765f1 100644 --- a/net/sched/sch_htb.c +++ b/net/sched/sch_htb.c @@ -28,32 +28,16 @@ * $Id: sch_htb.c,v 1.25 2003/12/07 11:08:25 devik Exp devik $ */ #include <linux/module.h> -#include <asm/uaccess.h> -#include <asm/system.h> -#include <linux/bitops.h> #include <linux/types.h> #include <linux/kernel.h> #include <linux/string.h> -#include <linux/mm.h> -#include <linux/socket.h> -#include <linux/sockios.h> -#include <linux/in.h> #include <linux/errno.h> -#include <linux/interrupt.h> -#include <linux/if_ether.h> -#include <linux/inet.h> -#include <linux/netdevice.h> -#include <linux/etherdevice.h> -#include <linux/notifier.h> -#include <net/ip.h> -#include <net/route.h> #include <linux/skbuff.h> #include <linux/list.h> #include <linux/compiler.h> +#include <linux/rbtree.h> #include <net/netlink.h> -#include <net/sock.h> #include <net/pkt_sched.h> -#include <linux/rbtree.h> /* HTB algorithm. Author: devik@cdi.cz @@ -69,8 +53,6 @@ */ #define HTB_HSIZE 16 /* classid hash size */ -#define HTB_EWMAC 2 /* rate average over HTB_EWMAC*HTB_HSIZE sec */ -#define HTB_RATECM 1 /* whether to use rate computer */ #define HTB_HYSTERESIS 1 /* whether to use mode hysteresis for speedup */ #define HTB_VER 0x30011 /* major must be matched with number suplied by TC as version */ @@ -95,12 +77,6 @@ struct htb_class { struct tc_htb_xstats xstats; /* our special stats */ int refcnt; /* usage count of this class */ -#ifdef HTB_RATECM - /* rate measurement counters */ - unsigned long rate_bytes, sum_bytes; - unsigned long rate_packets, sum_packets; -#endif - /* topology */ int level; /* our level (see above) */ struct htb_class *parent; /* parent class */ @@ -153,15 +129,12 @@ struct htb_class { /* of un.leaf originals should be done. */ }; -/* TODO: maybe compute rate when size is too large .. or drop ? */ static inline long L2T(struct htb_class *cl, struct qdisc_rate_table *rate, int size) { int slot = size >> rate->rate.cell_log; - if (slot > 255) { - cl->xstats.giants++; - slot = 255; - } + if (slot > 255) + return (rate->data[255]*(slot >> 8) + rate->data[slot & 0xFF]); return rate->data[slot]; } @@ -194,10 +167,6 @@ struct htb_sched { int rate2quantum; /* quant = rate / rate2quantum */ psched_time_t now; /* cached dequeue time */ struct qdisc_watchdog watchdog; -#ifdef HTB_RATECM - struct timer_list rttim; /* rate computer timer */ - int recmp_bucket; /* which hash bucket to recompute next */ -#endif /* non shaped skbs; let them go directly thru */ struct sk_buff_head direct_queue; @@ -280,9 +249,6 @@ static struct htb_class *htb_classify(struct sk_buff *skb, struct Qdisc *sch, case TC_ACT_SHOT: return NULL; } -#elif defined(CONFIG_NET_CLS_POLICE) - if (result == TC_POLICE_SHOT) - return HTB_DIRECT; #endif if ((cl = (void *)res.class) == NULL) { if (res.classid == sch->handle) @@ -634,13 +600,14 @@ static int htb_enqueue(struct sk_buff *skb, struct Qdisc *sch) cl->qstats.drops++; return NET_XMIT_DROP; } else { - cl->bstats.packets++; + cl->bstats.packets += + skb_is_gso(skb)?skb_shinfo(skb)->gso_segs:1; cl->bstats.bytes += skb->len; htb_activate(q, cl); } sch->q.qlen++; - sch->bstats.packets++; + sch->bstats.packets += skb_is_gso(skb)?skb_shinfo(skb)->gso_segs:1; sch->bstats.bytes += skb->len; return NET_XMIT_SUCCESS; } @@ -677,34 +644,6 @@ static int htb_requeue(struct sk_buff *skb, struct Qdisc *sch) return NET_XMIT_SUCCESS; } -#ifdef HTB_RATECM -#define RT_GEN(D,R) R+=D-(R/HTB_EWMAC);D=0 -static void htb_rate_timer(unsigned long arg) -{ - struct Qdisc *sch = (struct Qdisc *)arg; - struct htb_sched *q = qdisc_priv(sch); - struct hlist_node *p; - struct htb_class *cl; - - - /* lock queue so that we can muck with it */ - spin_lock_bh(&sch->dev->queue_lock); - - q->rttim.expires = jiffies + HZ; - add_timer(&q->rttim); - - /* scan and recompute one bucket at time */ - if (++q->recmp_bucket >= HTB_HSIZE) - q->recmp_bucket = 0; - - hlist_for_each_entry(cl,p, q->hash + q->recmp_bucket, hlist) { - RT_GEN(cl->sum_bytes, cl->rate_bytes); - RT_GEN(cl->sum_packets, cl->rate_packets); - } - spin_unlock_bh(&sch->dev->queue_lock); -} -#endif - /** * htb_charge_class - charges amount "bytes" to leaf and ancestors * @@ -717,8 +656,9 @@ static void htb_rate_timer(unsigned long arg) * In such case we remove class from event queue first. */ static void htb_charge_class(struct htb_sched *q, struct htb_class *cl, - int level, int bytes) + int level, struct sk_buff *skb) { + int bytes = skb->len; long toks, diff; enum htb_cmode old_mode; @@ -750,16 +690,12 @@ static void htb_charge_class(struct htb_sched *q, struct htb_class *cl, if (cl->cmode != HTB_CAN_SEND) htb_add_to_wait_tree(q, cl, diff); } -#ifdef HTB_RATECM - /* update rate counters */ - cl->sum_bytes += bytes; - cl->sum_packets++; -#endif /* update byte stats except for leaves which are already updated */ if (cl->level) { cl->bstats.bytes += bytes; - cl->bstats.packets++; + cl->bstats.packets += skb_is_gso(skb)? + skb_shinfo(skb)->gso_segs:1; } cl = cl->parent; } @@ -943,7 +879,7 @@ next: gives us slightly better performance */ if (!cl->un.leaf.q->q.qlen) htb_deactivate(q, cl); - htb_charge_class(q, cl, level, skb->len); + htb_charge_class(q, cl, level, skb); } return skb; } @@ -1095,13 +1031,6 @@ static int htb_init(struct Qdisc *sch, struct rtattr *opt) if (q->direct_qlen < 2) /* some devices have zero tx_queue_len */ q->direct_qlen = 2; -#ifdef HTB_RATECM - init_timer(&q->rttim); - q->rttim.function = htb_rate_timer; - q->rttim.data = (unsigned long)sch; - q->rttim.expires = jiffies + HZ; - add_timer(&q->rttim); -#endif if ((q->rate2quantum = gopt->rate2quantum) < 1) q->rate2quantum = 1; q->defcls = gopt->defcls; @@ -1175,11 +1104,6 @@ htb_dump_class_stats(struct Qdisc *sch, unsigned long arg, struct gnet_dump *d) { struct htb_class *cl = (struct htb_class *)arg; -#ifdef HTB_RATECM - cl->rate_est.bps = cl->rate_bytes / (HTB_EWMAC * HTB_HSIZE); - cl->rate_est.pps = cl->rate_packets / (HTB_EWMAC * HTB_HSIZE); -#endif - if (!cl->level && cl->un.leaf.q) cl->qstats.qlen = cl->un.leaf.q->q.qlen; cl->xstats.tokens = cl->tokens; @@ -1277,6 +1201,7 @@ static void htb_destroy_class(struct Qdisc *sch, struct htb_class *cl) BUG_TRAP(cl->un.leaf.q); qdisc_destroy(cl->un.leaf.q); } + gen_kill_estimator(&cl->bstats, &cl->rate_est); qdisc_put_rtab(cl->rate); qdisc_put_rtab(cl->ceil); @@ -1305,9 +1230,6 @@ static void htb_destroy(struct Qdisc *sch) struct htb_sched *q = qdisc_priv(sch); qdisc_watchdog_cancel(&q->watchdog); -#ifdef HTB_RATECM - del_timer_sync(&q->rttim); -#endif /* This line used to be after htb_destroy_class call below and surprisingly it worked in 2.4. But it must precede it because filter need its target class alive to be able to call @@ -1403,6 +1325,20 @@ static int htb_change_class(struct Qdisc *sch, u32 classid, if (!cl) { /* new class */ struct Qdisc *new_q; int prio; + struct { + struct rtattr rta; + struct gnet_estimator opt; + } est = { + .rta = { + .rta_len = RTA_LENGTH(sizeof(est.opt)), + .rta_type = TCA_RATE, + }, + .opt = { + /* 4s interval, 16s averaging constant */ + .interval = 2, + .ewma_log = 2, + }, + }; /* check for valid classid */ if (!classid || TC_H_MAJ(classid ^ sch->handle) @@ -1418,6 +1354,9 @@ static int htb_change_class(struct Qdisc *sch, u32 classid, if ((cl = kzalloc(sizeof(*cl), GFP_KERNEL)) == NULL) goto failure; + gen_new_estimator(&cl->bstats, &cl->rate_est, + &sch->dev->queue_lock, + tca[TCA_RATE-1] ? : &est.rta); cl->refcnt = 1; INIT_LIST_HEAD(&cl->sibling); INIT_HLIST_NODE(&cl->hlist); @@ -1469,8 +1408,13 @@ static int htb_change_class(struct Qdisc *sch, u32 classid, hlist_add_head(&cl->hlist, q->hash + htb_hash(classid)); list_add_tail(&cl->sibling, parent ? &parent->children : &q->root); - } else + } else { + if (tca[TCA_RATE-1]) + gen_replace_estimator(&cl->bstats, &cl->rate_est, + &sch->dev->queue_lock, + tca[TCA_RATE-1]); sch_tree_lock(sch); + } /* it used to be a nasty bug here, we have to check that node is really leaf before changing cl->un.leaf ! */ diff --git a/net/sched/sch_ingress.c b/net/sched/sch_ingress.c index f8b9f1cdf738..2d32fd27496e 100644 --- a/net/sched/sch_ingress.c +++ b/net/sched/sch_ingress.c @@ -9,21 +9,14 @@ #include <linux/module.h> #include <linux/types.h> +#include <linux/list.h> #include <linux/skbuff.h> -#include <linux/netdevice.h> #include <linux/rtnetlink.h> #include <linux/netfilter_ipv4.h> #include <linux/netfilter_ipv6.h> #include <linux/netfilter.h> -#include <linux/smp.h> #include <net/netlink.h> #include <net/pkt_sched.h> -#include <asm/byteorder.h> -#include <asm/uaccess.h> -#include <linux/kmod.h> -#include <linux/stat.h> -#include <linux/interrupt.h> -#include <linux/list.h> #undef DEBUG_INGRESS @@ -165,37 +158,17 @@ static int ingress_enqueue(struct sk_buff *skb,struct Qdisc *sch) break; case TC_ACT_RECLASSIFY: case TC_ACT_OK: - case TC_ACT_UNSPEC: - default: skb->tc_index = TC_H_MIN(res.classid); + default: result = TC_ACT_OK; break; } -/* backward compat */ -#else -#ifdef CONFIG_NET_CLS_POLICE - switch (result) { - case TC_POLICE_SHOT: - result = NF_DROP; - sch->qstats.drops++; - break; - case TC_POLICE_RECLASSIFY: /* DSCP remarking here ? */ - case TC_POLICE_OK: - case TC_POLICE_UNSPEC: - default: - sch->bstats.packets++; - sch->bstats.bytes += skb->len; - result = NF_ACCEPT; - break; - } - #else D2PRINTK("Overriding result to ACCEPT\n"); result = NF_ACCEPT; sch->bstats.packets++; sch->bstats.bytes += skb->len; #endif -#endif return result; } diff --git a/net/sched/sch_netem.c b/net/sched/sch_netem.c index 5d9d8bc9cc3a..9e5e87e81f00 100644 --- a/net/sched/sch_netem.c +++ b/net/sched/sch_netem.c @@ -14,11 +14,9 @@ */ #include <linux/module.h> -#include <linux/bitops.h> #include <linux/types.h> #include <linux/kernel.h> #include <linux/errno.h> -#include <linux/netdevice.h> #include <linux/skbuff.h> #include <linux/rtnetlink.h> diff --git a/net/sched/sch_prio.c b/net/sched/sch_prio.c index 6d7542c26e47..abd82fc3ec60 100644 --- a/net/sched/sch_prio.c +++ b/net/sched/sch_prio.c @@ -12,37 +12,23 @@ */ #include <linux/module.h> -#include <asm/uaccess.h> -#include <asm/system.h> -#include <linux/bitops.h> #include <linux/types.h> #include <linux/kernel.h> #include <linux/string.h> -#include <linux/mm.h> -#include <linux/socket.h> -#include <linux/sockios.h> -#include <linux/in.h> #include <linux/errno.h> -#include <linux/interrupt.h> -#include <linux/if_ether.h> -#include <linux/inet.h> -#include <linux/netdevice.h> -#include <linux/etherdevice.h> -#include <linux/notifier.h> -#include <net/ip.h> -#include <net/route.h> #include <linux/skbuff.h> #include <net/netlink.h> -#include <net/sock.h> #include <net/pkt_sched.h> struct prio_sched_data { int bands; + int curband; /* for round-robin */ struct tcf_proto *filter_list; u8 prio2band[TC_PRIO_MAX+1]; struct Qdisc *queues[TCQ_PRIO_BANDS]; + int mq; }; @@ -52,32 +38,34 @@ prio_classify(struct sk_buff *skb, struct Qdisc *sch, int *qerr) struct prio_sched_data *q = qdisc_priv(sch); u32 band = skb->priority; struct tcf_result res; + int err; *qerr = NET_XMIT_BYPASS; if (TC_H_MAJ(skb->priority) != sch->handle) { + err = tc_classify(skb, q->filter_list, &res); #ifdef CONFIG_NET_CLS_ACT - switch (tc_classify(skb, q->filter_list, &res)) { + switch (err) { case TC_ACT_STOLEN: case TC_ACT_QUEUED: *qerr = NET_XMIT_SUCCESS; case TC_ACT_SHOT: return NULL; } - - if (!q->filter_list ) { -#else - if (!q->filter_list || tc_classify(skb, q->filter_list, &res)) { #endif + if (!q->filter_list || err < 0) { if (TC_H_MAJ(band)) band = 0; - return q->queues[q->prio2band[band&TC_PRIO_MAX]]; + band = q->prio2band[band&TC_PRIO_MAX]; + goto out; } band = res.classid; } band = TC_H_MIN(band) - 1; if (band >= q->bands) - return q->queues[q->prio2band[0]]; - + band = q->prio2band[0]; +out: + if (q->mq) + skb_set_queue_mapping(skb, band); return q->queues[band]; } @@ -144,17 +132,58 @@ prio_dequeue(struct Qdisc* sch) struct Qdisc *qdisc; for (prio = 0; prio < q->bands; prio++) { - qdisc = q->queues[prio]; - skb = qdisc->dequeue(qdisc); - if (skb) { - sch->q.qlen--; - return skb; + /* Check if the target subqueue is available before + * pulling an skb. This way we avoid excessive requeues + * for slower queues. + */ + if (!netif_subqueue_stopped(sch->dev, (q->mq ? prio : 0))) { + qdisc = q->queues[prio]; + skb = qdisc->dequeue(qdisc); + if (skb) { + sch->q.qlen--; + return skb; + } } } return NULL; } +static struct sk_buff *rr_dequeue(struct Qdisc* sch) +{ + struct sk_buff *skb; + struct prio_sched_data *q = qdisc_priv(sch); + struct Qdisc *qdisc; + int bandcount; + + /* Only take one pass through the queues. If nothing is available, + * return nothing. + */ + for (bandcount = 0; bandcount < q->bands; bandcount++) { + /* Check if the target subqueue is available before + * pulling an skb. This way we avoid excessive requeues + * for slower queues. If the queue is stopped, try the + * next queue. + */ + if (!netif_subqueue_stopped(sch->dev, + (q->mq ? q->curband : 0))) { + qdisc = q->queues[q->curband]; + skb = qdisc->dequeue(qdisc); + if (skb) { + sch->q.qlen--; + q->curband++; + if (q->curband >= q->bands) + q->curband = 0; + return skb; + } + } + q->curband++; + if (q->curband >= q->bands) + q->curband = 0; + } + return NULL; +} + static unsigned int prio_drop(struct Qdisc* sch) { struct prio_sched_data *q = qdisc_priv(sch); @@ -198,21 +227,43 @@ prio_destroy(struct Qdisc* sch) static int prio_tune(struct Qdisc *sch, struct rtattr *opt) { struct prio_sched_data *q = qdisc_priv(sch); - struct tc_prio_qopt *qopt = RTA_DATA(opt); + struct tc_prio_qopt *qopt; + struct rtattr *tb[TCA_PRIO_MAX]; int i; - if (opt->rta_len < RTA_LENGTH(sizeof(*qopt))) + if (rtattr_parse_nested_compat(tb, TCA_PRIO_MAX, opt, qopt, + sizeof(*qopt))) return -EINVAL; - if (qopt->bands > TCQ_PRIO_BANDS || qopt->bands < 2) + q->bands = qopt->bands; + /* If we're multiqueue, make sure the number of incoming bands + * matches the number of queues on the device we're associating with. + * If the number of bands requested is zero, then set q->bands to + * dev->egress_subqueue_count. Also, the root qdisc must be the + * only one that is enabled for multiqueue, since it's the only one + * that interacts with the underlying device. + */ + q->mq = RTA_GET_FLAG(tb[TCA_PRIO_MQ - 1]); + if (q->mq) { + if (sch->parent != TC_H_ROOT) + return -EINVAL; + if (netif_is_multiqueue(sch->dev)) { + if (q->bands == 0) + q->bands = sch->dev->egress_subqueue_count; + else if (q->bands != sch->dev->egress_subqueue_count) + return -EINVAL; + } else + return -EOPNOTSUPP; + } + + if (q->bands > TCQ_PRIO_BANDS || q->bands < 2) return -EINVAL; for (i=0; i<=TC_PRIO_MAX; i++) { - if (qopt->priomap[i] >= qopt->bands) + if (qopt->priomap[i] >= q->bands) return -EINVAL; } sch_tree_lock(sch); - q->bands = qopt->bands; memcpy(q->prio2band, qopt->priomap, TC_PRIO_MAX+1); for (i=q->bands; i<TCQ_PRIO_BANDS; i++) { @@ -268,11 +319,17 @@ static int prio_dump(struct Qdisc *sch, struct sk_buff *skb) { struct prio_sched_data *q = qdisc_priv(sch); unsigned char *b = skb_tail_pointer(skb); + struct rtattr *nest; struct tc_prio_qopt opt; opt.bands = q->bands; memcpy(&opt.priomap, q->prio2band, TC_PRIO_MAX+1); - RTA_PUT(skb, TCA_OPTIONS, sizeof(opt), &opt); + + nest = RTA_NEST_COMPAT(skb, TCA_OPTIONS, sizeof(opt), &opt); + if (q->mq) + RTA_PUT_FLAG(skb, TCA_PRIO_MQ); + RTA_NEST_COMPAT_END(skb, nest); + return skb->len; rtattr_failure: @@ -443,17 +500,44 @@ static struct Qdisc_ops prio_qdisc_ops = { .owner = THIS_MODULE, }; +static struct Qdisc_ops rr_qdisc_ops = { + .next = NULL, + .cl_ops = &prio_class_ops, + .id = "rr", + .priv_size = sizeof(struct prio_sched_data), + .enqueue = prio_enqueue, + .dequeue = rr_dequeue, + .requeue = prio_requeue, + .drop = prio_drop, + .init = prio_init, + .reset = prio_reset, + .destroy = prio_destroy, + .change = prio_tune, + .dump = prio_dump, + .owner = THIS_MODULE, +}; + static int __init prio_module_init(void) { - return register_qdisc(&prio_qdisc_ops); + int err; + + err = register_qdisc(&prio_qdisc_ops); + if (err < 0) + return err; + err = register_qdisc(&rr_qdisc_ops); + if (err < 0) + unregister_qdisc(&prio_qdisc_ops); + return err; } static void __exit prio_module_exit(void) { unregister_qdisc(&prio_qdisc_ops); + unregister_qdisc(&rr_qdisc_ops); } module_init(prio_module_init) module_exit(prio_module_exit) MODULE_LICENSE("GPL"); +MODULE_ALIAS("sch_rr"); diff --git a/net/sched/sch_red.c b/net/sched/sch_red.c index 00db53eb8159..9b95fefb70f4 100644 --- a/net/sched/sch_red.c +++ b/net/sched/sch_red.c @@ -17,7 +17,6 @@ #include <linux/module.h> #include <linux/types.h> #include <linux/kernel.h> -#include <linux/netdevice.h> #include <linux/skbuff.h> #include <net/pkt_sched.h> #include <net/inet_ecn.h> diff --git a/net/sched/sch_sfq.c b/net/sched/sch_sfq.c index 96dfdf78d32c..b542c875e154 100644 --- a/net/sched/sch_sfq.c +++ b/net/sched/sch_sfq.c @@ -10,31 +10,18 @@ */ #include <linux/module.h> -#include <asm/uaccess.h> -#include <asm/system.h> -#include <linux/bitops.h> #include <linux/types.h> #include <linux/kernel.h> #include <linux/jiffies.h> #include <linux/string.h> -#include <linux/mm.h> -#include <linux/socket.h> -#include <linux/sockios.h> #include <linux/in.h> #include <linux/errno.h> -#include <linux/interrupt.h> -#include <linux/if_ether.h> -#include <linux/inet.h> -#include <linux/netdevice.h> -#include <linux/etherdevice.h> -#include <linux/notifier.h> #include <linux/init.h> -#include <net/ip.h> -#include <net/netlink.h> #include <linux/ipv6.h> -#include <net/route.h> #include <linux/skbuff.h> -#include <net/sock.h> +#include <linux/jhash.h> +#include <net/ip.h> +#include <net/netlink.h> #include <net/pkt_sched.h> @@ -109,7 +96,7 @@ struct sfq_sched_data /* Variables */ struct timer_list perturb_timer; - int perturbation; + u32 perturbation; sfq_index tail; /* Index of current slot in round */ sfq_index max_depth; /* Maximal depth */ @@ -123,12 +110,7 @@ struct sfq_sched_data static __inline__ unsigned sfq_fold_hash(struct sfq_sched_data *q, u32 h, u32 h1) { - int pert = q->perturbation; - - /* Have we any rotation primitives? If not, WHY? */ - h ^= (h1<<pert) ^ (h1>>(0x1F - pert)); - h ^= h>>10; - return h & 0x3FF; + return jhash_2words(h, h1, q->perturbation) & (SFQ_HASH_DIVISOR - 1); } static unsigned sfq_hash(struct sfq_sched_data *q, struct sk_buff *skb) @@ -270,6 +252,13 @@ sfq_enqueue(struct sk_buff *skb, struct Qdisc* sch) q->ht[hash] = x = q->dep[SFQ_DEPTH].next; q->hash[x] = hash; } + /* If selected queue has length q->limit, this means that + * all another queues are empty and that we do simple tail drop, + * i.e. drop _this_ packet. + */ + if (q->qs[x].qlen >= q->limit) + return qdisc_drop(skb, sch); + sch->qstats.backlog += skb->len; __skb_queue_tail(&q->qs[x], skb); sfq_inc(q, x); @@ -284,7 +273,7 @@ sfq_enqueue(struct sk_buff *skb, struct Qdisc* sch) q->tail = x; } } - if (++sch->q.qlen < q->limit-1) { + if (++sch->q.qlen <= q->limit) { sch->bstats.bytes += skb->len; sch->bstats.packets++; return 0; @@ -308,6 +297,19 @@ sfq_requeue(struct sk_buff *skb, struct Qdisc* sch) } sch->qstats.backlog += skb->len; __skb_queue_head(&q->qs[x], skb); + /* If selected queue has length q->limit+1, this means that + * all another queues are empty and we do simple tail drop. + * This packet is still requeued at head of queue, tail packet + * is dropped. + */ + if (q->qs[x].qlen > q->limit) { + skb = q->qs[x].prev; + __skb_unlink(skb, &q->qs[x]); + sch->qstats.drops++; + sch->qstats.backlog -= skb->len; + kfree_skb(skb); + return NET_XMIT_CN; + } sfq_inc(q, x); if (q->qs[x].qlen == 1) { /* The flow is new */ if (q->tail == SFQ_DEPTH) { /* It is the first flow */ @@ -320,7 +322,7 @@ sfq_requeue(struct sk_buff *skb, struct Qdisc* sch) q->tail = x; } } - if (++sch->q.qlen < q->limit - 1) { + if (++sch->q.qlen <= q->limit) { sch->qstats.requeues++; return 0; } @@ -384,12 +386,10 @@ static void sfq_perturbation(unsigned long arg) struct Qdisc *sch = (struct Qdisc*)arg; struct sfq_sched_data *q = qdisc_priv(sch); - q->perturbation = net_random()&0x1F; + get_random_bytes(&q->perturbation, 4); - if (q->perturb_period) { - q->perturb_timer.expires = jiffies + q->perturb_period; - add_timer(&q->perturb_timer); - } + if (q->perturb_period) + mod_timer(&q->perturb_timer, jiffies + q->perturb_period); } static int sfq_change(struct Qdisc *sch, struct rtattr *opt) @@ -405,17 +405,17 @@ static int sfq_change(struct Qdisc *sch, struct rtattr *opt) q->quantum = ctl->quantum ? : psched_mtu(sch->dev); q->perturb_period = ctl->perturb_period*HZ; if (ctl->limit) - q->limit = min_t(u32, ctl->limit, SFQ_DEPTH); + q->limit = min_t(u32, ctl->limit, SFQ_DEPTH - 1); qlen = sch->q.qlen; - while (sch->q.qlen >= q->limit-1) + while (sch->q.qlen > q->limit) sfq_drop(sch); qdisc_tree_decrease_qlen(sch, qlen - sch->q.qlen); del_timer(&q->perturb_timer); if (q->perturb_period) { - q->perturb_timer.expires = jiffies + q->perturb_period; - add_timer(&q->perturb_timer); + mod_timer(&q->perturb_timer, jiffies + q->perturb_period); + get_random_bytes(&q->perturbation, 4); } sch_tree_unlock(sch); return 0; @@ -437,12 +437,13 @@ static int sfq_init(struct Qdisc *sch, struct rtattr *opt) q->dep[i+SFQ_DEPTH].next = i+SFQ_DEPTH; q->dep[i+SFQ_DEPTH].prev = i+SFQ_DEPTH; } - q->limit = SFQ_DEPTH; + q->limit = SFQ_DEPTH - 1; q->max_depth = 0; q->tail = SFQ_DEPTH; if (opt == NULL) { q->quantum = psched_mtu(sch->dev); q->perturb_period = 0; + get_random_bytes(&q->perturbation, 4); } else { int err = sfq_change(sch, opt); if (err) diff --git a/net/sched/sch_tbf.c b/net/sched/sch_tbf.c index 53862953baaf..8c2639af4c6a 100644 --- a/net/sched/sch_tbf.c +++ b/net/sched/sch_tbf.c @@ -13,29 +13,12 @@ */ #include <linux/module.h> -#include <asm/uaccess.h> -#include <asm/system.h> -#include <linux/bitops.h> #include <linux/types.h> #include <linux/kernel.h> -#include <linux/jiffies.h> #include <linux/string.h> -#include <linux/mm.h> -#include <linux/socket.h> -#include <linux/sockios.h> -#include <linux/in.h> #include <linux/errno.h> -#include <linux/interrupt.h> -#include <linux/if_ether.h> -#include <linux/inet.h> -#include <linux/netdevice.h> -#include <linux/etherdevice.h> -#include <linux/notifier.h> -#include <net/ip.h> -#include <net/netlink.h> -#include <net/route.h> #include <linux/skbuff.h> -#include <net/sock.h> +#include <net/netlink.h> #include <net/pkt_sched.h> @@ -142,7 +125,7 @@ static int tbf_enqueue(struct sk_buff *skb, struct Qdisc* sch) if (skb->len > q->max_size) { sch->qstats.drops++; -#ifdef CONFIG_NET_CLS_POLICE +#ifdef CONFIG_NET_CLS_ACT if (sch->reshape_fail == NULL || sch->reshape_fail(skb, sch)) #endif kfree_skb(skb); diff --git a/net/sched/sch_teql.c b/net/sched/sch_teql.c index f05ad9a30b4c..0968184ea6be 100644 --- a/net/sched/sch_teql.c +++ b/net/sched/sch_teql.c @@ -9,30 +9,17 @@ */ #include <linux/module.h> -#include <asm/uaccess.h> -#include <asm/system.h> -#include <linux/bitops.h> #include <linux/types.h> #include <linux/kernel.h> #include <linux/string.h> -#include <linux/mm.h> -#include <linux/socket.h> -#include <linux/sockios.h> -#include <linux/in.h> #include <linux/errno.h> -#include <linux/interrupt.h> #include <linux/if_arp.h> -#include <linux/if_ether.h> -#include <linux/inet.h> #include <linux/netdevice.h> -#include <linux/etherdevice.h> -#include <linux/notifier.h> #include <linux/init.h> -#include <net/ip.h> -#include <net/route.h> #include <linux/skbuff.h> #include <linux/moduleparam.h> -#include <net/sock.h> +#include <net/dst.h> +#include <net/neighbour.h> #include <net/pkt_sched.h> /* @@ -225,7 +212,6 @@ static int teql_qdisc_init(struct Qdisc *sch, struct rtattr *opt) return 0; } -/* "teql*" netdevice routines */ static int __teql_resolve(struct sk_buff *skb, struct sk_buff *skb_res, struct net_device *dev) @@ -277,6 +263,7 @@ static int teql_master_xmit(struct sk_buff *skb, struct net_device *dev) int busy; int nores; int len = skb->len; + int subq = skb->queue_mapping; struct sk_buff *skb_res = NULL; start = master->slaves; @@ -293,7 +280,9 @@ restart: if (slave->qdisc_sleeping != q) continue; - if (netif_queue_stopped(slave) || ! netif_running(slave)) { + if (netif_queue_stopped(slave) || + netif_subqueue_stopped(slave, subq) || + !netif_running(slave)) { busy = 1; continue; } @@ -302,6 +291,7 @@ restart: case 0: if (netif_tx_trylock(slave)) { if (!netif_queue_stopped(slave) && + !netif_subqueue_stopped(slave, subq) && slave->hard_start_xmit(skb, slave) == 0) { netif_tx_unlock(slave); master->slaves = NEXT_SLAVE(q); diff --git a/net/sctp/associola.c b/net/sctp/associola.c index 498edb0cd4e5..9bad8ba0feda 100644 --- a/net/sctp/associola.c +++ b/net/sctp/associola.c @@ -99,7 +99,6 @@ static struct sctp_association *sctp_association_init(struct sctp_association *a /* Initialize the bind addr area. */ sctp_bind_addr_init(&asoc->base.bind_addr, ep->base.bind_addr.port); - rwlock_init(&asoc->base.addr_lock); asoc->state = SCTP_STATE_CLOSED; @@ -727,7 +726,12 @@ void sctp_assoc_control_transport(struct sctp_association *asoc, break; case SCTP_TRANSPORT_DOWN: - transport->state = SCTP_INACTIVE; + /* if the transort was never confirmed, do not transition it + * to inactive state. + */ + if (transport->state != SCTP_UNCONFIRMED) + transport->state = SCTP_INACTIVE; + spc_state = SCTP_ADDR_UNREACHABLE; break; @@ -932,8 +936,6 @@ struct sctp_transport *sctp_assoc_is_match(struct sctp_association *asoc, { struct sctp_transport *transport; - sctp_read_lock(&asoc->base.addr_lock); - if ((htons(asoc->base.bind_addr.port) == laddr->v4.sin_port) && (htons(asoc->peer.port) == paddr->v4.sin_port)) { transport = sctp_assoc_lookup_paddr(asoc, paddr); @@ -947,7 +949,6 @@ struct sctp_transport *sctp_assoc_is_match(struct sctp_association *asoc, transport = NULL; out: - sctp_read_unlock(&asoc->base.addr_lock); return transport; } @@ -1371,19 +1372,13 @@ int sctp_assoc_set_bind_addr_from_cookie(struct sctp_association *asoc, int sctp_assoc_lookup_laddr(struct sctp_association *asoc, const union sctp_addr *laddr) { - int found; + int found = 0; - sctp_read_lock(&asoc->base.addr_lock); if ((asoc->base.bind_addr.port == ntohs(laddr->v4.sin_port)) && sctp_bind_addr_match(&asoc->base.bind_addr, laddr, - sctp_sk(asoc->base.sk))) { + sctp_sk(asoc->base.sk))) found = 1; - goto out; - } - found = 0; -out: - sctp_read_unlock(&asoc->base.addr_lock); return found; } diff --git a/net/sctp/bind_addr.c b/net/sctp/bind_addr.c index fdb287a9e2e2..dfffa94fb9f6 100644 --- a/net/sctp/bind_addr.c +++ b/net/sctp/bind_addr.c @@ -163,9 +163,15 @@ int sctp_add_bind_addr(struct sctp_bind_addr *bp, union sctp_addr *new, addr->a.v4.sin_port = htons(bp->port); addr->use_as_src = use_as_src; + addr->valid = 1; INIT_LIST_HEAD(&addr->list); - list_add_tail(&addr->list, &bp->address_list); + INIT_RCU_HEAD(&addr->rcu); + + /* We always hold a socket lock when calling this function, + * and that acts as a writer synchronizing lock. + */ + list_add_tail_rcu(&addr->list, &bp->address_list); SCTP_DBG_OBJCNT_INC(addr); return 0; @@ -174,23 +180,35 @@ int sctp_add_bind_addr(struct sctp_bind_addr *bp, union sctp_addr *new, /* Delete an address from the bind address list in the SCTP_bind_addr * structure. */ -int sctp_del_bind_addr(struct sctp_bind_addr *bp, union sctp_addr *del_addr) +int sctp_del_bind_addr(struct sctp_bind_addr *bp, union sctp_addr *del_addr, + void fastcall (*rcu_call)(struct rcu_head *head, + void (*func)(struct rcu_head *head))) { - struct list_head *pos, *temp; - struct sctp_sockaddr_entry *addr; + struct sctp_sockaddr_entry *addr, *temp; - list_for_each_safe(pos, temp, &bp->address_list) { - addr = list_entry(pos, struct sctp_sockaddr_entry, list); + /* We hold the socket lock when calling this function, + * and that acts as a writer synchronizing lock. + */ + list_for_each_entry_safe(addr, temp, &bp->address_list, list) { if (sctp_cmp_addr_exact(&addr->a, del_addr)) { /* Found the exact match. */ - list_del(pos); - kfree(addr); - SCTP_DBG_OBJCNT_DEC(addr); - - return 0; + addr->valid = 0; + list_del_rcu(&addr->list); + break; } } + /* Call the rcu callback provided in the args. This function is + * called by both BH packet processing and user side socket option + * processing, but it works on different lists in those 2 contexts. + * Each context provides it's own callback, whether call_rcu_bh() + * or call_rcu(), to make sure that we wait for an appropriate time. + */ + if (addr && !addr->valid) { + rcu_call(&addr->rcu, sctp_local_addr_free); + SCTP_DBG_OBJCNT_DEC(addr); + } + return -EINVAL; } @@ -300,15 +318,20 @@ int sctp_bind_addr_match(struct sctp_bind_addr *bp, struct sctp_sock *opt) { struct sctp_sockaddr_entry *laddr; - struct list_head *pos; - - list_for_each(pos, &bp->address_list) { - laddr = list_entry(pos, struct sctp_sockaddr_entry, list); - if (opt->pf->cmp_addr(&laddr->a, addr, opt)) - return 1; + int match = 0; + + rcu_read_lock(); + list_for_each_entry_rcu(laddr, &bp->address_list, list) { + if (!laddr->valid) + continue; + if (opt->pf->cmp_addr(&laddr->a, addr, opt)) { + match = 1; + break; + } } + rcu_read_unlock(); - return 0; + return match; } /* Find the first address in the bind address list that is not present in @@ -323,18 +346,19 @@ union sctp_addr *sctp_find_unmatch_addr(struct sctp_bind_addr *bp, union sctp_addr *addr; void *addr_buf; struct sctp_af *af; - struct list_head *pos; int i; - list_for_each(pos, &bp->address_list) { - laddr = list_entry(pos, struct sctp_sockaddr_entry, list); - + /* This is only called sctp_send_asconf_del_ip() and we hold + * the socket lock in that code patch, so that address list + * can't change. + */ + list_for_each_entry(laddr, &bp->address_list, list) { addr_buf = (union sctp_addr *)addrs; for (i = 0; i < addrcnt; i++) { addr = (union sctp_addr *)addr_buf; af = sctp_get_af_specific(addr->v4.sin_family); if (!af) - return NULL; + break; if (opt->pf->cmp_addr(&laddr->a, addr, opt)) break; diff --git a/net/sctp/endpointola.c b/net/sctp/endpointola.c index 1404a9e2e78f..8f485a0d14bd 100644 --- a/net/sctp/endpointola.c +++ b/net/sctp/endpointola.c @@ -92,7 +92,6 @@ static struct sctp_endpoint *sctp_endpoint_init(struct sctp_endpoint *ep, /* Initialize the bind addr area */ sctp_bind_addr_init(&ep->base.bind_addr, 0); - rwlock_init(&ep->base.addr_lock); /* Remember who we are attached to. */ ep->base.sk = sk; @@ -225,21 +224,14 @@ void sctp_endpoint_put(struct sctp_endpoint *ep) struct sctp_endpoint *sctp_endpoint_is_match(struct sctp_endpoint *ep, const union sctp_addr *laddr) { - struct sctp_endpoint *retval; + struct sctp_endpoint *retval = NULL; - sctp_read_lock(&ep->base.addr_lock); if (htons(ep->base.bind_addr.port) == laddr->v4.sin_port) { if (sctp_bind_addr_match(&ep->base.bind_addr, laddr, - sctp_sk(ep->base.sk))) { + sctp_sk(ep->base.sk))) retval = ep; - goto out; - } } - retval = NULL; - -out: - sctp_read_unlock(&ep->base.addr_lock); return retval; } @@ -261,9 +253,7 @@ static struct sctp_association *__sctp_endpoint_lookup_assoc( list_for_each(pos, &ep->asocs) { asoc = list_entry(pos, struct sctp_association, asocs); if (rport == asoc->peer.port) { - sctp_read_lock(&asoc->base.addr_lock); *transport = sctp_assoc_lookup_paddr(asoc, paddr); - sctp_read_unlock(&asoc->base.addr_lock); if (*transport) return asoc; @@ -295,20 +285,17 @@ struct sctp_association *sctp_endpoint_lookup_assoc( int sctp_endpoint_is_peeled_off(struct sctp_endpoint *ep, const union sctp_addr *paddr) { - struct list_head *pos; struct sctp_sockaddr_entry *addr; struct sctp_bind_addr *bp; - sctp_read_lock(&ep->base.addr_lock); bp = &ep->base.bind_addr; - list_for_each(pos, &bp->address_list) { - addr = list_entry(pos, struct sctp_sockaddr_entry, list); - if (sctp_has_association(&addr->a, paddr)) { - sctp_read_unlock(&ep->base.addr_lock); + /* This function is called with the socket lock held, + * so the address_list can not change. + */ + list_for_each_entry(addr, &bp->address_list, list) { + if (sctp_has_association(&addr->a, paddr)) return 1; - } } - sctp_read_unlock(&ep->base.addr_lock); return 0; } diff --git a/net/sctp/input.c b/net/sctp/input.c index d57ff7f3c576..f9a0c9276e3b 100644 --- a/net/sctp/input.c +++ b/net/sctp/input.c @@ -590,7 +590,7 @@ out_unlock: * Return 0 - If further processing is needed. * Return 1 - If the packet can be discarded right away. */ -int sctp_rcv_ootb(struct sk_buff *skb) +static int sctp_rcv_ootb(struct sk_buff *skb) { sctp_chunkhdr_t *ch; __u8 *ch_end; @@ -622,6 +622,14 @@ int sctp_rcv_ootb(struct sk_buff *skb) if (SCTP_CID_SHUTDOWN_COMPLETE == ch->type) goto discard; + /* RFC 4460, 2.11.2 + * This will discard packets with INIT chunk bundled as + * subsequent chunks in the packet. When INIT is first, + * the normal INIT processing will discard the chunk. + */ + if (SCTP_CID_INIT == ch->type && (void *)ch != skb->data) + goto discard; + /* RFC 8.4, 7) If the packet contains a "Stale cookie" ERROR * or a COOKIE ACK the SCTP Packet should be silently * discarded. diff --git a/net/sctp/inqueue.c b/net/sctp/inqueue.c index 88aa22407549..e4ea7fdf36ed 100644 --- a/net/sctp/inqueue.c +++ b/net/sctp/inqueue.c @@ -130,6 +130,14 @@ struct sctp_chunk *sctp_inq_pop(struct sctp_inq *queue) /* Force chunk->skb->data to chunk->chunk_end. */ skb_pull(chunk->skb, chunk->chunk_end - chunk->skb->data); + + /* Verify that we have at least chunk headers + * worth of buffer left. + */ + if (skb_headlen(chunk->skb) < sizeof(sctp_chunkhdr_t)) { + sctp_chunk_free(chunk); + chunk = queue->in_progress = NULL; + } } } diff --git a/net/sctp/ipv6.c b/net/sctp/ipv6.c index 2164b511fe51..670fd2740b89 100644 --- a/net/sctp/ipv6.c +++ b/net/sctp/ipv6.c @@ -77,13 +77,18 @@ #include <asm/uaccess.h> -/* Event handler for inet6 address addition/deletion events. */ +/* Event handler for inet6 address addition/deletion events. + * The sctp_local_addr_list needs to be protocted by a spin lock since + * multiple notifiers (say IPv4 and IPv6) may be running at the same + * time and thus corrupt the list. + * The reader side is protected with RCU. + */ static int sctp_inet6addr_event(struct notifier_block *this, unsigned long ev, void *ptr) { struct inet6_ifaddr *ifa = (struct inet6_ifaddr *)ptr; - struct sctp_sockaddr_entry *addr; - struct list_head *pos, *temp; + struct sctp_sockaddr_entry *addr = NULL; + struct sctp_sockaddr_entry *temp; switch (ev) { case NETDEV_UP: @@ -94,19 +99,26 @@ static int sctp_inet6addr_event(struct notifier_block *this, unsigned long ev, memcpy(&addr->a.v6.sin6_addr, &ifa->addr, sizeof(struct in6_addr)); addr->a.v6.sin6_scope_id = ifa->idev->dev->ifindex; - list_add_tail(&addr->list, &sctp_local_addr_list); + addr->valid = 1; + spin_lock_bh(&sctp_local_addr_lock); + list_add_tail_rcu(&addr->list, &sctp_local_addr_list); + spin_unlock_bh(&sctp_local_addr_lock); } break; case NETDEV_DOWN: - list_for_each_safe(pos, temp, &sctp_local_addr_list) { - addr = list_entry(pos, struct sctp_sockaddr_entry, list); - if (ipv6_addr_equal(&addr->a.v6.sin6_addr, &ifa->addr)) { - list_del(pos); - kfree(addr); + spin_lock_bh(&sctp_local_addr_lock); + list_for_each_entry_safe(addr, temp, + &sctp_local_addr_list, list) { + if (ipv6_addr_equal(&addr->a.v6.sin6_addr, + &ifa->addr)) { + addr->valid = 0; + list_del_rcu(&addr->list); break; } } - + spin_unlock_bh(&sctp_local_addr_lock); + if (addr && !addr->valid) + call_rcu(&addr->rcu, sctp_local_addr_free); break; } @@ -290,9 +302,7 @@ static void sctp_v6_get_saddr(struct sctp_association *asoc, union sctp_addr *saddr) { struct sctp_bind_addr *bp; - rwlock_t *addr_lock; struct sctp_sockaddr_entry *laddr; - struct list_head *pos; sctp_scope_t scope; union sctp_addr *baddr = NULL; __u8 matchlen = 0; @@ -312,14 +322,14 @@ static void sctp_v6_get_saddr(struct sctp_association *asoc, scope = sctp_scope(daddr); bp = &asoc->base.bind_addr; - addr_lock = &asoc->base.addr_lock; /* Go through the bind address list and find the best source address * that matches the scope of the destination address. */ - sctp_read_lock(addr_lock); - list_for_each(pos, &bp->address_list) { - laddr = list_entry(pos, struct sctp_sockaddr_entry, list); + rcu_read_lock(); + list_for_each_entry_rcu(laddr, &bp->address_list, list) { + if (!laddr->valid) + continue; if ((laddr->use_as_src) && (laddr->a.sa.sa_family == AF_INET6) && (scope <= sctp_scope(&laddr->a))) { @@ -341,7 +351,7 @@ static void sctp_v6_get_saddr(struct sctp_association *asoc, __FUNCTION__, asoc, NIP6(daddr->v6.sin6_addr)); } - sctp_read_unlock(addr_lock); + rcu_read_unlock(); } /* Make a copy of all potential local addresses. */ @@ -367,7 +377,9 @@ static void sctp_v6_copy_addrlist(struct list_head *addrlist, addr->a.v6.sin6_port = 0; addr->a.v6.sin6_addr = ifp->addr; addr->a.v6.sin6_scope_id = dev->ifindex; + addr->valid = 1; INIT_LIST_HEAD(&addr->list); + INIT_RCU_HEAD(&addr->rcu); list_add_tail(&addr->list, addrlist); } } @@ -641,6 +653,8 @@ static struct sock *sctp_v6_create_accept_sk(struct sock *sk, newsctp6sk = (struct sctp6_sock *)newsk; inet_sk(newsk)->pinet6 = &newsctp6sk->inet6; + sctp_sk(newsk)->v4mapped = sctp_sk(sk)->v4mapped; + newinet = inet_sk(newsk); newnp = inet6_sk(newsk); @@ -875,10 +889,6 @@ static int sctp_inet6_send_verify(struct sctp_sock *opt, union sctp_addr *addr) dev = dev_get_by_index(addr->v6.sin6_scope_id); if (!dev) return 0; - if (!ipv6_chk_addr(&addr->v6.sin6_addr, dev, 0)) { - dev_put(dev); - return 0; - } dev_put(dev); } af = opt->pf->af; diff --git a/net/sctp/outqueue.c b/net/sctp/outqueue.c index 992f361084b7..28f4fe77ceee 100644 --- a/net/sctp/outqueue.c +++ b/net/sctp/outqueue.c @@ -421,6 +421,13 @@ void sctp_retransmit_mark(struct sctp_outq *q, */ if ((fast_retransmit && (chunk->fast_retransmit > 0)) || (!fast_retransmit && !chunk->tsn_gap_acked)) { + /* If this chunk was sent less then 1 rto ago, do not + * retransmit this chunk, but give the peer time + * to acknowlege it. + */ + if ((jiffies - chunk->sent_at) < transport->rto) + continue; + /* RFC 2960 6.2.1 Processing a Received SACK * * C) Any time a DATA chunk is marked for diff --git a/net/sctp/proc.c b/net/sctp/proc.c index 2f12bf2d8d3c..e4cd841a22e4 100644 --- a/net/sctp/proc.c +++ b/net/sctp/proc.c @@ -250,7 +250,7 @@ static int sctp_eps_seq_show(struct seq_file *seq, void *v) return 0; } -static struct seq_operations sctp_eps_ops = { +static const struct seq_operations sctp_eps_ops = { .start = sctp_eps_seq_start, .next = sctp_eps_seq_next, .stop = sctp_eps_seq_stop, @@ -361,7 +361,7 @@ static int sctp_assocs_seq_show(struct seq_file *seq, void *v) return 0; } -static struct seq_operations sctp_assoc_ops = { +static const struct seq_operations sctp_assoc_ops = { .start = sctp_assocs_seq_start, .next = sctp_assocs_seq_next, .stop = sctp_assocs_seq_stop, diff --git a/net/sctp/protocol.c b/net/sctp/protocol.c index 34bab36637ac..3d036cdfae41 100644 --- a/net/sctp/protocol.c +++ b/net/sctp/protocol.c @@ -153,6 +153,9 @@ static void sctp_v4_copy_addrlist(struct list_head *addrlist, addr->a.v4.sin_family = AF_INET; addr->a.v4.sin_port = 0; addr->a.v4.sin_addr.s_addr = ifa->ifa_local; + addr->valid = 1; + INIT_LIST_HEAD(&addr->list); + INIT_RCU_HEAD(&addr->rcu); list_add_tail(&addr->list, addrlist); } } @@ -192,16 +195,24 @@ static void sctp_free_local_addr_list(void) } } +void sctp_local_addr_free(struct rcu_head *head) +{ + struct sctp_sockaddr_entry *e = container_of(head, + struct sctp_sockaddr_entry, rcu); + kfree(e); +} + /* Copy the local addresses which are valid for 'scope' into 'bp'. */ int sctp_copy_local_addr_list(struct sctp_bind_addr *bp, sctp_scope_t scope, gfp_t gfp, int copy_flags) { struct sctp_sockaddr_entry *addr; int error = 0; - struct list_head *pos, *temp; - list_for_each_safe(pos, temp, &sctp_local_addr_list) { - addr = list_entry(pos, struct sctp_sockaddr_entry, list); + rcu_read_lock(); + list_for_each_entry_rcu(addr, &sctp_local_addr_list, list) { + if (!addr->valid) + continue; if (sctp_in_scope(&addr->a, scope)) { /* Now that the address is in scope, check to see if * the address type is really supported by the local @@ -213,7 +224,7 @@ int sctp_copy_local_addr_list(struct sctp_bind_addr *bp, sctp_scope_t scope, (copy_flags & SCTP_ADDR6_ALLOWED) && (copy_flags & SCTP_ADDR6_PEERSUPP)))) { error = sctp_add_bind_addr(bp, &addr->a, 1, - GFP_ATOMIC); + GFP_ATOMIC); if (error) goto end_copy; } @@ -221,6 +232,7 @@ int sctp_copy_local_addr_list(struct sctp_bind_addr *bp, sctp_scope_t scope, } end_copy: + rcu_read_unlock(); return error; } @@ -416,9 +428,7 @@ static struct dst_entry *sctp_v4_get_dst(struct sctp_association *asoc, struct rtable *rt; struct flowi fl; struct sctp_bind_addr *bp; - rwlock_t *addr_lock; struct sctp_sockaddr_entry *laddr; - struct list_head *pos; struct dst_entry *dst = NULL; union sctp_addr dst_saddr; @@ -447,23 +457,20 @@ static struct dst_entry *sctp_v4_get_dst(struct sctp_association *asoc, goto out; bp = &asoc->base.bind_addr; - addr_lock = &asoc->base.addr_lock; if (dst) { /* Walk through the bind address list and look for a bind * address that matches the source address of the returned dst. */ - sctp_read_lock(addr_lock); - list_for_each(pos, &bp->address_list) { - laddr = list_entry(pos, struct sctp_sockaddr_entry, - list); - if (!laddr->use_as_src) + rcu_read_lock(); + list_for_each_entry_rcu(laddr, &bp->address_list, list) { + if (!laddr->valid || !laddr->use_as_src) continue; sctp_v4_dst_saddr(&dst_saddr, dst, htons(bp->port)); if (sctp_v4_cmp_addr(&dst_saddr, &laddr->a)) goto out_unlock; } - sctp_read_unlock(addr_lock); + rcu_read_unlock(); /* None of the bound addresses match the source address of the * dst. So release it. @@ -475,10 +482,10 @@ static struct dst_entry *sctp_v4_get_dst(struct sctp_association *asoc, /* Walk through the bind address list and try to get a dst that * matches a bind address as the source address. */ - sctp_read_lock(addr_lock); - list_for_each(pos, &bp->address_list) { - laddr = list_entry(pos, struct sctp_sockaddr_entry, list); - + rcu_read_lock(); + list_for_each_entry_rcu(laddr, &bp->address_list, list) { + if (!laddr->valid) + continue; if ((laddr->use_as_src) && (AF_INET == laddr->a.sa.sa_family)) { fl.fl4_src = laddr->a.v4.sin_addr.s_addr; @@ -490,7 +497,7 @@ static struct dst_entry *sctp_v4_get_dst(struct sctp_association *asoc, } out_unlock: - sctp_read_unlock(addr_lock); + rcu_read_unlock(); out: if (dst) SCTP_DEBUG_PRINTK("rt_dst:%u.%u.%u.%u, rt_src:%u.%u.%u.%u\n", @@ -600,13 +607,18 @@ static void sctp_v4_seq_dump_addr(struct seq_file *seq, union sctp_addr *addr) seq_printf(seq, "%d.%d.%d.%d ", NIPQUAD(addr->v4.sin_addr)); } -/* Event handler for inet address addition/deletion events. */ +/* Event handler for inet address addition/deletion events. + * The sctp_local_addr_list needs to be protocted by a spin lock since + * multiple notifiers (say IPv4 and IPv6) may be running at the same + * time and thus corrupt the list. + * The reader side is protected with RCU. + */ static int sctp_inetaddr_event(struct notifier_block *this, unsigned long ev, void *ptr) { struct in_ifaddr *ifa = (struct in_ifaddr *)ptr; - struct sctp_sockaddr_entry *addr; - struct list_head *pos, *temp; + struct sctp_sockaddr_entry *addr = NULL; + struct sctp_sockaddr_entry *temp; switch (ev) { case NETDEV_UP: @@ -615,19 +627,25 @@ static int sctp_inetaddr_event(struct notifier_block *this, unsigned long ev, addr->a.v4.sin_family = AF_INET; addr->a.v4.sin_port = 0; addr->a.v4.sin_addr.s_addr = ifa->ifa_local; - list_add_tail(&addr->list, &sctp_local_addr_list); + addr->valid = 1; + spin_lock_bh(&sctp_local_addr_lock); + list_add_tail_rcu(&addr->list, &sctp_local_addr_list); + spin_unlock_bh(&sctp_local_addr_lock); } break; case NETDEV_DOWN: - list_for_each_safe(pos, temp, &sctp_local_addr_list) { - addr = list_entry(pos, struct sctp_sockaddr_entry, list); + spin_lock_bh(&sctp_local_addr_lock); + list_for_each_entry_safe(addr, temp, + &sctp_local_addr_list, list) { if (addr->a.v4.sin_addr.s_addr == ifa->ifa_local) { - list_del(pos); - kfree(addr); + addr->valid = 0; + list_del_rcu(&addr->list); break; } } - + spin_unlock_bh(&sctp_local_addr_lock); + if (addr && !addr->valid) + call_rcu(&addr->rcu, sctp_local_addr_free); break; } @@ -980,14 +998,14 @@ SCTP_STATIC __init int sctp_init(void) sctp_bucket_cachep = kmem_cache_create("sctp_bind_bucket", sizeof(struct sctp_bind_bucket), 0, SLAB_HWCACHE_ALIGN, - NULL, NULL); + NULL); if (!sctp_bucket_cachep) goto out; sctp_chunk_cachep = kmem_cache_create("sctp_chunk", sizeof(struct sctp_chunk), 0, SLAB_HWCACHE_ALIGN, - NULL, NULL); + NULL); if (!sctp_chunk_cachep) goto err_chunk_cachep; @@ -1160,6 +1178,7 @@ SCTP_STATIC __init int sctp_init(void) /* Initialize the local address list. */ INIT_LIST_HEAD(&sctp_local_addr_list); + spin_lock_init(&sctp_local_addr_lock); sctp_get_local_addr_list(); /* Register notifier for inet address additions/deletions. */ @@ -1227,6 +1246,9 @@ SCTP_STATIC __exit void sctp_exit(void) sctp_v6_del_protocol(); inet_del_protocol(&sctp_protocol, IPPROTO_SCTP); + /* Unregister notifier for inet address additions/deletions. */ + unregister_inetaddr_notifier(&sctp_inetaddr_notifier); + /* Free the local address list. */ sctp_free_local_addr_list(); @@ -1240,9 +1262,6 @@ SCTP_STATIC __exit void sctp_exit(void) inet_unregister_protosw(&sctp_stream_protosw); inet_unregister_protosw(&sctp_seqpacket_protosw); - /* Unregister notifier for inet address additions/deletions. */ - unregister_inetaddr_notifier(&sctp_inetaddr_notifier); - sctp_sysctl_unregister(); list_del(&sctp_ipv4_specific.list); diff --git a/net/sctp/sm_make_chunk.c b/net/sctp/sm_make_chunk.c index 8d18f570c2e6..23ae37ec8711 100644 --- a/net/sctp/sm_make_chunk.c +++ b/net/sctp/sm_make_chunk.c @@ -65,8 +65,6 @@ #include <net/sctp/sctp.h> #include <net/sctp/sm.h> -extern struct kmem_cache *sctp_chunk_cachep; - SCTP_STATIC struct sctp_chunk *sctp_make_chunk(const struct sctp_association *asoc, __u8 type, __u8 flags, int paylen); @@ -112,20 +110,16 @@ static const struct sctp_paramhdr prsctp_param = { * abort chunk. */ void sctp_init_cause(struct sctp_chunk *chunk, __be16 cause_code, - const void *payload, size_t paylen) + size_t paylen) { sctp_errhdr_t err; - int padlen; __u16 len; /* Cause code constants are now defined in network order. */ err.cause = cause_code; len = sizeof(sctp_errhdr_t) + paylen; - padlen = len % 4; err.length = htons(len); - len += padlen; chunk->subh.err_hdr = sctp_addto_chunk(chunk, sizeof(sctp_errhdr_t), &err); - sctp_addto_chunk(chunk, paylen, payload); } /* 3.3.2 Initiation (INIT) (1) @@ -785,8 +779,8 @@ struct sctp_chunk *sctp_make_abort_no_data( /* Put the tsn back into network byte order. */ payload = htonl(tsn); - sctp_init_cause(retval, SCTP_ERROR_NO_DATA, (const void *)&payload, - sizeof(payload)); + sctp_init_cause(retval, SCTP_ERROR_NO_DATA, sizeof(payload)); + sctp_addto_chunk(retval, sizeof(payload), (const void *)&payload); /* RFC 2960 6.4 Multi-homed SCTP Endpoints * @@ -828,7 +822,8 @@ struct sctp_chunk *sctp_make_abort_user(const struct sctp_association *asoc, goto err_copy; } - sctp_init_cause(retval, SCTP_ERROR_USER_ABORT, payload, paylen); + sctp_init_cause(retval, SCTP_ERROR_USER_ABORT, paylen); + sctp_addto_chunk(retval, paylen, payload); if (paylen) kfree(payload); @@ -855,15 +850,17 @@ struct sctp_chunk *sctp_make_abort_violation( struct sctp_paramhdr phdr; retval = sctp_make_abort(asoc, chunk, sizeof(sctp_errhdr_t) + paylen - + sizeof(sctp_chunkhdr_t)); + + sizeof(sctp_paramhdr_t)); if (!retval) goto end; - sctp_init_cause(retval, SCTP_ERROR_PROTO_VIOLATION, payload, paylen); + sctp_init_cause(retval, SCTP_ERROR_PROTO_VIOLATION, paylen + + sizeof(sctp_paramhdr_t)); phdr.type = htons(chunk->chunk_hdr->type); phdr.length = chunk->chunk_hdr->length; - sctp_addto_chunk(retval, sizeof(sctp_paramhdr_t), &phdr); + sctp_addto_chunk(retval, paylen, payload); + sctp_addto_param(retval, sizeof(sctp_paramhdr_t), &phdr); end: return retval; @@ -960,7 +957,8 @@ struct sctp_chunk *sctp_make_op_error(const struct sctp_association *asoc, if (!retval) goto nodata; - sctp_init_cause(retval, cause_code, payload, paylen); + sctp_init_cause(retval, cause_code, paylen); + sctp_addto_chunk(retval, paylen, payload); nodata: return retval; @@ -1133,7 +1131,7 @@ void *sctp_addto_chunk(struct sctp_chunk *chunk, int len, const void *data) void *target; void *padding; int chunklen = ntohs(chunk->chunk_hdr->length); - int padlen = chunklen % 4; + int padlen = WORD_ROUND(chunklen) - chunklen; padding = skb_put(chunk->skb, padlen); target = skb_put(chunk->skb, len); @@ -1148,6 +1146,25 @@ void *sctp_addto_chunk(struct sctp_chunk *chunk, int len, const void *data) return target; } +/* Append bytes to the end of a parameter. Will panic if chunk is not big + * enough. + */ +void *sctp_addto_param(struct sctp_chunk *chunk, int len, const void *data) +{ + void *target; + int chunklen = ntohs(chunk->chunk_hdr->length); + + target = skb_put(chunk->skb, len); + + memcpy(target, data, len); + + /* Adjust the chunk length field. */ + chunk->chunk_hdr->length = htons(chunklen + len); + chunk->chunk_end = skb_tail_pointer(chunk->skb); + + return target; +} + /* Append bytes from user space to the end of a chunk. Will panic if * chunk is not big enough. * Returns a kernel err value. @@ -1179,25 +1196,36 @@ out: */ void sctp_chunk_assign_ssn(struct sctp_chunk *chunk) { + struct sctp_datamsg *msg; + struct sctp_chunk *lchunk; + struct sctp_stream *stream; __u16 ssn; __u16 sid; if (chunk->has_ssn) return; - /* This is the last possible instant to assign a SSN. */ - if (chunk->chunk_hdr->flags & SCTP_DATA_UNORDERED) { - ssn = 0; - } else { - sid = ntohs(chunk->subh.data_hdr->stream); - if (chunk->chunk_hdr->flags & SCTP_DATA_LAST_FRAG) - ssn = sctp_ssn_next(&chunk->asoc->ssnmap->out, sid); - else - ssn = sctp_ssn_peek(&chunk->asoc->ssnmap->out, sid); - } + /* All fragments will be on the same stream */ + sid = ntohs(chunk->subh.data_hdr->stream); + stream = &chunk->asoc->ssnmap->out; + + /* Now assign the sequence number to the entire message. + * All fragments must have the same stream sequence number. + */ + msg = chunk->msg; + list_for_each_entry(lchunk, &msg->chunks, frag_list) { + if (lchunk->chunk_hdr->flags & SCTP_DATA_UNORDERED) { + ssn = 0; + } else { + if (lchunk->chunk_hdr->flags & SCTP_DATA_LAST_FRAG) + ssn = sctp_ssn_next(stream, sid); + else + ssn = sctp_ssn_peek(stream, sid); + } - chunk->subh.data_hdr->ssn = htons(ssn); - chunk->has_ssn = 1; + lchunk->subh.data_hdr->ssn = htons(ssn); + lchunk->has_ssn = 1; + } } /* Helper function to assign a TSN if needed. This assumes that both @@ -1454,7 +1482,6 @@ no_hmac: do_gettimeofday(&tv); if (!asoc && tv_lt(bear_cookie->expiration, tv)) { - __u16 len; /* * Section 3.3.10.3 Stale Cookie Error (3) * @@ -1472,7 +1499,8 @@ no_hmac: __be32 n = htonl(usecs); sctp_init_cause(*errp, SCTP_ERROR_STALE_COOKIE, - &n, sizeof(n)); + sizeof(n)); + sctp_addto_chunk(*errp, sizeof(n), &n); *error = -SCTP_IERROR_STALE_COOKIE; } else *error = -SCTP_IERROR_NOMEM; @@ -1503,7 +1531,7 @@ no_hmac: /* Also, add the destination address. */ if (list_empty(&retval->base.bind_addr.address_list)) { sctp_add_bind_addr(&retval->base.bind_addr, &chunk->dest, 1, - GFP_ATOMIC); + GFP_ATOMIC); } retval->next_tsn = retval->c.initial_tsn; @@ -1562,7 +1590,8 @@ static int sctp_process_missing_param(const struct sctp_association *asoc, report.num_missing = htonl(1); report.type = paramtype; sctp_init_cause(*errp, SCTP_ERROR_MISS_PARAM, - &report, sizeof(report)); + sizeof(report)); + sctp_addto_chunk(*errp, sizeof(report), &report); } /* Stop processing this chunk. */ @@ -1580,7 +1609,7 @@ static int sctp_process_inv_mandatory(const struct sctp_association *asoc, *errp = sctp_make_op_error_space(asoc, chunk, 0); if (*errp) - sctp_init_cause(*errp, SCTP_ERROR_INV_PARAM, NULL, 0); + sctp_init_cause(*errp, SCTP_ERROR_INV_PARAM, 0); /* Stop processing this chunk. */ return 0; @@ -1601,9 +1630,10 @@ static int sctp_process_inv_paramlength(const struct sctp_association *asoc, *errp = sctp_make_op_error_space(asoc, chunk, payload_len); if (*errp) { - sctp_init_cause(*errp, SCTP_ERROR_PROTO_VIOLATION, error, - sizeof(error)); - sctp_addto_chunk(*errp, sizeof(sctp_paramhdr_t), param); + sctp_init_cause(*errp, SCTP_ERROR_PROTO_VIOLATION, + sizeof(error) + sizeof(sctp_paramhdr_t)); + sctp_addto_chunk(*errp, sizeof(error), error); + sctp_addto_param(*errp, sizeof(sctp_paramhdr_t), param); } return 0; @@ -1624,9 +1654,10 @@ static int sctp_process_hn_param(const struct sctp_association *asoc, if (!*errp) *errp = sctp_make_op_error_space(asoc, chunk, len); - if (*errp) - sctp_init_cause(*errp, SCTP_ERROR_DNS_FAILED, - param.v, len); + if (*errp) { + sctp_init_cause(*errp, SCTP_ERROR_DNS_FAILED, len); + sctp_addto_chunk(*errp, len, param.v); + } /* Stop processing this chunk. */ return 0; @@ -1678,10 +1709,13 @@ static int sctp_process_unk_param(const struct sctp_association *asoc, *errp = sctp_make_op_error_space(asoc, chunk, ntohs(chunk->chunk_hdr->length)); - if (*errp) + if (*errp) { sctp_init_cause(*errp, SCTP_ERROR_UNKNOWN_PARAM, - param.v, WORD_ROUND(ntohs(param.p->length))); + sctp_addto_chunk(*errp, + WORD_ROUND(ntohs(param.p->length)), + param.v); + } break; case SCTP_PARAM_ACTION_SKIP: @@ -1696,8 +1730,10 @@ static int sctp_process_unk_param(const struct sctp_association *asoc, if (*errp) { sctp_init_cause(*errp, SCTP_ERROR_UNKNOWN_PARAM, - param.v, WORD_ROUND(ntohs(param.p->length))); + sctp_addto_chunk(*errp, + WORD_ROUND(ntohs(param.p->length)), + param.v); } else { /* If there is no memory for generating the ERROR * report as specified, an ABORT will be triggered @@ -1797,7 +1833,7 @@ int sctp_verify_init(const struct sctp_association *asoc, * VIOLATION error. We build the ERROR chunk here and let the normal * error handling code build and send the packet. */ - if (param.v < (void*)chunk->chunk_end - sizeof(sctp_paramhdr_t)) { + if (param.v != (void*)chunk->chunk_end) { sctp_process_inv_paramlength(asoc, param.p, chunk, errp); return 0; } @@ -2463,6 +2499,52 @@ static __be16 sctp_process_asconf_param(struct sctp_association *asoc, return SCTP_ERROR_NO_ERROR; } +/* Verify the ASCONF packet before we process it. */ +int sctp_verify_asconf(const struct sctp_association *asoc, + struct sctp_paramhdr *param_hdr, void *chunk_end, + struct sctp_paramhdr **errp) { + sctp_addip_param_t *asconf_param; + union sctp_params param; + int length, plen; + + param.v = (sctp_paramhdr_t *) param_hdr; + while (param.v <= chunk_end - sizeof(sctp_paramhdr_t)) { + length = ntohs(param.p->length); + *errp = param.p; + + if (param.v > chunk_end - length || + length < sizeof(sctp_paramhdr_t)) + return 0; + + switch (param.p->type) { + case SCTP_PARAM_ADD_IP: + case SCTP_PARAM_DEL_IP: + case SCTP_PARAM_SET_PRIMARY: + asconf_param = (sctp_addip_param_t *)param.v; + plen = ntohs(asconf_param->param_hdr.length); + if (plen < sizeof(sctp_addip_param_t) + + sizeof(sctp_paramhdr_t)) + return 0; + break; + case SCTP_PARAM_SUCCESS_REPORT: + case SCTP_PARAM_ADAPTATION_LAYER_IND: + if (length != sizeof(sctp_addip_param_t)) + return 0; + + break; + default: + break; + } + + param.v += WORD_ROUND(length); + } + + if (param.v != chunk_end) + return 0; + + return 1; +} + /* Process an incoming ASCONF chunk with the next expected serial no. and * return an ASCONF_ACK chunk to be sent in response. */ @@ -2577,22 +2659,16 @@ static int sctp_asconf_param_success(struct sctp_association *asoc, switch (asconf_param->param_hdr.type) { case SCTP_PARAM_ADD_IP: - sctp_local_bh_disable(); - sctp_write_lock(&asoc->base.addr_lock); - list_for_each(pos, &bp->address_list) { - saddr = list_entry(pos, struct sctp_sockaddr_entry, list); + /* This is always done in BH context with a socket lock + * held, so the list can not change. + */ + list_for_each_entry(saddr, &bp->address_list, list) { if (sctp_cmp_addr_exact(&saddr->a, &addr)) saddr->use_as_src = 1; } - sctp_write_unlock(&asoc->base.addr_lock); - sctp_local_bh_enable(); break; case SCTP_PARAM_DEL_IP: - sctp_local_bh_disable(); - sctp_write_lock(&asoc->base.addr_lock); - retval = sctp_del_bind_addr(bp, &addr); - sctp_write_unlock(&asoc->base.addr_lock); - sctp_local_bh_enable(); + retval = sctp_del_bind_addr(bp, &addr, call_rcu_bh); list_for_each(pos, &asoc->peer.transport_addr_list) { transport = list_entry(pos, struct sctp_transport, transports); diff --git a/net/sctp/sm_sideeffect.c b/net/sctp/sm_sideeffect.c index d9fad4f6ffc3..8d7890083493 100644 --- a/net/sctp/sm_sideeffect.c +++ b/net/sctp/sm_sideeffect.c @@ -1013,8 +1013,9 @@ static int sctp_side_effects(sctp_event_t event_type, sctp_subtype_t subtype, break; case SCTP_DISPOSITION_VIOLATION: - printk(KERN_ERR "sctp protocol violation state %d " - "chunkid %d\n", state, subtype.chunk); + if (net_ratelimit()) + printk(KERN_ERR "sctp protocol violation state %d " + "chunkid %d\n", state, subtype.chunk); break; case SCTP_DISPOSITION_NOT_IMPL: @@ -1130,6 +1131,9 @@ static int sctp_cmd_interpreter(sctp_event_t event_type, /* Move the Cumulattive TSN Ack ahead. */ sctp_tsnmap_skip(&asoc->peer.tsn_map, cmd->obj.u32); + /* purge the fragmentation queue */ + sctp_ulpq_reasm_flushtsn(&asoc->ulpq, cmd->obj.u32); + /* Abort any in progress partial delivery. */ sctp_ulpq_abort_pd(&asoc->ulpq, GFP_ATOMIC); break; diff --git a/net/sctp/sm_statefuns.c b/net/sctp/sm_statefuns.c index f02ce3dddb7b..a583d67cab63 100644 --- a/net/sctp/sm_statefuns.c +++ b/net/sctp/sm_statefuns.c @@ -90,6 +90,11 @@ static sctp_disposition_t sctp_sf_shut_8_4_5(const struct sctp_endpoint *ep, const sctp_subtype_t type, void *arg, sctp_cmd_seq_t *commands); +static sctp_disposition_t sctp_sf_tabort_8_4_8(const struct sctp_endpoint *ep, + const struct sctp_association *asoc, + const sctp_subtype_t type, + void *arg, + sctp_cmd_seq_t *commands); static struct sctp_sackhdr *sctp_sm_pull_sack(struct sctp_chunk *chunk); static sctp_disposition_t sctp_stop_t1_and_abort(sctp_cmd_seq_t *commands, @@ -97,6 +102,14 @@ static sctp_disposition_t sctp_stop_t1_and_abort(sctp_cmd_seq_t *commands, const struct sctp_association *asoc, struct sctp_transport *transport); +static sctp_disposition_t sctp_sf_abort_violation( + const struct sctp_endpoint *ep, + const struct sctp_association *asoc, + void *arg, + sctp_cmd_seq_t *commands, + const __u8 *payload, + const size_t paylen); + static sctp_disposition_t sctp_sf_violation_chunklen( const struct sctp_endpoint *ep, const struct sctp_association *asoc, @@ -104,6 +117,27 @@ static sctp_disposition_t sctp_sf_violation_chunklen( void *arg, sctp_cmd_seq_t *commands); +static sctp_disposition_t sctp_sf_violation_paramlen( + const struct sctp_endpoint *ep, + const struct sctp_association *asoc, + const sctp_subtype_t type, + void *arg, + sctp_cmd_seq_t *commands); + +static sctp_disposition_t sctp_sf_violation_ctsn( + const struct sctp_endpoint *ep, + const struct sctp_association *asoc, + const sctp_subtype_t type, + void *arg, + sctp_cmd_seq_t *commands); + +static sctp_disposition_t sctp_sf_violation_chunk( + const struct sctp_endpoint *ep, + const struct sctp_association *asoc, + const sctp_subtype_t type, + void *arg, + sctp_cmd_seq_t *commands); + /* Small helper function that checks if the chunk length * is of the appropriate length. The 'required_length' argument * is set to be the size of a specific chunk we are testing. @@ -167,16 +201,21 @@ sctp_disposition_t sctp_sf_do_4_C(const struct sctp_endpoint *ep, struct sctp_chunk *chunk = arg; struct sctp_ulpevent *ev; + if (!sctp_vtag_verify_either(chunk, asoc)) + return sctp_sf_pdiscard(ep, asoc, type, arg, commands); + /* RFC 2960 6.10 Bundling * * An endpoint MUST NOT bundle INIT, INIT ACK or * SHUTDOWN COMPLETE with any other chunks. */ if (!chunk->singleton) - return SCTP_DISPOSITION_VIOLATION; + return sctp_sf_violation_chunk(ep, asoc, type, arg, commands); - if (!sctp_vtag_verify_either(chunk, asoc)) - return sctp_sf_pdiscard(ep, asoc, type, arg, commands); + /* Make sure that the SHUTDOWN_COMPLETE chunk has a valid length. */ + if (!sctp_chunk_length_valid(chunk, sizeof(sctp_chunkhdr_t))) + return sctp_sf_violation_chunklen(ep, asoc, type, arg, + commands); /* RFC 2960 10.2 SCTP-to-ULP * @@ -250,7 +289,6 @@ sctp_disposition_t sctp_sf_do_5_1B_init(const struct sctp_endpoint *ep, struct sctp_chunk *err_chunk; struct sctp_packet *packet; sctp_unrecognized_param_t *unk_param; - struct sock *sk; int len; /* 6.10 Bundling @@ -271,16 +309,6 @@ sctp_disposition_t sctp_sf_do_5_1B_init(const struct sctp_endpoint *ep, if (ep == sctp_sk((sctp_get_ctl_sock()))->ep) return sctp_sf_tabort_8_4_8(ep, asoc, type, arg, commands); - sk = ep->base.sk; - /* If the endpoint is not listening or if the number of associations - * on the TCP-style socket exceed the max backlog, respond with an - * ABORT. - */ - if (!sctp_sstate(sk, LISTENING) || - (sctp_style(sk, TCP) && - sk_acceptq_is_full(sk))) - return sctp_sf_tabort_8_4_8(ep, asoc, type, arg, commands); - /* 3.1 A packet containing an INIT chunk MUST have a zero Verification * Tag. */ @@ -447,17 +475,17 @@ sctp_disposition_t sctp_sf_do_5_1C_ack(const struct sctp_endpoint *ep, if (!sctp_vtag_verify(chunk, asoc)) return sctp_sf_pdiscard(ep, asoc, type, arg, commands); - /* Make sure that the INIT-ACK chunk has a valid length */ - if (!sctp_chunk_length_valid(chunk, sizeof(sctp_initack_chunk_t))) - return sctp_sf_violation_chunklen(ep, asoc, type, arg, - commands); /* 6.10 Bundling * An endpoint MUST NOT bundle INIT, INIT ACK or * SHUTDOWN COMPLETE with any other chunks. */ if (!chunk->singleton) - return SCTP_DISPOSITION_VIOLATION; + return sctp_sf_violation_chunk(ep, asoc, type, arg, commands); + /* Make sure that the INIT-ACK chunk has a valid length */ + if (!sctp_chunk_length_valid(chunk, sizeof(sctp_initack_chunk_t))) + return sctp_sf_violation_chunklen(ep, asoc, type, arg, + commands); /* Grab the INIT header. */ chunk->subh.init_hdr = (sctp_inithdr_t *) chunk->skb->data; @@ -576,12 +604,13 @@ sctp_disposition_t sctp_sf_do_5_1D_ce(const struct sctp_endpoint *ep, struct sctp_ulpevent *ev, *ai_ev = NULL; int error = 0; struct sctp_chunk *err_chk_p; + struct sock *sk; /* If the packet is an OOTB packet which is temporarily on the * control endpoint, respond with an ABORT. */ if (ep == sctp_sk((sctp_get_ctl_sock()))->ep) - return sctp_sf_ootb(ep, asoc, type, arg, commands); + return sctp_sf_tabort_8_4_8(ep, asoc, type, arg, commands); /* Make sure that the COOKIE_ECHO chunk has a valid length. * In this case, we check that we have enough for at least a @@ -591,6 +620,15 @@ sctp_disposition_t sctp_sf_do_5_1D_ce(const struct sctp_endpoint *ep, if (!sctp_chunk_length_valid(chunk, sizeof(sctp_chunkhdr_t))) return sctp_sf_pdiscard(ep, asoc, type, arg, commands); + /* If the endpoint is not listening or if the number of associations + * on the TCP-style socket exceed the max backlog, respond with an + * ABORT. + */ + sk = ep->base.sk; + if (!sctp_sstate(sk, LISTENING) || + (sctp_style(sk, TCP) && sk_acceptq_is_full(sk))) + return sctp_sf_tabort_8_4_8(ep, asoc, type, arg, commands); + /* "Decode" the chunk. We have no optional parameters so we * are in good shape. */ @@ -1018,19 +1056,21 @@ sctp_disposition_t sctp_sf_backbeat_8_3(const struct sctp_endpoint *ep, /* This should never happen, but lets log it if so. */ if (unlikely(!link)) { if (from_addr.sa.sa_family == AF_INET6) { - printk(KERN_WARNING - "%s association %p could not find address " - NIP6_FMT "\n", - __FUNCTION__, - asoc, - NIP6(from_addr.v6.sin6_addr)); + if (net_ratelimit()) + printk(KERN_WARNING + "%s association %p could not find address " + NIP6_FMT "\n", + __FUNCTION__, + asoc, + NIP6(from_addr.v6.sin6_addr)); } else { - printk(KERN_WARNING - "%s association %p could not find address " - NIPQUAD_FMT "\n", - __FUNCTION__, - asoc, - NIPQUAD(from_addr.v4.sin_addr.s_addr)); + if (net_ratelimit()) + printk(KERN_WARNING + "%s association %p could not find address " + NIPQUAD_FMT "\n", + __FUNCTION__, + asoc, + NIPQUAD(from_addr.v4.sin_addr.s_addr)); } return SCTP_DISPOSITION_DISCARD; } @@ -1779,7 +1819,7 @@ static sctp_disposition_t sctp_sf_do_dupcook_d(const struct sctp_endpoint *ep, SCTP_COMM_UP, 0, asoc->c.sinit_num_ostreams, asoc->c.sinit_max_instreams, - NULL, GFP_ATOMIC); + NULL, GFP_ATOMIC); if (!ev) goto nomem; @@ -2481,6 +2521,11 @@ sctp_disposition_t sctp_sf_do_9_2_reshutack(const struct sctp_endpoint *ep, struct sctp_chunk *chunk = (struct sctp_chunk *) arg; struct sctp_chunk *reply; + /* Make sure that the chunk has a valid length */ + if (!sctp_chunk_length_valid(chunk, sizeof(sctp_chunkhdr_t))) + return sctp_sf_violation_chunklen(ep, asoc, type, arg, + commands); + /* Since we are not going to really process this INIT, there * is no point in verifying chunk boundries. Just generate * the SHUTDOWN ACK. @@ -2880,6 +2925,13 @@ sctp_disposition_t sctp_sf_eat_sack_6_2(const struct sctp_endpoint *ep, return SCTP_DISPOSITION_DISCARD; } + /* If Cumulative TSN Ack beyond the max tsn currently + * send, terminating the association and respond to the + * sender with an ABORT. + */ + if (!TSN_lt(ctsn, asoc->next_tsn)) + return sctp_sf_violation_ctsn(ep, asoc, type, arg, commands); + /* Return this SACK for further processing. */ sctp_add_cmd_sf(commands, SCTP_CMD_PROCESS_SACK, SCTP_SACKH(sackh)); @@ -2907,7 +2959,7 @@ sctp_disposition_t sctp_sf_eat_sack_6_2(const struct sctp_endpoint *ep, * * The return value is the disposition of the chunk. */ -sctp_disposition_t sctp_sf_tabort_8_4_8(const struct sctp_endpoint *ep, +static sctp_disposition_t sctp_sf_tabort_8_4_8(const struct sctp_endpoint *ep, const struct sctp_association *asoc, const sctp_subtype_t type, void *arg, @@ -2943,6 +2995,7 @@ sctp_disposition_t sctp_sf_tabort_8_4_8(const struct sctp_endpoint *ep, SCTP_INC_STATS(SCTP_MIB_OUTCTRLCHUNKS); + sctp_sf_pdiscard(ep, asoc, type, arg, commands); return SCTP_DISPOSITION_CONSUME; } @@ -3103,14 +3156,14 @@ sctp_disposition_t sctp_sf_ootb(const struct sctp_endpoint *ep, ch = (sctp_chunkhdr_t *) chunk->chunk_hdr; do { - /* Break out if chunk length is less then minimal. */ + /* Report violation if the chunk is less then minimal */ if (ntohs(ch->length) < sizeof(sctp_chunkhdr_t)) - break; - - ch_end = ((__u8 *)ch) + WORD_ROUND(ntohs(ch->length)); - if (ch_end > skb_tail_pointer(skb)) - break; + return sctp_sf_violation_chunklen(ep, asoc, type, arg, + commands); + /* Now that we know we at least have a chunk header, + * do things that are type appropriate. + */ if (SCTP_CID_SHUTDOWN_ACK == ch->type) ootb_shut_ack = 1; @@ -3122,15 +3175,19 @@ sctp_disposition_t sctp_sf_ootb(const struct sctp_endpoint *ep, if (SCTP_CID_ABORT == ch->type) return sctp_sf_pdiscard(ep, asoc, type, arg, commands); + /* Report violation if chunk len overflows */ + ch_end = ((__u8 *)ch) + WORD_ROUND(ntohs(ch->length)); + if (ch_end > skb_tail_pointer(skb)) + return sctp_sf_violation_chunklen(ep, asoc, type, arg, + commands); + ch = (sctp_chunkhdr_t *) ch_end; } while (ch_end < skb_tail_pointer(skb)); if (ootb_shut_ack) - sctp_sf_shut_8_4_5(ep, asoc, type, arg, commands); + return sctp_sf_shut_8_4_5(ep, asoc, type, arg, commands); else - sctp_sf_tabort_8_4_8(ep, asoc, type, arg, commands); - - return sctp_sf_pdiscard(ep, asoc, type, arg, commands); + return sctp_sf_tabort_8_4_8(ep, asoc, type, arg, commands); } /* @@ -3196,7 +3253,11 @@ static sctp_disposition_t sctp_sf_shut_8_4_5(const struct sctp_endpoint *ep, if (!sctp_chunk_length_valid(chunk, sizeof(sctp_chunkhdr_t))) return sctp_sf_pdiscard(ep, asoc, type, arg, commands); - return SCTP_DISPOSITION_CONSUME; + /* We need to discard the rest of the packet to prevent + * potential bomming attacks from additional bundled chunks. + * This is documented in SCTP Threats ID. + */ + return sctp_sf_pdiscard(ep, asoc, type, arg, commands); } return SCTP_DISPOSITION_NOMEM; @@ -3219,6 +3280,13 @@ sctp_disposition_t sctp_sf_do_8_5_1_E_sa(const struct sctp_endpoint *ep, void *arg, sctp_cmd_seq_t *commands) { + struct sctp_chunk *chunk = arg; + + /* Make sure that the SHUTDOWN_ACK chunk has a valid length. */ + if (!sctp_chunk_length_valid(chunk, sizeof(sctp_chunkhdr_t))) + return sctp_sf_violation_chunklen(ep, asoc, type, arg, + commands); + /* Although we do have an association in this case, it corresponds * to a restarted association. So the packet is treated as an OOTB * packet and the state function that handles OOTB SHUTDOWN_ACK is @@ -3235,8 +3303,11 @@ sctp_disposition_t sctp_sf_do_asconf(const struct sctp_endpoint *ep, { struct sctp_chunk *chunk = arg; struct sctp_chunk *asconf_ack = NULL; + struct sctp_paramhdr *err_param = NULL; sctp_addiphdr_t *hdr; + union sctp_addr_param *addr_param; __u32 serial; + int length; if (!sctp_vtag_verify(chunk, asoc)) { sctp_add_cmd_sf(commands, SCTP_CMD_REPORT_BAD_TAG, @@ -3252,6 +3323,20 @@ sctp_disposition_t sctp_sf_do_asconf(const struct sctp_endpoint *ep, hdr = (sctp_addiphdr_t *)chunk->skb->data; serial = ntohl(hdr->serial); + addr_param = (union sctp_addr_param *)hdr->params; + length = ntohs(addr_param->p.length); + if (length < sizeof(sctp_paramhdr_t)) + return sctp_sf_violation_paramlen(ep, asoc, type, + (void *)addr_param, commands); + + /* Verify the ASCONF chunk before processing it. */ + if (!sctp_verify_asconf(asoc, + (sctp_paramhdr_t *)((void *)addr_param + length), + (void *)chunk->chunk_end, + &err_param)) + return sctp_sf_violation_paramlen(ep, asoc, type, + (void *)&err_param, commands); + /* ADDIP 4.2 C1) Compare the value of the serial number to the value * the endpoint stored in a new association variable * 'Peer-Serial-Number'. @@ -3306,6 +3391,7 @@ sctp_disposition_t sctp_sf_do_asconf_ack(const struct sctp_endpoint *ep, struct sctp_chunk *asconf_ack = arg; struct sctp_chunk *last_asconf = asoc->addip_last_asconf; struct sctp_chunk *abort; + struct sctp_paramhdr *err_param = NULL; sctp_addiphdr_t *addip_hdr; __u32 sent_serial, rcvd_serial; @@ -3323,6 +3409,14 @@ sctp_disposition_t sctp_sf_do_asconf_ack(const struct sctp_endpoint *ep, addip_hdr = (sctp_addiphdr_t *)asconf_ack->skb->data; rcvd_serial = ntohl(addip_hdr->serial); + /* Verify the ASCONF-ACK chunk before processing it. */ + if (!sctp_verify_asconf(asoc, + (sctp_paramhdr_t *)addip_hdr->params, + (void *)asconf_ack->chunk_end, + &err_param)) + return sctp_sf_violation_paramlen(ep, asoc, type, + (void *)&err_param, commands); + if (last_asconf) { addip_hdr = (sctp_addiphdr_t *)last_asconf->subh.addip_hdr; sent_serial = ntohl(addip_hdr->serial); @@ -3341,7 +3435,7 @@ sctp_disposition_t sctp_sf_do_asconf_ack(const struct sctp_endpoint *ep, abort = sctp_make_abort(asoc, asconf_ack, sizeof(sctp_errhdr_t)); if (abort) { - sctp_init_cause(abort, SCTP_ERROR_ASCONF_ACK, NULL, 0); + sctp_init_cause(abort, SCTP_ERROR_ASCONF_ACK, 0); sctp_add_cmd_sf(commands, SCTP_CMD_REPLY, SCTP_CHUNK(abort)); } @@ -3371,7 +3465,7 @@ sctp_disposition_t sctp_sf_do_asconf_ack(const struct sctp_endpoint *ep, abort = sctp_make_abort(asoc, asconf_ack, sizeof(sctp_errhdr_t)); if (abort) { - sctp_init_cause(abort, SCTP_ERROR_RSRC_LOW, NULL, 0); + sctp_init_cause(abort, SCTP_ERROR_RSRC_LOW, 0); sctp_add_cmd_sf(commands, SCTP_CMD_REPLY, SCTP_CHUNK(abort)); } @@ -3633,6 +3727,16 @@ sctp_disposition_t sctp_sf_discard_chunk(const struct sctp_endpoint *ep, void *arg, sctp_cmd_seq_t *commands) { + struct sctp_chunk *chunk = arg; + + /* Make sure that the chunk has a valid length. + * Since we don't know the chunk type, we use a general + * chunkhdr structure to make a comparison. + */ + if (!sctp_chunk_length_valid(chunk, sizeof(sctp_chunkhdr_t))) + return sctp_sf_violation_chunklen(ep, asoc, type, arg, + commands); + SCTP_DEBUG_PRINTK("Chunk %d is discarded\n", type.chunk); return SCTP_DISPOSITION_DISCARD; } @@ -3688,9 +3792,84 @@ sctp_disposition_t sctp_sf_violation(const struct sctp_endpoint *ep, void *arg, sctp_cmd_seq_t *commands) { + struct sctp_chunk *chunk = arg; + + /* Make sure that the chunk has a valid length. */ + if (!sctp_chunk_length_valid(chunk, sizeof(sctp_chunkhdr_t))) + return sctp_sf_violation_chunklen(ep, asoc, type, arg, + commands); + return SCTP_DISPOSITION_VIOLATION; } +/* + * Common function to handle a protocol violation. + */ +static sctp_disposition_t sctp_sf_abort_violation( + const struct sctp_endpoint *ep, + const struct sctp_association *asoc, + void *arg, + sctp_cmd_seq_t *commands, + const __u8 *payload, + const size_t paylen) +{ + struct sctp_packet *packet = NULL; + struct sctp_chunk *chunk = arg; + struct sctp_chunk *abort = NULL; + + /* Make the abort chunk. */ + abort = sctp_make_abort_violation(asoc, chunk, payload, paylen); + if (!abort) + goto nomem; + + if (asoc) { + sctp_add_cmd_sf(commands, SCTP_CMD_REPLY, SCTP_CHUNK(abort)); + SCTP_INC_STATS(SCTP_MIB_OUTCTRLCHUNKS); + + if (asoc->state <= SCTP_STATE_COOKIE_ECHOED) { + sctp_add_cmd_sf(commands, SCTP_CMD_TIMER_STOP, + SCTP_TO(SCTP_EVENT_TIMEOUT_T1_INIT)); + sctp_add_cmd_sf(commands, SCTP_CMD_SET_SK_ERR, + SCTP_ERROR(ECONNREFUSED)); + sctp_add_cmd_sf(commands, SCTP_CMD_INIT_FAILED, + SCTP_PERR(SCTP_ERROR_PROTO_VIOLATION)); + } else { + sctp_add_cmd_sf(commands, SCTP_CMD_SET_SK_ERR, + SCTP_ERROR(ECONNABORTED)); + sctp_add_cmd_sf(commands, SCTP_CMD_ASSOC_FAILED, + SCTP_PERR(SCTP_ERROR_PROTO_VIOLATION)); + SCTP_DEC_STATS(SCTP_MIB_CURRESTAB); + } + } else { + packet = sctp_ootb_pkt_new(asoc, chunk); + + if (!packet) + goto nomem_pkt; + + if (sctp_test_T_bit(abort)) + packet->vtag = ntohl(chunk->sctp_hdr->vtag); + + abort->skb->sk = ep->base.sk; + + sctp_packet_append_chunk(packet, abort); + + sctp_add_cmd_sf(commands, SCTP_CMD_SEND_PKT, + SCTP_PACKET(packet)); + + SCTP_INC_STATS(SCTP_MIB_OUTCTRLCHUNKS); + } + + sctp_sf_pdiscard(ep, asoc, SCTP_ST_CHUNK(0), arg, commands); + + SCTP_INC_STATS(SCTP_MIB_ABORTEDS); + + return SCTP_DISPOSITION_ABORT; + +nomem_pkt: + sctp_chunk_free(abort); +nomem: + return SCTP_DISPOSITION_NOMEM; +} /* * Handle a protocol violation when the chunk length is invalid. @@ -3718,44 +3897,69 @@ static sctp_disposition_t sctp_sf_violation_chunklen( void *arg, sctp_cmd_seq_t *commands) { - struct sctp_chunk *chunk = arg; - struct sctp_chunk *abort = NULL; - char err_str[]="The following chunk had invalid length:"; + char err_str[]="The following chunk had invalid length:"; - /* Make the abort chunk. */ - abort = sctp_make_abort_violation(asoc, chunk, err_str, - sizeof(err_str)); - if (!abort) - goto nomem; + return sctp_sf_abort_violation(ep, asoc, arg, commands, err_str, + sizeof(err_str)); +} - sctp_add_cmd_sf(commands, SCTP_CMD_REPLY, SCTP_CHUNK(abort)); - SCTP_INC_STATS(SCTP_MIB_OUTCTRLCHUNKS); +/* + * Handle a protocol violation when the parameter length is invalid. + * "Invalid" length is identified as smaller then the minimal length a + * given parameter can be. + */ +static sctp_disposition_t sctp_sf_violation_paramlen( + const struct sctp_endpoint *ep, + const struct sctp_association *asoc, + const sctp_subtype_t type, + void *arg, + sctp_cmd_seq_t *commands) { + char err_str[] = "The following parameter had invalid length:"; - if (asoc->state <= SCTP_STATE_COOKIE_ECHOED) { - sctp_add_cmd_sf(commands, SCTP_CMD_TIMER_STOP, - SCTP_TO(SCTP_EVENT_TIMEOUT_T1_INIT)); - sctp_add_cmd_sf(commands, SCTP_CMD_SET_SK_ERR, - SCTP_ERROR(ECONNREFUSED)); - sctp_add_cmd_sf(commands, SCTP_CMD_INIT_FAILED, - SCTP_PERR(SCTP_ERROR_PROTO_VIOLATION)); - } else { - sctp_add_cmd_sf(commands, SCTP_CMD_SET_SK_ERR, - SCTP_ERROR(ECONNABORTED)); - sctp_add_cmd_sf(commands, SCTP_CMD_ASSOC_FAILED, - SCTP_PERR(SCTP_ERROR_PROTO_VIOLATION)); - SCTP_DEC_STATS(SCTP_MIB_CURRESTAB); - } + return sctp_sf_abort_violation(ep, asoc, arg, commands, err_str, + sizeof(err_str)); +} - sctp_add_cmd_sf(commands, SCTP_CMD_DISCARD_PACKET, SCTP_NULL()); +/* Handle a protocol violation when the peer trying to advance the + * cumulative tsn ack to a point beyond the max tsn currently sent. + * + * We inform the other end by sending an ABORT with a Protocol Violation + * error code. + */ +static sctp_disposition_t sctp_sf_violation_ctsn( + const struct sctp_endpoint *ep, + const struct sctp_association *asoc, + const sctp_subtype_t type, + void *arg, + sctp_cmd_seq_t *commands) +{ + char err_str[]="The cumulative tsn ack beyond the max tsn currently sent:"; - SCTP_INC_STATS(SCTP_MIB_ABORTEDS); + return sctp_sf_abort_violation(ep, asoc, arg, commands, err_str, + sizeof(err_str)); +} - return SCTP_DISPOSITION_ABORT; +/* Handle protocol violation of an invalid chunk bundling. For example, + * when we have an association and we recieve bundled INIT-ACK, or + * SHUDOWN-COMPLETE, our peer is clearly violationg the "MUST NOT bundle" + * statement from the specs. Additinally, there might be an attacker + * on the path and we may not want to continue this communication. + */ +static sctp_disposition_t sctp_sf_violation_chunk( + const struct sctp_endpoint *ep, + const struct sctp_association *asoc, + const sctp_subtype_t type, + void *arg, + sctp_cmd_seq_t *commands) +{ + char err_str[]="The following chunk violates protocol:"; -nomem: - return SCTP_DISPOSITION_NOMEM; -} + if (!asoc) + return sctp_sf_violation(ep, asoc, type, arg, commands); + return sctp_sf_abort_violation(ep, asoc, arg, commands, err_str, + sizeof(err_str)); +} /*************************************************************************** * These are the state functions for handling primitive (Section 10) events. ***************************************************************************/ @@ -5122,7 +5326,22 @@ static struct sctp_packet *sctp_ootb_pkt_new(const struct sctp_association *asoc * association exists, otherwise, use the peer's vtag. */ if (asoc) { - vtag = asoc->peer.i.init_tag; + /* Special case the INIT-ACK as there is no peer's vtag + * yet. + */ + switch(chunk->chunk_hdr->type) { + case SCTP_CID_INIT_ACK: + { + sctp_initack_chunk_t *initack; + + initack = (sctp_initack_chunk_t *)chunk->chunk_hdr; + vtag = ntohl(initack->init_hdr.init_tag); + break; + } + default: + vtag = asoc->peer.i.init_tag; + break; + } } else { /* Special case the INIT and stale COOKIE_ECHO as there is no * vtag yet. diff --git a/net/sctp/sm_statetable.c b/net/sctp/sm_statetable.c index 70a91ece3c49..ddb0ba3974b0 100644 --- a/net/sctp/sm_statetable.c +++ b/net/sctp/sm_statetable.c @@ -110,7 +110,7 @@ const sctp_sm_table_entry_t *sctp_sm_lookup_event(sctp_event_t event_type, /* SCTP_STATE_EMPTY */ \ TYPE_SCTP_FUNC(sctp_sf_ootb), \ /* SCTP_STATE_CLOSED */ \ - TYPE_SCTP_FUNC(sctp_sf_tabort_8_4_8), \ + TYPE_SCTP_FUNC(sctp_sf_ootb), \ /* SCTP_STATE_COOKIE_WAIT */ \ TYPE_SCTP_FUNC(sctp_sf_discard_chunk), \ /* SCTP_STATE_COOKIE_ECHOED */ \ @@ -173,7 +173,7 @@ const sctp_sm_table_entry_t *sctp_sm_lookup_event(sctp_event_t event_type, /* SCTP_STATE_EMPTY */ \ TYPE_SCTP_FUNC(sctp_sf_ootb), \ /* SCTP_STATE_CLOSED */ \ - TYPE_SCTP_FUNC(sctp_sf_tabort_8_4_8), \ + TYPE_SCTP_FUNC(sctp_sf_ootb), \ /* SCTP_STATE_COOKIE_WAIT */ \ TYPE_SCTP_FUNC(sctp_sf_discard_chunk), \ /* SCTP_STATE_COOKIE_ECHOED */ \ @@ -194,7 +194,7 @@ const sctp_sm_table_entry_t *sctp_sm_lookup_event(sctp_event_t event_type, /* SCTP_STATE_EMPTY */ \ TYPE_SCTP_FUNC(sctp_sf_ootb), \ /* SCTP_STATE_CLOSED */ \ - TYPE_SCTP_FUNC(sctp_sf_tabort_8_4_8), \ + TYPE_SCTP_FUNC(sctp_sf_ootb), \ /* SCTP_STATE_COOKIE_WAIT */ \ TYPE_SCTP_FUNC(sctp_sf_discard_chunk), \ /* SCTP_STATE_COOKIE_ECHOED */ \ @@ -216,7 +216,7 @@ const sctp_sm_table_entry_t *sctp_sm_lookup_event(sctp_event_t event_type, /* SCTP_STATE_EMPTY */ \ TYPE_SCTP_FUNC(sctp_sf_ootb), \ /* SCTP_STATE_CLOSED */ \ - TYPE_SCTP_FUNC(sctp_sf_tabort_8_4_8), \ + TYPE_SCTP_FUNC(sctp_sf_ootb), \ /* SCTP_STATE_COOKIE_WAIT */ \ TYPE_SCTP_FUNC(sctp_sf_violation), \ /* SCTP_STATE_COOKIE_ECHOED */ \ @@ -258,7 +258,7 @@ const sctp_sm_table_entry_t *sctp_sm_lookup_event(sctp_event_t event_type, /* SCTP_STATE_EMPTY */ \ TYPE_SCTP_FUNC(sctp_sf_ootb), \ /* SCTP_STATE_CLOSED */ \ - TYPE_SCTP_FUNC(sctp_sf_tabort_8_4_8), \ + TYPE_SCTP_FUNC(sctp_sf_ootb), \ /* SCTP_STATE_COOKIE_WAIT */ \ TYPE_SCTP_FUNC(sctp_sf_discard_chunk), \ /* SCTP_STATE_COOKIE_ECHOED */ \ @@ -300,7 +300,7 @@ const sctp_sm_table_entry_t *sctp_sm_lookup_event(sctp_event_t event_type, /* SCTP_STATE_EMPTY */ \ TYPE_SCTP_FUNC(sctp_sf_ootb), \ /* SCTP_STATE_CLOSED */ \ - TYPE_SCTP_FUNC(sctp_sf_tabort_8_4_8), \ + TYPE_SCTP_FUNC(sctp_sf_ootb), \ /* SCTP_STATE_COOKIE_WAIT */ \ TYPE_SCTP_FUNC(sctp_sf_discard_chunk), \ /* SCTP_STATE_COOKIE_ECHOED */ \ @@ -499,7 +499,7 @@ static const sctp_sm_table_entry_t addip_chunk_event_table[SCTP_NUM_ADDIP_CHUNK_ /* SCTP_STATE_EMPTY */ \ TYPE_SCTP_FUNC(sctp_sf_ootb), \ /* SCTP_STATE_CLOSED */ \ - TYPE_SCTP_FUNC(sctp_sf_tabort_8_4_8), \ + TYPE_SCTP_FUNC(sctp_sf_ootb), \ /* SCTP_STATE_COOKIE_WAIT */ \ TYPE_SCTP_FUNC(sctp_sf_discard_chunk), \ /* SCTP_STATE_COOKIE_ECHOED */ \ @@ -528,7 +528,7 @@ chunk_event_table_unknown[SCTP_STATE_NUM_STATES] = { /* SCTP_STATE_EMPTY */ TYPE_SCTP_FUNC(sctp_sf_ootb), /* SCTP_STATE_CLOSED */ - TYPE_SCTP_FUNC(sctp_sf_tabort_8_4_8), + TYPE_SCTP_FUNC(sctp_sf_ootb), /* SCTP_STATE_COOKIE_WAIT */ TYPE_SCTP_FUNC(sctp_sf_unk_chunk), /* SCTP_STATE_COOKIE_ECHOED */ diff --git a/net/sctp/socket.c b/net/sctp/socket.c index b1917f68723c..772fbfb4bfda 100644 --- a/net/sctp/socket.c +++ b/net/sctp/socket.c @@ -107,8 +107,6 @@ static void sctp_sock_migrate(struct sock *, struct sock *, struct sctp_association *, sctp_socket_type_t); static char *sctp_hmac_alg = SCTP_COOKIE_HMAC_ALG; -extern struct kmem_cache *sctp_bucket_cachep; - /* Get the sndbuf space available at the time on the association. */ static inline int sctp_wspace(struct sctp_association *asoc) { @@ -355,6 +353,7 @@ SCTP_STATIC int sctp_do_bind(struct sock *sk, union sctp_addr *addr, int len) * The function sctp_get_port_local() does duplicate address * detection. */ + addr->v4.sin_port = htons(snum); if ((ret = sctp_get_port_local(sk, addr))) { if (ret == (long) sk) { /* This endpoint has a conflicting address. */ @@ -368,14 +367,10 @@ SCTP_STATIC int sctp_do_bind(struct sock *sk, union sctp_addr *addr, int len) if (!bp->port) bp->port = inet_sk(sk)->num; - /* Add the address to the bind address list. */ - sctp_local_bh_disable(); - sctp_write_lock(&ep->base.addr_lock); - - /* Use GFP_ATOMIC since BHs are disabled. */ + /* Add the address to the bind address list. + * Use GFP_ATOMIC since BHs will be disabled. + */ ret = sctp_add_bind_addr(bp, addr, 1, GFP_ATOMIC); - sctp_write_unlock(&ep->base.addr_lock); - sctp_local_bh_enable(); /* Copy back into socket for getsockname() use. */ if (!ret) { @@ -433,7 +428,7 @@ out: * * Only sctp_setsockopt_bindx() is supposed to call this function. */ -int sctp_bindx_add(struct sock *sk, struct sockaddr *addrs, int addrcnt) +static int sctp_bindx_add(struct sock *sk, struct sockaddr *addrs, int addrcnt) { int cnt; int retval = 0; @@ -545,15 +540,12 @@ static int sctp_send_asconf_add_ip(struct sock *sk, if (i < addrcnt) continue; - /* Use the first address in bind addr list of association as - * Address Parameter of ASCONF CHUNK. + /* Use the first valid address in bind addr list of + * association as Address Parameter of ASCONF CHUNK. */ - sctp_read_lock(&asoc->base.addr_lock); bp = &asoc->base.bind_addr; p = bp->address_list.next; laddr = list_entry(p, struct sctp_sockaddr_entry, list); - sctp_read_unlock(&asoc->base.addr_lock); - chunk = sctp_make_asconf_update_ip(asoc, &laddr->a, addrs, addrcnt, SCTP_PARAM_ADD_IP); if (!chunk) { @@ -568,8 +560,6 @@ static int sctp_send_asconf_add_ip(struct sock *sk, /* Add the new addresses to the bind address list with * use_as_src set to 0. */ - sctp_local_bh_disable(); - sctp_write_lock(&asoc->base.addr_lock); addr_buf = addrs; for (i = 0; i < addrcnt; i++) { addr = (union sctp_addr *)addr_buf; @@ -579,8 +569,6 @@ static int sctp_send_asconf_add_ip(struct sock *sk, GFP_ATOMIC); addr_buf += af->sockaddr_len; } - sctp_write_unlock(&asoc->base.addr_lock); - sctp_local_bh_enable(); } out: @@ -602,7 +590,7 @@ out: * * Only sctp_setsockopt_bindx() is supposed to call this function. */ -int sctp_bindx_rem(struct sock *sk, struct sockaddr *addrs, int addrcnt) +static int sctp_bindx_rem(struct sock *sk, struct sockaddr *addrs, int addrcnt) { struct sctp_sock *sp = sctp_sk(sk); struct sctp_endpoint *ep = sp->ep; @@ -652,13 +640,7 @@ int sctp_bindx_rem(struct sock *sk, struct sockaddr *addrs, int addrcnt) * socket routing and failover schemes. Refer to comments in * sctp_do_bind(). -daisy */ - sctp_local_bh_disable(); - sctp_write_lock(&ep->base.addr_lock); - - retval = sctp_del_bind_addr(bp, sa_addr); - - sctp_write_unlock(&ep->base.addr_lock); - sctp_local_bh_enable(); + retval = sctp_del_bind_addr(bp, sa_addr, call_rcu); addr_buf += af->sockaddr_len; err_bindx_rem: @@ -749,14 +731,16 @@ static int sctp_send_asconf_del_ip(struct sock *sk, * make sure that we do not delete all the addresses in the * association. */ - sctp_read_lock(&asoc->base.addr_lock); bp = &asoc->base.bind_addr; laddr = sctp_find_unmatch_addr(bp, (union sctp_addr *)addrs, addrcnt, sp); - sctp_read_unlock(&asoc->base.addr_lock); if (!laddr) continue; + /* We do not need RCU protection throughout this loop + * because this is done under a socket lock from the + * setsockopt call. + */ chunk = sctp_make_asconf_update_ip(asoc, laddr, addrs, addrcnt, SCTP_PARAM_DEL_IP); if (!chunk) { @@ -767,23 +751,16 @@ static int sctp_send_asconf_del_ip(struct sock *sk, /* Reset use_as_src flag for the addresses in the bind address * list that are to be deleted. */ - sctp_local_bh_disable(); - sctp_write_lock(&asoc->base.addr_lock); addr_buf = addrs; for (i = 0; i < addrcnt; i++) { laddr = (union sctp_addr *)addr_buf; af = sctp_get_af_specific(laddr->v4.sin_family); - list_for_each(pos1, &bp->address_list) { - saddr = list_entry(pos1, - struct sctp_sockaddr_entry, - list); + list_for_each_entry(saddr, &bp->address_list, list) { if (sctp_cmp_addr_exact(&saddr->a, laddr)) saddr->use_as_src = 0; } addr_buf += af->sockaddr_len; } - sctp_write_unlock(&asoc->base.addr_lock); - sctp_local_bh_enable(); /* Update the route and saddr entries for all the transports * as some of the addresses in the bind address list are @@ -977,7 +954,7 @@ static int __sctp_connect(struct sock* sk, int err = 0; int addrcnt = 0; int walk_size = 0; - union sctp_addr *sa_addr; + union sctp_addr *sa_addr = NULL; void *addr_buf; unsigned short port; unsigned int f_flags = 0; @@ -1011,7 +988,10 @@ static int __sctp_connect(struct sock* sk, goto out_free; } - err = sctp_verify_addr(sk, sa_addr, af->sockaddr_len); + /* Save current address so we can work with it */ + memcpy(&to, sa_addr, af->sockaddr_len); + + err = sctp_verify_addr(sk, &to, af->sockaddr_len); if (err) goto out_free; @@ -1021,12 +1001,11 @@ static int __sctp_connect(struct sock* sk, if (asoc && asoc->peer.port && asoc->peer.port != port) goto out_free; - memcpy(&to, sa_addr, af->sockaddr_len); /* Check if there already is a matching association on the * endpoint (other than the one created here). */ - asoc2 = sctp_endpoint_lookup_assoc(ep, sa_addr, &transport); + asoc2 = sctp_endpoint_lookup_assoc(ep, &to, &transport); if (asoc2 && asoc2 != asoc) { if (asoc2->state >= SCTP_STATE_ESTABLISHED) err = -EISCONN; @@ -1039,7 +1018,7 @@ static int __sctp_connect(struct sock* sk, * make sure that there is no peeled-off association matching * the peer address even on another socket. */ - if (sctp_endpoint_is_peeled_off(ep, sa_addr)) { + if (sctp_endpoint_is_peeled_off(ep, &to)) { err = -EADDRNOTAVAIL; goto out_free; } @@ -1070,7 +1049,7 @@ static int __sctp_connect(struct sock* sk, } } - scope = sctp_scope(sa_addr); + scope = sctp_scope(&to); asoc = sctp_association_new(ep, sk, scope, GFP_KERNEL); if (!asoc) { err = -ENOMEM; @@ -1079,7 +1058,7 @@ static int __sctp_connect(struct sock* sk, } /* Prime the peer's transport structures. */ - transport = sctp_assoc_add_peer(asoc, sa_addr, GFP_KERNEL, + transport = sctp_assoc_add_peer(asoc, &to, GFP_KERNEL, SCTP_UNKNOWN); if (!transport) { err = -ENOMEM; @@ -1103,8 +1082,8 @@ static int __sctp_connect(struct sock* sk, /* Initialize sk's dport and daddr for getpeername() */ inet_sk(sk)->dport = htons(asoc->peer.port); - af = sctp_get_af_specific(to.sa.sa_family); - af->to_sk_daddr(&to, sk); + af = sctp_get_af_specific(sa_addr->sa.sa_family); + af->to_sk_daddr(sa_addr, sk); sk->sk_err = 0; /* in-kernel sockets don't generally have a file allocated to them @@ -1531,7 +1510,6 @@ SCTP_STATIC int sctp_sendmsg(struct kiocb *iocb, struct sock *sk, goto out_unlock; } if (sinfo_flags & SCTP_ABORT) { - struct sctp_chunk *chunk; chunk = sctp_make_abort_user(asoc, msg, msg_len); if (!chunk) { @@ -4059,9 +4037,7 @@ static int sctp_getsockopt_local_addrs_num_old(struct sock *sk, int len, sctp_assoc_t id; struct sctp_bind_addr *bp; struct sctp_association *asoc; - struct list_head *pos, *temp; struct sctp_sockaddr_entry *addr; - rwlock_t *addr_lock; int cnt = 0; if (len < sizeof(sctp_assoc_t)) @@ -4078,17 +4054,13 @@ static int sctp_getsockopt_local_addrs_num_old(struct sock *sk, int len, */ if (0 == id) { bp = &sctp_sk(sk)->ep->base.bind_addr; - addr_lock = &sctp_sk(sk)->ep->base.addr_lock; } else { asoc = sctp_id2assoc(sk, id); if (!asoc) return -EINVAL; bp = &asoc->base.bind_addr; - addr_lock = &asoc->base.addr_lock; } - sctp_read_lock(addr_lock); - /* If the endpoint is bound to 0.0.0.0 or ::0, count the valid * addresses from the global local address list. */ @@ -4096,27 +4068,33 @@ static int sctp_getsockopt_local_addrs_num_old(struct sock *sk, int len, addr = list_entry(bp->address_list.next, struct sctp_sockaddr_entry, list); if (sctp_is_any(&addr->a)) { - list_for_each_safe(pos, temp, &sctp_local_addr_list) { - addr = list_entry(pos, - struct sctp_sockaddr_entry, - list); + rcu_read_lock(); + list_for_each_entry_rcu(addr, + &sctp_local_addr_list, list) { + if (!addr->valid) + continue; + if ((PF_INET == sk->sk_family) && (AF_INET6 == addr->a.sa.sa_family)) continue; + cnt++; } + rcu_read_unlock(); } else { cnt = 1; } goto done; } - list_for_each(pos, &bp->address_list) { + /* Protection on the bound address list is not needed, + * since in the socket option context we hold the socket lock, + * so there is no way that the bound address list can change. + */ + list_for_each_entry(addr, &bp->address_list, list) { cnt ++; } - done: - sctp_read_unlock(addr_lock); return cnt; } @@ -4127,14 +4105,16 @@ static int sctp_copy_laddrs_old(struct sock *sk, __u16 port, int max_addrs, void *to, int *bytes_copied) { - struct list_head *pos, *next; struct sctp_sockaddr_entry *addr; union sctp_addr temp; int cnt = 0; int addrlen; - list_for_each_safe(pos, next, &sctp_local_addr_list) { - addr = list_entry(pos, struct sctp_sockaddr_entry, list); + rcu_read_lock(); + list_for_each_entry_rcu(addr, &sctp_local_addr_list, list) { + if (!addr->valid) + continue; + if ((PF_INET == sk->sk_family) && (AF_INET6 == addr->a.sa.sa_family)) continue; @@ -4149,6 +4129,7 @@ static int sctp_copy_laddrs_old(struct sock *sk, __u16 port, cnt ++; if (cnt >= max_addrs) break; } + rcu_read_unlock(); return cnt; } @@ -4156,14 +4137,16 @@ static int sctp_copy_laddrs_old(struct sock *sk, __u16 port, static int sctp_copy_laddrs(struct sock *sk, __u16 port, void *to, size_t space_left, int *bytes_copied) { - struct list_head *pos, *next; struct sctp_sockaddr_entry *addr; union sctp_addr temp; int cnt = 0; int addrlen; - list_for_each_safe(pos, next, &sctp_local_addr_list) { - addr = list_entry(pos, struct sctp_sockaddr_entry, list); + rcu_read_lock(); + list_for_each_entry_rcu(addr, &sctp_local_addr_list, list) { + if (!addr->valid) + continue; + if ((PF_INET == sk->sk_family) && (AF_INET6 == addr->a.sa.sa_family)) continue; @@ -4171,8 +4154,10 @@ static int sctp_copy_laddrs(struct sock *sk, __u16 port, void *to, sctp_get_pf_specific(sk->sk_family)->addr_v4map(sctp_sk(sk), &temp); addrlen = sctp_get_af_specific(temp.sa.sa_family)->sockaddr_len; - if (space_left < addrlen) - return -ENOMEM; + if (space_left < addrlen) { + cnt = -ENOMEM; + break; + } memcpy(to, &temp, addrlen); to += addrlen; @@ -4180,6 +4165,7 @@ static int sctp_copy_laddrs(struct sock *sk, __u16 port, void *to, space_left -= addrlen; *bytes_copied += addrlen; } + rcu_read_unlock(); return cnt; } @@ -4192,7 +4178,6 @@ static int sctp_getsockopt_local_addrs_old(struct sock *sk, int len, { struct sctp_bind_addr *bp; struct sctp_association *asoc; - struct list_head *pos; int cnt = 0; struct sctp_getaddrs_old getaddrs; struct sctp_sockaddr_entry *addr; @@ -4200,7 +4185,6 @@ static int sctp_getsockopt_local_addrs_old(struct sock *sk, int len, union sctp_addr temp; struct sctp_sock *sp = sctp_sk(sk); int addrlen; - rwlock_t *addr_lock; int err = 0; void *addrs; void *buf; @@ -4222,13 +4206,11 @@ static int sctp_getsockopt_local_addrs_old(struct sock *sk, int len, */ if (0 == getaddrs.assoc_id) { bp = &sctp_sk(sk)->ep->base.bind_addr; - addr_lock = &sctp_sk(sk)->ep->base.addr_lock; } else { asoc = sctp_id2assoc(sk, getaddrs.assoc_id); if (!asoc) return -EINVAL; bp = &asoc->base.bind_addr; - addr_lock = &asoc->base.addr_lock; } to = getaddrs.addrs; @@ -4242,8 +4224,6 @@ static int sctp_getsockopt_local_addrs_old(struct sock *sk, int len, if (!addrs) return -ENOMEM; - sctp_read_lock(addr_lock); - /* If the endpoint is bound to 0.0.0.0 or ::0, get the valid * addresses from the global local address list. */ @@ -4259,8 +4239,11 @@ static int sctp_getsockopt_local_addrs_old(struct sock *sk, int len, } buf = addrs; - list_for_each(pos, &bp->address_list) { - addr = list_entry(pos, struct sctp_sockaddr_entry, list); + /* Protection on the bound address list is not needed since + * in the socket option context we hold a socket lock and + * thus the bound address list can't change. + */ + list_for_each_entry(addr, &bp->address_list, list) { memcpy(&temp, &addr->a, sizeof(temp)); sctp_get_pf_specific(sk->sk_family)->addr_v4map(sp, &temp); addrlen = sctp_get_af_specific(temp.sa.sa_family)->sockaddr_len; @@ -4272,8 +4255,6 @@ static int sctp_getsockopt_local_addrs_old(struct sock *sk, int len, } copy_getaddrs: - sctp_read_unlock(addr_lock); - /* copy the entire address list into the user provided space */ if (copy_to_user(to, addrs, bytes_copied)) { err = -EFAULT; @@ -4295,7 +4276,6 @@ static int sctp_getsockopt_local_addrs(struct sock *sk, int len, { struct sctp_bind_addr *bp; struct sctp_association *asoc; - struct list_head *pos; int cnt = 0; struct sctp_getaddrs getaddrs; struct sctp_sockaddr_entry *addr; @@ -4303,7 +4283,6 @@ static int sctp_getsockopt_local_addrs(struct sock *sk, int len, union sctp_addr temp; struct sctp_sock *sp = sctp_sk(sk); int addrlen; - rwlock_t *addr_lock; int err = 0; size_t space_left; int bytes_copied = 0; @@ -4324,13 +4303,11 @@ static int sctp_getsockopt_local_addrs(struct sock *sk, int len, */ if (0 == getaddrs.assoc_id) { bp = &sctp_sk(sk)->ep->base.bind_addr; - addr_lock = &sctp_sk(sk)->ep->base.addr_lock; } else { asoc = sctp_id2assoc(sk, getaddrs.assoc_id); if (!asoc) return -EINVAL; bp = &asoc->base.bind_addr; - addr_lock = &asoc->base.addr_lock; } to = optval + offsetof(struct sctp_getaddrs,addrs); @@ -4340,8 +4317,6 @@ static int sctp_getsockopt_local_addrs(struct sock *sk, int len, if (!addrs) return -ENOMEM; - sctp_read_lock(addr_lock); - /* If the endpoint is bound to 0.0.0.0 or ::0, get the valid * addresses from the global local address list. */ @@ -4353,21 +4328,24 @@ static int sctp_getsockopt_local_addrs(struct sock *sk, int len, space_left, &bytes_copied); if (cnt < 0) { err = cnt; - goto error; + goto out; } goto copy_getaddrs; } } buf = addrs; - list_for_each(pos, &bp->address_list) { - addr = list_entry(pos, struct sctp_sockaddr_entry, list); + /* Protection on the bound address list is not needed since + * in the socket option context we hold a socket lock and + * thus the bound address list can't change. + */ + list_for_each_entry(addr, &bp->address_list, list) { memcpy(&temp, &addr->a, sizeof(temp)); sctp_get_pf_specific(sk->sk_family)->addr_v4map(sp, &temp); addrlen = sctp_get_af_specific(temp.sa.sa_family)->sockaddr_len; if (space_left < addrlen) { err = -ENOMEM; /*fixme: right error?*/ - goto error; + goto out; } memcpy(buf, &temp, addrlen); buf += addrlen; @@ -4377,19 +4355,17 @@ static int sctp_getsockopt_local_addrs(struct sock *sk, int len, } copy_getaddrs: - sctp_read_unlock(addr_lock); - if (copy_to_user(to, addrs, bytes_copied)) { err = -EFAULT; - goto error; + goto out; } if (put_user(cnt, &((struct sctp_getaddrs __user *)optval)->addr_num)) { err = -EFAULT; - goto error; + goto out; } if (put_user(bytes_copied, optlen)) err = -EFAULT; -error: +out: kfree(addrs); return err; } @@ -4803,7 +4779,7 @@ static int sctp_getsockopt_partial_delivery_point(struct sock *sk, int len, char __user *optval, int __user *optlen) { - u32 val; + u32 val; if (len < sizeof(u32)) return -EINVAL; @@ -4827,7 +4803,7 @@ static int sctp_getsockopt_maxburst(struct sock *sk, int len, char __user *optval, int __user *optlen) { - int val; + int val; if (len < sizeof(int)) return -EINVAL; @@ -5197,6 +5173,7 @@ SCTP_STATIC int sctp_seqpacket_listen(struct sock *sk, int backlog) sctp_unhash_endpoint(ep); sk->sk_state = SCTP_SS_CLOSED; + return 0; } /* Return if we are already listening. */ @@ -5244,6 +5221,7 @@ SCTP_STATIC int sctp_stream_listen(struct sock *sk, int backlog) sctp_unhash_endpoint(ep); sk->sk_state = SCTP_SS_CLOSED; + return 0; } if (sctp_sstate(sk, LISTENING)) @@ -5964,7 +5942,7 @@ static int sctp_wait_for_accept(struct sock *sk, long timeo) return err; } -void sctp_wait_for_close(struct sock *sk, long timeout) +static void sctp_wait_for_close(struct sock *sk, long timeout) { DEFINE_WAIT(wait); diff --git a/net/sctp/tsnmap.c b/net/sctp/tsnmap.c index d3192a1babcc..1ff0daade304 100644 --- a/net/sctp/tsnmap.c +++ b/net/sctp/tsnmap.c @@ -161,7 +161,7 @@ SCTP_STATIC int sctp_tsnmap_next_gap_ack(const struct sctp_tsnmap *map, __u16 *start, __u16 *end) { int started, ended; - __u16 _start, _end, offset; + __u16 start_, end_, offset; /* We haven't found a gap yet. */ started = ended = 0; @@ -175,7 +175,7 @@ SCTP_STATIC int sctp_tsnmap_next_gap_ack(const struct sctp_tsnmap *map, offset = iter->start - map->base_tsn; sctp_tsnmap_find_gap_ack(map->tsn_map, offset, map->len, 0, - &started, &_start, &ended, &_end); + &started, &start_, &ended, &end_); } /* Do we need to check the overflow map? */ @@ -193,8 +193,8 @@ SCTP_STATIC int sctp_tsnmap_next_gap_ack(const struct sctp_tsnmap *map, offset, map->len, map->len, - &started, &_start, - &ended, &_end); + &started, &start_, + &ended, &end_); } /* The Gap Ack Block happens to end at the end of the @@ -202,7 +202,7 @@ SCTP_STATIC int sctp_tsnmap_next_gap_ack(const struct sctp_tsnmap *map, */ if (started && !ended) { ended++; - _end = map->len + map->len - 1; + end_ = map->len + map->len - 1; } /* If we found a Gap Ack Block, return the start and end and @@ -215,8 +215,8 @@ SCTP_STATIC int sctp_tsnmap_next_gap_ack(const struct sctp_tsnmap *map, int gap = map->cumulative_tsn_ack_point - map->base_tsn; - *start = _start - gap; - *end = _end - gap; + *start = start_ - gap; + *end = end_ - gap; /* Move the iterator forward. */ iter->start = map->cumulative_tsn_ack_point + *end + 1; diff --git a/net/sctp/ulpqueue.c b/net/sctp/ulpqueue.c index 34eb977a204d..fa0ba2a5564e 100644 --- a/net/sctp/ulpqueue.c +++ b/net/sctp/ulpqueue.c @@ -659,6 +659,46 @@ done: return retval; } +/* + * Flush out stale fragments from the reassembly queue when processing + * a Forward TSN. + * + * RFC 3758, Section 3.6 + * + * After receiving and processing a FORWARD TSN, the data receiver MUST + * take cautions in updating its re-assembly queue. The receiver MUST + * remove any partially reassembled message, which is still missing one + * or more TSNs earlier than or equal to the new cumulative TSN point. + * In the event that the receiver has invoked the partial delivery API, + * a notification SHOULD also be generated to inform the upper layer API + * that the message being partially delivered will NOT be completed. + */ +void sctp_ulpq_reasm_flushtsn(struct sctp_ulpq *ulpq, __u32 fwd_tsn) +{ + struct sk_buff *pos, *tmp; + struct sctp_ulpevent *event; + __u32 tsn; + + if (skb_queue_empty(&ulpq->reasm)) + return; + + skb_queue_walk_safe(&ulpq->reasm, pos, tmp) { + event = sctp_skb2event(pos); + tsn = event->tsn; + + /* Since the entire message must be abandoned by the + * sender (item A3 in Section 3.5, RFC 3758), we can + * free all fragments on the list that are less then + * or equal to ctsn_point + */ + if (TSN_lte(tsn, fwd_tsn)) { + __skb_unlink(pos, &ulpq->reasm); + sctp_ulpevent_free(event); + } else + break; + } +} + /* Helper function to gather skbs that have possibly become * ordered by an an incoming chunk. */ @@ -794,7 +834,7 @@ static struct sctp_ulpevent *sctp_ulpq_order(struct sctp_ulpq *ulpq, /* Helper function to gather skbs that have possibly become * ordered by forward tsn skipping their dependencies. */ -static inline void sctp_ulpq_reap_ordered(struct sctp_ulpq *ulpq) +static inline void sctp_ulpq_reap_ordered(struct sctp_ulpq *ulpq, __u16 sid) { struct sk_buff *pos, *tmp; struct sctp_ulpevent *cevent; @@ -813,31 +853,40 @@ static inline void sctp_ulpq_reap_ordered(struct sctp_ulpq *ulpq) csid = cevent->stream; cssn = cevent->ssn; - if (cssn != sctp_ssn_peek(in, csid)) + /* Have we gone too far? */ + if (csid > sid) break; - /* Found it, so mark in the ssnmap. */ - sctp_ssn_next(in, csid); + /* Have we not gone far enough? */ + if (csid < sid) + continue; + + /* see if this ssn has been marked by skipping */ + if (!SSN_lt(cssn, sctp_ssn_peek(in, csid))) + break; __skb_unlink(pos, &ulpq->lobby); - if (!event) { + if (!event) /* Create a temporary list to collect chunks on. */ event = sctp_skb2event(pos); - __skb_queue_tail(&temp, sctp_event2skb(event)); - } else { - /* Attach all gathered skbs to the event. */ - __skb_queue_tail(&temp, pos); - } + + /* Attach all gathered skbs to the event. */ + __skb_queue_tail(&temp, pos); } /* Send event to the ULP. 'event' is the sctp_ulpevent for * very first SKB on the 'temp' list. */ - if (event) + if (event) { + /* see if we have more ordered that we can deliver */ + sctp_ulpq_retrieve_ordered(ulpq, event); sctp_ulpq_tail_event(ulpq, event); + } } -/* Skip over an SSN. */ +/* Skip over an SSN. This is used during the processing of + * Forwared TSN chunk to skip over the abandoned ordered data + */ void sctp_ulpq_skip(struct sctp_ulpq *ulpq, __u16 sid, __u16 ssn) { struct sctp_stream *in; @@ -855,7 +904,7 @@ void sctp_ulpq_skip(struct sctp_ulpq *ulpq, __u16 sid, __u16 ssn) /* Go find any other chunks that were waiting for * ordering and deliver them if needed. */ - sctp_ulpq_reap_ordered(ulpq); + sctp_ulpq_reap_ordered(ulpq, sid); return; } diff --git a/net/socket.c b/net/socket.c index 0010da086acf..b09eb9036a17 100644 --- a/net/socket.c +++ b/net/socket.c @@ -272,8 +272,7 @@ static int init_inodecache(void) (SLAB_HWCACHE_ALIGN | SLAB_RECLAIM_ACCOUNT | SLAB_MEM_SPREAD), - init_once, - NULL); + init_once); if (sock_inode_cachep == NULL) return -ENOMEM; return 0; @@ -778,9 +777,6 @@ static ssize_t sock_aio_write(struct kiocb *iocb, const struct iovec *iov, if (pos != 0) return -ESPIPE; - if (iocb->ki_left == 0) /* Match SYS5 behaviour */ - return 0; - x = alloc_sock_iocb(iocb, &siocb); if (!x) return -ENOMEM; @@ -1939,9 +1935,7 @@ asmlinkage long sys_recvmsg(int fd, struct msghdr __user *msg, total_len = err; cmsg_ptr = (unsigned long)msg_sys.msg_control; - msg_sys.msg_flags = 0; - if (MSG_CMSG_COMPAT & flags) - msg_sys.msg_flags = MSG_CMSG_COMPAT; + msg_sys.msg_flags = flags & (MSG_CMSG_CLOEXEC|MSG_CMSG_COMPAT); if (sock->file->f_flags & O_NONBLOCK) flags |= MSG_DONTWAIT; diff --git a/net/sunrpc/auth.c b/net/sunrpc/auth.c index 9527f2bb1744..1ea27559b1de 100644 --- a/net/sunrpc/auth.c +++ b/net/sunrpc/auth.c @@ -18,12 +18,16 @@ # define RPCDBG_FACILITY RPCDBG_AUTH #endif -static struct rpc_authops * auth_flavors[RPC_AUTH_MAXFLAVOR] = { +static DEFINE_SPINLOCK(rpc_authflavor_lock); +static const struct rpc_authops *auth_flavors[RPC_AUTH_MAXFLAVOR] = { &authnull_ops, /* AUTH_NULL */ &authunix_ops, /* AUTH_UNIX */ NULL, /* others can be loadable modules */ }; +static LIST_HEAD(cred_unused); +static unsigned long number_cred_unused; + static u32 pseudoflavor_to_flavor(u32 flavor) { if (flavor >= RPC_AUTH_MAXFLAVOR) @@ -32,55 +36,67 @@ pseudoflavor_to_flavor(u32 flavor) { } int -rpcauth_register(struct rpc_authops *ops) +rpcauth_register(const struct rpc_authops *ops) { rpc_authflavor_t flavor; + int ret = -EPERM; if ((flavor = ops->au_flavor) >= RPC_AUTH_MAXFLAVOR) return -EINVAL; - if (auth_flavors[flavor] != NULL) - return -EPERM; /* what else? */ - auth_flavors[flavor] = ops; - return 0; + spin_lock(&rpc_authflavor_lock); + if (auth_flavors[flavor] == NULL) { + auth_flavors[flavor] = ops; + ret = 0; + } + spin_unlock(&rpc_authflavor_lock); + return ret; } int -rpcauth_unregister(struct rpc_authops *ops) +rpcauth_unregister(const struct rpc_authops *ops) { rpc_authflavor_t flavor; + int ret = -EPERM; if ((flavor = ops->au_flavor) >= RPC_AUTH_MAXFLAVOR) return -EINVAL; - if (auth_flavors[flavor] != ops) - return -EPERM; /* what else? */ - auth_flavors[flavor] = NULL; - return 0; + spin_lock(&rpc_authflavor_lock); + if (auth_flavors[flavor] == ops) { + auth_flavors[flavor] = NULL; + ret = 0; + } + spin_unlock(&rpc_authflavor_lock); + return ret; } struct rpc_auth * rpcauth_create(rpc_authflavor_t pseudoflavor, struct rpc_clnt *clnt) { struct rpc_auth *auth; - struct rpc_authops *ops; + const struct rpc_authops *ops; u32 flavor = pseudoflavor_to_flavor(pseudoflavor); auth = ERR_PTR(-EINVAL); if (flavor >= RPC_AUTH_MAXFLAVOR) goto out; - /* FIXME - auth_flavors[] really needs an rw lock, - * and module refcounting. */ #ifdef CONFIG_KMOD if ((ops = auth_flavors[flavor]) == NULL) request_module("rpc-auth-%u", flavor); #endif - if ((ops = auth_flavors[flavor]) == NULL) + spin_lock(&rpc_authflavor_lock); + ops = auth_flavors[flavor]; + if (ops == NULL || !try_module_get(ops->owner)) { + spin_unlock(&rpc_authflavor_lock); goto out; + } + spin_unlock(&rpc_authflavor_lock); auth = ops->create(clnt, pseudoflavor); + module_put(ops->owner); if (IS_ERR(auth)) return auth; if (clnt->cl_auth) - rpcauth_destroy(clnt->cl_auth); + rpcauth_release(clnt->cl_auth); clnt->cl_auth = auth; out: @@ -88,7 +104,7 @@ out: } void -rpcauth_destroy(struct rpc_auth *auth) +rpcauth_release(struct rpc_auth *auth) { if (!atomic_dec_and_test(&auth->au_count)) return; @@ -97,11 +113,31 @@ rpcauth_destroy(struct rpc_auth *auth) static DEFINE_SPINLOCK(rpc_credcache_lock); +static void +rpcauth_unhash_cred_locked(struct rpc_cred *cred) +{ + hlist_del_rcu(&cred->cr_hash); + smp_mb__before_clear_bit(); + clear_bit(RPCAUTH_CRED_HASHED, &cred->cr_flags); +} + +static void +rpcauth_unhash_cred(struct rpc_cred *cred) +{ + spinlock_t *cache_lock; + + cache_lock = &cred->cr_auth->au_credcache->lock; + spin_lock(cache_lock); + if (atomic_read(&cred->cr_count) == 0) + rpcauth_unhash_cred_locked(cred); + spin_unlock(cache_lock); +} + /* * Initialize RPC credential cache */ int -rpcauth_init_credcache(struct rpc_auth *auth, unsigned long expire) +rpcauth_init_credcache(struct rpc_auth *auth) { struct rpc_cred_cache *new; int i; @@ -111,8 +147,7 @@ rpcauth_init_credcache(struct rpc_auth *auth, unsigned long expire) return -ENOMEM; for (i = 0; i < RPC_CREDCACHE_NR; i++) INIT_HLIST_HEAD(&new->hashtable[i]); - new->expire = expire; - new->nextgc = jiffies + (expire >> 1); + spin_lock_init(&new->lock); auth->au_credcache = new; return 0; } @@ -121,13 +156,13 @@ rpcauth_init_credcache(struct rpc_auth *auth, unsigned long expire) * Destroy a list of credentials */ static inline -void rpcauth_destroy_credlist(struct hlist_head *head) +void rpcauth_destroy_credlist(struct list_head *head) { struct rpc_cred *cred; - while (!hlist_empty(head)) { - cred = hlist_entry(head->first, struct rpc_cred, cr_hash); - hlist_del_init(&cred->cr_hash); + while (!list_empty(head)) { + cred = list_entry(head->next, struct rpc_cred, cr_lru); + list_del_init(&cred->cr_lru); put_rpccred(cred); } } @@ -137,58 +172,95 @@ void rpcauth_destroy_credlist(struct hlist_head *head) * that are not referenced. */ void -rpcauth_free_credcache(struct rpc_auth *auth) +rpcauth_clear_credcache(struct rpc_cred_cache *cache) { - struct rpc_cred_cache *cache = auth->au_credcache; - HLIST_HEAD(free); - struct hlist_node *pos, *next; + LIST_HEAD(free); + struct hlist_head *head; struct rpc_cred *cred; int i; spin_lock(&rpc_credcache_lock); + spin_lock(&cache->lock); for (i = 0; i < RPC_CREDCACHE_NR; i++) { - hlist_for_each_safe(pos, next, &cache->hashtable[i]) { - cred = hlist_entry(pos, struct rpc_cred, cr_hash); - __hlist_del(&cred->cr_hash); - hlist_add_head(&cred->cr_hash, &free); + head = &cache->hashtable[i]; + while (!hlist_empty(head)) { + cred = hlist_entry(head->first, struct rpc_cred, cr_hash); + get_rpccred(cred); + if (!list_empty(&cred->cr_lru)) { + list_del(&cred->cr_lru); + number_cred_unused--; + } + list_add_tail(&cred->cr_lru, &free); + rpcauth_unhash_cred_locked(cred); } } + spin_unlock(&cache->lock); spin_unlock(&rpc_credcache_lock); rpcauth_destroy_credlist(&free); } -static void -rpcauth_prune_expired(struct rpc_auth *auth, struct rpc_cred *cred, struct hlist_head *free) +/* + * Destroy the RPC credential cache + */ +void +rpcauth_destroy_credcache(struct rpc_auth *auth) { - if (atomic_read(&cred->cr_count) != 1) - return; - if (time_after(jiffies, cred->cr_expire + auth->au_credcache->expire)) - cred->cr_flags &= ~RPCAUTH_CRED_UPTODATE; - if (!(cred->cr_flags & RPCAUTH_CRED_UPTODATE)) { - __hlist_del(&cred->cr_hash); - hlist_add_head(&cred->cr_hash, free); + struct rpc_cred_cache *cache = auth->au_credcache; + + if (cache) { + auth->au_credcache = NULL; + rpcauth_clear_credcache(cache); + kfree(cache); } } /* * Remove stale credentials. Avoid sleeping inside the loop. */ -static void -rpcauth_gc_credcache(struct rpc_auth *auth, struct hlist_head *free) +static int +rpcauth_prune_expired(struct list_head *free, int nr_to_scan) { - struct rpc_cred_cache *cache = auth->au_credcache; - struct hlist_node *pos, *next; - struct rpc_cred *cred; - int i; + spinlock_t *cache_lock; + struct rpc_cred *cred; - dprintk("RPC: gc'ing RPC credentials for auth %p\n", auth); - for (i = 0; i < RPC_CREDCACHE_NR; i++) { - hlist_for_each_safe(pos, next, &cache->hashtable[i]) { - cred = hlist_entry(pos, struct rpc_cred, cr_hash); - rpcauth_prune_expired(auth, cred, free); + while (!list_empty(&cred_unused)) { + cred = list_entry(cred_unused.next, struct rpc_cred, cr_lru); + list_del_init(&cred->cr_lru); + number_cred_unused--; + if (atomic_read(&cred->cr_count) != 0) + continue; + cache_lock = &cred->cr_auth->au_credcache->lock; + spin_lock(cache_lock); + if (atomic_read(&cred->cr_count) == 0) { + get_rpccred(cred); + list_add_tail(&cred->cr_lru, free); + rpcauth_unhash_cred_locked(cred); + nr_to_scan--; } + spin_unlock(cache_lock); + if (nr_to_scan == 0) + break; } - cache->nextgc = jiffies + cache->expire; + return nr_to_scan; +} + +/* + * Run memory cache shrinker. + */ +static int +rpcauth_cache_shrinker(int nr_to_scan, gfp_t gfp_mask) +{ + LIST_HEAD(free); + int res; + + if (list_empty(&cred_unused)) + return 0; + spin_lock(&rpc_credcache_lock); + nr_to_scan = rpcauth_prune_expired(&free, nr_to_scan); + res = (number_cred_unused / 100) * sysctl_vfs_cache_pressure; + spin_unlock(&rpc_credcache_lock); + rpcauth_destroy_credlist(&free); + return res; } /* @@ -198,53 +270,56 @@ struct rpc_cred * rpcauth_lookup_credcache(struct rpc_auth *auth, struct auth_cred * acred, int flags) { + LIST_HEAD(free); struct rpc_cred_cache *cache = auth->au_credcache; - HLIST_HEAD(free); - struct hlist_node *pos, *next; - struct rpc_cred *new = NULL, - *cred = NULL; + struct hlist_node *pos; + struct rpc_cred *cred = NULL, + *entry, *new; int nr = 0; if (!(flags & RPCAUTH_LOOKUP_ROOTCREDS)) nr = acred->uid & RPC_CREDCACHE_MASK; -retry: - spin_lock(&rpc_credcache_lock); - if (time_before(cache->nextgc, jiffies)) - rpcauth_gc_credcache(auth, &free); - hlist_for_each_safe(pos, next, &cache->hashtable[nr]) { - struct rpc_cred *entry; - entry = hlist_entry(pos, struct rpc_cred, cr_hash); - if (entry->cr_ops->crmatch(acred, entry, flags)) { - hlist_del(&entry->cr_hash); - cred = entry; - break; + + rcu_read_lock(); + hlist_for_each_entry_rcu(entry, pos, &cache->hashtable[nr], cr_hash) { + if (!entry->cr_ops->crmatch(acred, entry, flags)) + continue; + spin_lock(&cache->lock); + if (test_bit(RPCAUTH_CRED_HASHED, &entry->cr_flags) == 0) { + spin_unlock(&cache->lock); + continue; } - rpcauth_prune_expired(auth, entry, &free); - } - if (new) { - if (cred) - hlist_add_head(&new->cr_hash, &free); - else - cred = new; + cred = get_rpccred(entry); + spin_unlock(&cache->lock); + break; } - if (cred) { - hlist_add_head(&cred->cr_hash, &cache->hashtable[nr]); - get_rpccred(cred); - } - spin_unlock(&rpc_credcache_lock); + rcu_read_unlock(); - rpcauth_destroy_credlist(&free); + if (cred != NULL) + goto found; - if (!cred) { - new = auth->au_ops->crcreate(auth, acred, flags); - if (!IS_ERR(new)) { -#ifdef RPC_DEBUG - new->cr_magic = RPCAUTH_CRED_MAGIC; -#endif - goto retry; - } else - cred = new; - } else if ((cred->cr_flags & RPCAUTH_CRED_NEW) + new = auth->au_ops->crcreate(auth, acred, flags); + if (IS_ERR(new)) { + cred = new; + goto out; + } + + spin_lock(&cache->lock); + hlist_for_each_entry(entry, pos, &cache->hashtable[nr], cr_hash) { + if (!entry->cr_ops->crmatch(acred, entry, flags)) + continue; + cred = get_rpccred(entry); + break; + } + if (cred == NULL) { + cred = new; + set_bit(RPCAUTH_CRED_HASHED, &cred->cr_flags); + hlist_add_head_rcu(&cred->cr_hash, &cache->hashtable[nr]); + } else + list_add_tail(&new->cr_lru, &free); + spin_unlock(&cache->lock); +found: + if (test_bit(RPCAUTH_CRED_NEW, &cred->cr_flags) && cred->cr_ops->cr_init != NULL && !(flags & RPCAUTH_LOOKUP_NEW)) { int res = cred->cr_ops->cr_init(auth, cred); @@ -253,8 +328,9 @@ retry: cred = ERR_PTR(res); } } - - return (struct rpc_cred *) cred; + rpcauth_destroy_credlist(&free); +out: + return cred; } struct rpc_cred * @@ -275,10 +351,27 @@ rpcauth_lookupcred(struct rpc_auth *auth, int flags) return ret; } +void +rpcauth_init_cred(struct rpc_cred *cred, const struct auth_cred *acred, + struct rpc_auth *auth, const struct rpc_credops *ops) +{ + INIT_HLIST_NODE(&cred->cr_hash); + INIT_LIST_HEAD(&cred->cr_lru); + atomic_set(&cred->cr_count, 1); + cred->cr_auth = auth; + cred->cr_ops = ops; + cred->cr_expire = jiffies; +#ifdef RPC_DEBUG + cred->cr_magic = RPCAUTH_CRED_MAGIC; +#endif + cred->cr_uid = acred->uid; +} +EXPORT_SYMBOL(rpcauth_init_cred); + struct rpc_cred * rpcauth_bindcred(struct rpc_task *task) { - struct rpc_auth *auth = task->tk_auth; + struct rpc_auth *auth = task->tk_client->cl_auth; struct auth_cred acred = { .uid = current->fsuid, .gid = current->fsgid, @@ -288,7 +381,7 @@ rpcauth_bindcred(struct rpc_task *task) int flags = 0; dprintk("RPC: %5u looking up %s cred\n", - task->tk_pid, task->tk_auth->au_ops->au_name); + task->tk_pid, task->tk_client->cl_auth->au_ops->au_name); get_group_info(acred.group_info); if (task->tk_flags & RPC_TASK_ROOTCREDS) flags |= RPCAUTH_LOOKUP_ROOTCREDS; @@ -304,19 +397,42 @@ rpcauth_bindcred(struct rpc_task *task) void rpcauth_holdcred(struct rpc_task *task) { - dprintk("RPC: %5u holding %s cred %p\n", - task->tk_pid, task->tk_auth->au_ops->au_name, - task->tk_msg.rpc_cred); - if (task->tk_msg.rpc_cred) - get_rpccred(task->tk_msg.rpc_cred); + struct rpc_cred *cred = task->tk_msg.rpc_cred; + if (cred != NULL) { + get_rpccred(cred); + dprintk("RPC: %5u holding %s cred %p\n", task->tk_pid, + cred->cr_auth->au_ops->au_name, cred); + } } void put_rpccred(struct rpc_cred *cred) { - cred->cr_expire = jiffies; + /* Fast path for unhashed credentials */ + if (test_bit(RPCAUTH_CRED_HASHED, &cred->cr_flags) != 0) + goto need_lock; + if (!atomic_dec_and_test(&cred->cr_count)) return; + goto out_destroy; +need_lock: + if (!atomic_dec_and_lock(&cred->cr_count, &rpc_credcache_lock)) + return; + if (!list_empty(&cred->cr_lru)) { + number_cred_unused--; + list_del_init(&cred->cr_lru); + } + if (test_bit(RPCAUTH_CRED_UPTODATE, &cred->cr_flags) == 0) + rpcauth_unhash_cred(cred); + else if (test_bit(RPCAUTH_CRED_HASHED, &cred->cr_flags) != 0) { + cred->cr_expire = jiffies; + list_add_tail(&cred->cr_lru, &cred_unused); + number_cred_unused++; + spin_unlock(&rpc_credcache_lock); + return; + } + spin_unlock(&rpc_credcache_lock); +out_destroy: cred->cr_ops->crdestroy(cred); } @@ -326,7 +442,7 @@ rpcauth_unbindcred(struct rpc_task *task) struct rpc_cred *cred = task->tk_msg.rpc_cred; dprintk("RPC: %5u releasing %s cred %p\n", - task->tk_pid, task->tk_auth->au_ops->au_name, cred); + task->tk_pid, cred->cr_auth->au_ops->au_name, cred); put_rpccred(cred); task->tk_msg.rpc_cred = NULL; @@ -338,7 +454,7 @@ rpcauth_marshcred(struct rpc_task *task, __be32 *p) struct rpc_cred *cred = task->tk_msg.rpc_cred; dprintk("RPC: %5u marshaling %s cred %p\n", - task->tk_pid, task->tk_auth->au_ops->au_name, cred); + task->tk_pid, cred->cr_auth->au_ops->au_name, cred); return cred->cr_ops->crmarshal(task, p); } @@ -349,7 +465,7 @@ rpcauth_checkverf(struct rpc_task *task, __be32 *p) struct rpc_cred *cred = task->tk_msg.rpc_cred; dprintk("RPC: %5u validating %s cred %p\n", - task->tk_pid, task->tk_auth->au_ops->au_name, cred); + task->tk_pid, cred->cr_auth->au_ops->au_name, cred); return cred->cr_ops->crvalidate(task, p); } @@ -365,7 +481,7 @@ rpcauth_wrap_req(struct rpc_task *task, kxdrproc_t encode, void *rqstp, if (cred->cr_ops->crwrap_req) return cred->cr_ops->crwrap_req(task, encode, rqstp, data, obj); /* By default, we encode the arguments normally. */ - return encode(rqstp, data, obj); + return rpc_call_xdrproc(encode, rqstp, data, obj); } int @@ -380,7 +496,7 @@ rpcauth_unwrap_resp(struct rpc_task *task, kxdrproc_t decode, void *rqstp, return cred->cr_ops->crunwrap_resp(task, decode, rqstp, data, obj); /* By default, we decode the arguments normally. */ - return decode(rqstp, data, obj); + return rpc_call_xdrproc(decode, rqstp, data, obj); } int @@ -390,7 +506,7 @@ rpcauth_refreshcred(struct rpc_task *task) int err; dprintk("RPC: %5u refreshing %s cred %p\n", - task->tk_pid, task->tk_auth->au_ops->au_name, cred); + task->tk_pid, cred->cr_auth->au_ops->au_name, cred); err = cred->cr_ops->crrefresh(task); if (err < 0) @@ -401,17 +517,35 @@ rpcauth_refreshcred(struct rpc_task *task) void rpcauth_invalcred(struct rpc_task *task) { + struct rpc_cred *cred = task->tk_msg.rpc_cred; + dprintk("RPC: %5u invalidating %s cred %p\n", - task->tk_pid, task->tk_auth->au_ops->au_name, task->tk_msg.rpc_cred); - spin_lock(&rpc_credcache_lock); - if (task->tk_msg.rpc_cred) - task->tk_msg.rpc_cred->cr_flags &= ~RPCAUTH_CRED_UPTODATE; - spin_unlock(&rpc_credcache_lock); + task->tk_pid, cred->cr_auth->au_ops->au_name, cred); + if (cred) + clear_bit(RPCAUTH_CRED_UPTODATE, &cred->cr_flags); } int rpcauth_uptodatecred(struct rpc_task *task) { - return !(task->tk_msg.rpc_cred) || - (task->tk_msg.rpc_cred->cr_flags & RPCAUTH_CRED_UPTODATE); + struct rpc_cred *cred = task->tk_msg.rpc_cred; + + return cred == NULL || + test_bit(RPCAUTH_CRED_UPTODATE, &cred->cr_flags) != 0; +} + +static struct shrinker rpc_cred_shrinker = { + .shrink = rpcauth_cache_shrinker, + .seeks = DEFAULT_SEEKS, +}; + +void __init rpcauth_init_module(void) +{ + rpc_init_authunix(); + register_shrinker(&rpc_cred_shrinker); +} + +void __exit rpcauth_remove_module(void) +{ + unregister_shrinker(&rpc_cred_shrinker); } diff --git a/net/sunrpc/auth_gss/auth_gss.c b/net/sunrpc/auth_gss/auth_gss.c index 4e4ccc5b6fea..53995af9ca4b 100644 --- a/net/sunrpc/auth_gss/auth_gss.c +++ b/net/sunrpc/auth_gss/auth_gss.c @@ -54,9 +54,10 @@ #include <linux/sunrpc/gss_api.h> #include <asm/uaccess.h> -static struct rpc_authops authgss_ops; +static const struct rpc_authops authgss_ops; -static struct rpc_credops gss_credops; +static const struct rpc_credops gss_credops; +static const struct rpc_credops gss_nullops; #ifdef RPC_DEBUG # define RPCDBG_FACILITY RPCDBG_AUTH @@ -64,7 +65,6 @@ static struct rpc_credops gss_credops; #define NFS_NGROUPS 16 -#define GSS_CRED_EXPIRE (60 * HZ) /* XXX: reasonable? */ #define GSS_CRED_SLACK 1024 /* XXX: unused */ /* length of a krb5 verifier (48), plus data added before arguments when * using integrity (two 4-byte integers): */ @@ -79,19 +79,16 @@ static struct rpc_credops gss_credops; /* dump the buffer in `emacs-hexl' style */ #define isprint(c) ((c > 0x1f) && (c < 0x7f)) -static DEFINE_RWLOCK(gss_ctx_lock); - struct gss_auth { + struct kref kref; struct rpc_auth rpc_auth; struct gss_api_mech *mech; enum rpc_gss_svc service; - struct list_head upcalls; struct rpc_clnt *client; struct dentry *dentry; - spinlock_t lock; }; -static void gss_destroy_ctx(struct gss_cl_ctx *); +static void gss_free_ctx(struct gss_cl_ctx *); static struct rpc_pipe_ops gss_upcall_ops; static inline struct gss_cl_ctx * @@ -105,20 +102,24 @@ static inline void gss_put_ctx(struct gss_cl_ctx *ctx) { if (atomic_dec_and_test(&ctx->count)) - gss_destroy_ctx(ctx); + gss_free_ctx(ctx); } +/* gss_cred_set_ctx: + * called by gss_upcall_callback and gss_create_upcall in order + * to set the gss context. The actual exchange of an old context + * and a new one is protected by the inode->i_lock. + */ static void gss_cred_set_ctx(struct rpc_cred *cred, struct gss_cl_ctx *ctx) { struct gss_cred *gss_cred = container_of(cred, struct gss_cred, gc_base); struct gss_cl_ctx *old; - write_lock(&gss_ctx_lock); + old = gss_cred->gc_ctx; - gss_cred->gc_ctx = ctx; - cred->cr_flags |= RPCAUTH_CRED_UPTODATE; - cred->cr_flags &= ~RPCAUTH_CRED_NEW; - write_unlock(&gss_ctx_lock); + rcu_assign_pointer(gss_cred->gc_ctx, ctx); + set_bit(RPCAUTH_CRED_UPTODATE, &cred->cr_flags); + clear_bit(RPCAUTH_CRED_NEW, &cred->cr_flags); if (old) gss_put_ctx(old); } @@ -129,10 +130,10 @@ gss_cred_is_uptodate_ctx(struct rpc_cred *cred) struct gss_cred *gss_cred = container_of(cred, struct gss_cred, gc_base); int res = 0; - read_lock(&gss_ctx_lock); - if ((cred->cr_flags & RPCAUTH_CRED_UPTODATE) && gss_cred->gc_ctx) + rcu_read_lock(); + if (test_bit(RPCAUTH_CRED_UPTODATE, &cred->cr_flags) && gss_cred->gc_ctx) res = 1; - read_unlock(&gss_ctx_lock); + rcu_read_unlock(); return res; } @@ -171,10 +172,10 @@ gss_cred_get_ctx(struct rpc_cred *cred) struct gss_cred *gss_cred = container_of(cred, struct gss_cred, gc_base); struct gss_cl_ctx *ctx = NULL; - read_lock(&gss_ctx_lock); + rcu_read_lock(); if (gss_cred->gc_ctx) ctx = gss_get_ctx(gss_cred->gc_ctx); - read_unlock(&gss_ctx_lock); + rcu_read_unlock(); return ctx; } @@ -269,10 +270,10 @@ gss_release_msg(struct gss_upcall_msg *gss_msg) } static struct gss_upcall_msg * -__gss_find_upcall(struct gss_auth *gss_auth, uid_t uid) +__gss_find_upcall(struct rpc_inode *rpci, uid_t uid) { struct gss_upcall_msg *pos; - list_for_each_entry(pos, &gss_auth->upcalls, list) { + list_for_each_entry(pos, &rpci->in_downcall, list) { if (pos->uid != uid) continue; atomic_inc(&pos->count); @@ -290,24 +291,24 @@ __gss_find_upcall(struct gss_auth *gss_auth, uid_t uid) static inline struct gss_upcall_msg * gss_add_msg(struct gss_auth *gss_auth, struct gss_upcall_msg *gss_msg) { + struct inode *inode = gss_auth->dentry->d_inode; + struct rpc_inode *rpci = RPC_I(inode); struct gss_upcall_msg *old; - spin_lock(&gss_auth->lock); - old = __gss_find_upcall(gss_auth, gss_msg->uid); + spin_lock(&inode->i_lock); + old = __gss_find_upcall(rpci, gss_msg->uid); if (old == NULL) { atomic_inc(&gss_msg->count); - list_add(&gss_msg->list, &gss_auth->upcalls); + list_add(&gss_msg->list, &rpci->in_downcall); } else gss_msg = old; - spin_unlock(&gss_auth->lock); + spin_unlock(&inode->i_lock); return gss_msg; } static void __gss_unhash_msg(struct gss_upcall_msg *gss_msg) { - if (list_empty(&gss_msg->list)) - return; list_del_init(&gss_msg->list); rpc_wake_up_status(&gss_msg->rpc_waitqueue, gss_msg->msg.errno); wake_up_all(&gss_msg->waitqueue); @@ -318,10 +319,14 @@ static void gss_unhash_msg(struct gss_upcall_msg *gss_msg) { struct gss_auth *gss_auth = gss_msg->auth; + struct inode *inode = gss_auth->dentry->d_inode; - spin_lock(&gss_auth->lock); - __gss_unhash_msg(gss_msg); - spin_unlock(&gss_auth->lock); + if (list_empty(&gss_msg->list)) + return; + spin_lock(&inode->i_lock); + if (!list_empty(&gss_msg->list)) + __gss_unhash_msg(gss_msg); + spin_unlock(&inode->i_lock); } static void @@ -330,16 +335,16 @@ gss_upcall_callback(struct rpc_task *task) struct gss_cred *gss_cred = container_of(task->tk_msg.rpc_cred, struct gss_cred, gc_base); struct gss_upcall_msg *gss_msg = gss_cred->gc_upcall; + struct inode *inode = gss_msg->auth->dentry->d_inode; - BUG_ON(gss_msg == NULL); + spin_lock(&inode->i_lock); if (gss_msg->ctx) gss_cred_set_ctx(task->tk_msg.rpc_cred, gss_get_ctx(gss_msg->ctx)); else task->tk_status = gss_msg->msg.errno; - spin_lock(&gss_msg->auth->lock); gss_cred->gc_upcall = NULL; rpc_wake_up_status(&gss_msg->rpc_waitqueue, gss_msg->msg.errno); - spin_unlock(&gss_msg->auth->lock); + spin_unlock(&inode->i_lock); gss_release_msg(gss_msg); } @@ -386,11 +391,12 @@ static inline int gss_refresh_upcall(struct rpc_task *task) { struct rpc_cred *cred = task->tk_msg.rpc_cred; - struct gss_auth *gss_auth = container_of(task->tk_client->cl_auth, + struct gss_auth *gss_auth = container_of(cred->cr_auth, struct gss_auth, rpc_auth); struct gss_cred *gss_cred = container_of(cred, struct gss_cred, gc_base); struct gss_upcall_msg *gss_msg; + struct inode *inode = gss_auth->dentry->d_inode; int err = 0; dprintk("RPC: %5u gss_refresh_upcall for uid %u\n", task->tk_pid, @@ -400,7 +406,7 @@ gss_refresh_upcall(struct rpc_task *task) err = PTR_ERR(gss_msg); goto out; } - spin_lock(&gss_auth->lock); + spin_lock(&inode->i_lock); if (gss_cred->gc_upcall != NULL) rpc_sleep_on(&gss_cred->gc_upcall->rpc_waitqueue, task, NULL, NULL); else if (gss_msg->ctx == NULL && gss_msg->msg.errno >= 0) { @@ -411,7 +417,7 @@ gss_refresh_upcall(struct rpc_task *task) rpc_sleep_on(&gss_msg->rpc_waitqueue, task, gss_upcall_callback, NULL); } else err = gss_msg->msg.errno; - spin_unlock(&gss_auth->lock); + spin_unlock(&inode->i_lock); gss_release_msg(gss_msg); out: dprintk("RPC: %5u gss_refresh_upcall for uid %u result %d\n", @@ -422,6 +428,7 @@ out: static inline int gss_create_upcall(struct gss_auth *gss_auth, struct gss_cred *gss_cred) { + struct inode *inode = gss_auth->dentry->d_inode; struct rpc_cred *cred = &gss_cred->gc_base; struct gss_upcall_msg *gss_msg; DEFINE_WAIT(wait); @@ -435,12 +442,11 @@ gss_create_upcall(struct gss_auth *gss_auth, struct gss_cred *gss_cred) } for (;;) { prepare_to_wait(&gss_msg->waitqueue, &wait, TASK_INTERRUPTIBLE); - spin_lock(&gss_auth->lock); + spin_lock(&inode->i_lock); if (gss_msg->ctx != NULL || gss_msg->msg.errno < 0) { - spin_unlock(&gss_auth->lock); break; } - spin_unlock(&gss_auth->lock); + spin_unlock(&inode->i_lock); if (signalled()) { err = -ERESTARTSYS; goto out_intr; @@ -451,6 +457,7 @@ gss_create_upcall(struct gss_auth *gss_auth, struct gss_cred *gss_cred) gss_cred_set_ctx(cred, gss_get_ctx(gss_msg->ctx)); else err = gss_msg->msg.errno; + spin_unlock(&inode->i_lock); out_intr: finish_wait(&gss_msg->waitqueue, &wait); gss_release_msg(gss_msg); @@ -489,12 +496,11 @@ gss_pipe_downcall(struct file *filp, const char __user *src, size_t mlen) const void *p, *end; void *buf; struct rpc_clnt *clnt; - struct gss_auth *gss_auth; - struct rpc_cred *cred; struct gss_upcall_msg *gss_msg; + struct inode *inode = filp->f_path.dentry->d_inode; struct gss_cl_ctx *ctx; uid_t uid; - int err = -EFBIG; + ssize_t err = -EFBIG; if (mlen > MSG_BUF_MAXSIZE) goto out; @@ -503,7 +509,7 @@ gss_pipe_downcall(struct file *filp, const char __user *src, size_t mlen) if (!buf) goto out; - clnt = RPC_I(filp->f_path.dentry->d_inode)->private; + clnt = RPC_I(inode)->private; err = -EFAULT; if (copy_from_user(buf, src, mlen)) goto err; @@ -519,43 +525,38 @@ gss_pipe_downcall(struct file *filp, const char __user *src, size_t mlen) ctx = gss_alloc_context(); if (ctx == NULL) goto err; - err = 0; - gss_auth = container_of(clnt->cl_auth, struct gss_auth, rpc_auth); - p = gss_fill_context(p, end, ctx, gss_auth->mech); + + err = -ENOENT; + /* Find a matching upcall */ + spin_lock(&inode->i_lock); + gss_msg = __gss_find_upcall(RPC_I(inode), uid); + if (gss_msg == NULL) { + spin_unlock(&inode->i_lock); + goto err_put_ctx; + } + list_del_init(&gss_msg->list); + spin_unlock(&inode->i_lock); + + p = gss_fill_context(p, end, ctx, gss_msg->auth->mech); if (IS_ERR(p)) { err = PTR_ERR(p); - if (err != -EACCES) - goto err_put_ctx; + gss_msg->msg.errno = (err == -EACCES) ? -EACCES : -EAGAIN; + goto err_release_msg; } - spin_lock(&gss_auth->lock); - gss_msg = __gss_find_upcall(gss_auth, uid); - if (gss_msg) { - if (err == 0 && gss_msg->ctx == NULL) - gss_msg->ctx = gss_get_ctx(ctx); - gss_msg->msg.errno = err; - __gss_unhash_msg(gss_msg); - spin_unlock(&gss_auth->lock); - gss_release_msg(gss_msg); - } else { - struct auth_cred acred = { .uid = uid }; - spin_unlock(&gss_auth->lock); - cred = rpcauth_lookup_credcache(clnt->cl_auth, &acred, RPCAUTH_LOOKUP_NEW); - if (IS_ERR(cred)) { - err = PTR_ERR(cred); - goto err_put_ctx; - } - gss_cred_set_ctx(cred, gss_get_ctx(ctx)); - } - gss_put_ctx(ctx); - kfree(buf); - dprintk("RPC: gss_pipe_downcall returning length %Zu\n", mlen); - return mlen; + gss_msg->ctx = gss_get_ctx(ctx); + err = mlen; + +err_release_msg: + spin_lock(&inode->i_lock); + __gss_unhash_msg(gss_msg); + spin_unlock(&inode->i_lock); + gss_release_msg(gss_msg); err_put_ctx: gss_put_ctx(ctx); err: kfree(buf); out: - dprintk("RPC: gss_pipe_downcall returning %d\n", err); + dprintk("RPC: gss_pipe_downcall returning %Zd\n", err); return err; } @@ -563,27 +564,21 @@ static void gss_pipe_release(struct inode *inode) { struct rpc_inode *rpci = RPC_I(inode); - struct rpc_clnt *clnt; - struct rpc_auth *auth; - struct gss_auth *gss_auth; + struct gss_upcall_msg *gss_msg; - clnt = rpci->private; - auth = clnt->cl_auth; - gss_auth = container_of(auth, struct gss_auth, rpc_auth); - spin_lock(&gss_auth->lock); - while (!list_empty(&gss_auth->upcalls)) { - struct gss_upcall_msg *gss_msg; + spin_lock(&inode->i_lock); + while (!list_empty(&rpci->in_downcall)) { - gss_msg = list_entry(gss_auth->upcalls.next, + gss_msg = list_entry(rpci->in_downcall.next, struct gss_upcall_msg, list); gss_msg->msg.errno = -EPIPE; atomic_inc(&gss_msg->count); __gss_unhash_msg(gss_msg); - spin_unlock(&gss_auth->lock); + spin_unlock(&inode->i_lock); gss_release_msg(gss_msg); - spin_lock(&gss_auth->lock); + spin_lock(&inode->i_lock); } - spin_unlock(&gss_auth->lock); + spin_unlock(&inode->i_lock); } static void @@ -637,18 +632,13 @@ gss_create(struct rpc_clnt *clnt, rpc_authflavor_t flavor) gss_auth->service = gss_pseudoflavor_to_service(gss_auth->mech, flavor); if (gss_auth->service == 0) goto err_put_mech; - INIT_LIST_HEAD(&gss_auth->upcalls); - spin_lock_init(&gss_auth->lock); auth = &gss_auth->rpc_auth; auth->au_cslack = GSS_CRED_SLACK >> 2; auth->au_rslack = GSS_VERF_SLACK >> 2; auth->au_ops = &authgss_ops; auth->au_flavor = flavor; atomic_set(&auth->au_count, 1); - - err = rpcauth_init_credcache(auth, GSS_CRED_EXPIRE); - if (err) - goto err_put_mech; + kref_init(&gss_auth->kref); gss_auth->dentry = rpc_mkpipe(clnt->cl_dentry, gss_auth->mech->gm_name, clnt, &gss_upcall_ops, RPC_PIPE_WAIT_FOR_OPEN); @@ -657,7 +647,13 @@ gss_create(struct rpc_clnt *clnt, rpc_authflavor_t flavor) goto err_put_mech; } + err = rpcauth_init_credcache(auth); + if (err) + goto err_unlink_pipe; + return auth; +err_unlink_pipe: + rpc_unlink(gss_auth->dentry); err_put_mech: gss_mech_put(gss_auth->mech); err_free: @@ -668,6 +664,25 @@ out_dec: } static void +gss_free(struct gss_auth *gss_auth) +{ + rpc_unlink(gss_auth->dentry); + gss_auth->dentry = NULL; + gss_mech_put(gss_auth->mech); + + kfree(gss_auth); + module_put(THIS_MODULE); +} + +static void +gss_free_callback(struct kref *kref) +{ + struct gss_auth *gss_auth = container_of(kref, struct gss_auth, kref); + + gss_free(gss_auth); +} + +static void gss_destroy(struct rpc_auth *auth) { struct gss_auth *gss_auth; @@ -675,41 +690,103 @@ gss_destroy(struct rpc_auth *auth) dprintk("RPC: destroying GSS authenticator %p flavor %d\n", auth, auth->au_flavor); + rpcauth_destroy_credcache(auth); + gss_auth = container_of(auth, struct gss_auth, rpc_auth); - rpc_unlink(gss_auth->dentry); - gss_auth->dentry = NULL; - gss_mech_put(gss_auth->mech); + kref_put(&gss_auth->kref, gss_free_callback); +} - rpcauth_free_credcache(auth); - kfree(gss_auth); - module_put(THIS_MODULE); +/* + * gss_destroying_context will cause the RPCSEC_GSS to send a NULL RPC call + * to the server with the GSS control procedure field set to + * RPC_GSS_PROC_DESTROY. This should normally cause the server to release + * all RPCSEC_GSS state associated with that context. + */ +static int +gss_destroying_context(struct rpc_cred *cred) +{ + struct gss_cred *gss_cred = container_of(cred, struct gss_cred, gc_base); + struct gss_auth *gss_auth = container_of(cred->cr_auth, struct gss_auth, rpc_auth); + struct rpc_task *task; + + if (gss_cred->gc_ctx == NULL || + gss_cred->gc_ctx->gc_proc == RPC_GSS_PROC_DESTROY) + return 0; + + gss_cred->gc_ctx->gc_proc = RPC_GSS_PROC_DESTROY; + cred->cr_ops = &gss_nullops; + + /* Take a reference to ensure the cred will be destroyed either + * by the RPC call or by the put_rpccred() below */ + get_rpccred(cred); + + task = rpc_call_null(gss_auth->client, cred, RPC_TASK_ASYNC); + if (!IS_ERR(task)) + rpc_put_task(task); + + put_rpccred(cred); + return 1; } -/* gss_destroy_cred (and gss_destroy_ctx) are used to clean up after failure +/* gss_destroy_cred (and gss_free_ctx) are used to clean up after failure * to create a new cred or context, so they check that things have been * allocated before freeing them. */ static void -gss_destroy_ctx(struct gss_cl_ctx *ctx) +gss_do_free_ctx(struct gss_cl_ctx *ctx) { - dprintk("RPC: gss_destroy_ctx\n"); - - if (ctx->gc_gss_ctx) - gss_delete_sec_context(&ctx->gc_gss_ctx); + dprintk("RPC: gss_free_ctx\n"); kfree(ctx->gc_wire_ctx.data); kfree(ctx); } static void -gss_destroy_cred(struct rpc_cred *rc) +gss_free_ctx_callback(struct rcu_head *head) +{ + struct gss_cl_ctx *ctx = container_of(head, struct gss_cl_ctx, gc_rcu); + gss_do_free_ctx(ctx); +} + +static void +gss_free_ctx(struct gss_cl_ctx *ctx) +{ + struct gss_ctx *gc_gss_ctx; + + gc_gss_ctx = rcu_dereference(ctx->gc_gss_ctx); + rcu_assign_pointer(ctx->gc_gss_ctx, NULL); + call_rcu(&ctx->gc_rcu, gss_free_ctx_callback); + if (gc_gss_ctx) + gss_delete_sec_context(&gc_gss_ctx); +} + +static void +gss_free_cred(struct gss_cred *gss_cred) { - struct gss_cred *cred = container_of(rc, struct gss_cred, gc_base); + dprintk("RPC: gss_free_cred %p\n", gss_cred); + kfree(gss_cred); +} - dprintk("RPC: gss_destroy_cred \n"); +static void +gss_free_cred_callback(struct rcu_head *head) +{ + struct gss_cred *gss_cred = container_of(head, struct gss_cred, gc_base.cr_rcu); + gss_free_cred(gss_cred); +} - if (cred->gc_ctx) - gss_put_ctx(cred->gc_ctx); - kfree(cred); +static void +gss_destroy_cred(struct rpc_cred *cred) +{ + struct gss_cred *gss_cred = container_of(cred, struct gss_cred, gc_base); + struct gss_auth *gss_auth = container_of(cred->cr_auth, struct gss_auth, rpc_auth); + struct gss_cl_ctx *ctx = gss_cred->gc_ctx; + + if (gss_destroying_context(cred)) + return; + rcu_assign_pointer(gss_cred->gc_ctx, NULL); + call_rcu(&cred->cr_rcu, gss_free_cred_callback); + if (ctx) + gss_put_ctx(ctx); + kref_put(&gss_auth->kref, gss_free_callback); } /* @@ -734,16 +811,14 @@ gss_create_cred(struct rpc_auth *auth, struct auth_cred *acred, int flags) if (!(cred = kzalloc(sizeof(*cred), GFP_KERNEL))) goto out_err; - atomic_set(&cred->gc_count, 1); - cred->gc_uid = acred->uid; + rpcauth_init_cred(&cred->gc_base, acred, auth, &gss_credops); /* * Note: in order to force a call to call_refresh(), we deliberately * fail to flag the credential as RPCAUTH_CRED_UPTODATE. */ - cred->gc_flags = 0; - cred->gc_base.cr_ops = &gss_credops; - cred->gc_base.cr_flags = RPCAUTH_CRED_NEW; + cred->gc_base.cr_flags = 1UL << RPCAUTH_CRED_NEW; cred->gc_service = gss_auth->service; + kref_get(&gss_auth->kref); return &cred->gc_base; out_err: @@ -774,7 +849,7 @@ gss_match(struct auth_cred *acred, struct rpc_cred *rc, int flags) * we don't really care if the credential has expired or not, * since the caller should be prepared to reinitialise it. */ - if ((flags & RPCAUTH_LOOKUP_NEW) && (rc->cr_flags & RPCAUTH_CRED_NEW)) + if ((flags & RPCAUTH_LOOKUP_NEW) && test_bit(RPCAUTH_CRED_NEW, &rc->cr_flags)) goto out; /* Don't match with creds that have expired. */ if (gss_cred->gc_ctx && time_after(jiffies, gss_cred->gc_ctx->gc_expiry)) @@ -830,7 +905,7 @@ gss_marshal(struct rpc_task *task, __be32 *p) mic.data = (u8 *)(p + 1); maj_stat = gss_get_mic(ctx->gc_gss_ctx, &verf_buf, &mic); if (maj_stat == GSS_S_CONTEXT_EXPIRED) { - cred->cr_flags &= ~RPCAUTH_CRED_UPTODATE; + clear_bit(RPCAUTH_CRED_UPTODATE, &cred->cr_flags); } else if (maj_stat != 0) { printk("gss_marshal: gss_get_mic FAILED (%d)\n", maj_stat); goto out_put_ctx; @@ -855,6 +930,13 @@ gss_refresh(struct rpc_task *task) return 0; } +/* Dummy refresh routine: used only when destroying the context */ +static int +gss_refresh_null(struct rpc_task *task) +{ + return -EACCES; +} + static __be32 * gss_validate(struct rpc_task *task, __be32 *p) { @@ -883,12 +965,15 @@ gss_validate(struct rpc_task *task, __be32 *p) maj_stat = gss_verify_mic(ctx->gc_gss_ctx, &verf_buf, &mic); if (maj_stat == GSS_S_CONTEXT_EXPIRED) - cred->cr_flags &= ~RPCAUTH_CRED_UPTODATE; - if (maj_stat) + clear_bit(RPCAUTH_CRED_UPTODATE, &cred->cr_flags); + if (maj_stat) { + dprintk("RPC: %5u gss_validate: gss_verify_mic returned" + "error 0x%08x\n", task->tk_pid, maj_stat); goto out_bad; + } /* We leave it to unwrap to calculate au_rslack. For now we just * calculate the length of the verifier: */ - task->tk_auth->au_verfsize = XDR_QUADLEN(len) + 2; + cred->cr_auth->au_verfsize = XDR_QUADLEN(len) + 2; gss_put_ctx(ctx); dprintk("RPC: %5u gss_validate: gss_verify_mic succeeded.\n", task->tk_pid); @@ -917,7 +1002,7 @@ gss_wrap_req_integ(struct rpc_cred *cred, struct gss_cl_ctx *ctx, offset = (u8 *)p - (u8 *)snd_buf->head[0].iov_base; *p++ = htonl(rqstp->rq_seqno); - status = encode(rqstp, p, obj); + status = rpc_call_xdrproc(encode, rqstp, p, obj); if (status) return status; @@ -937,7 +1022,7 @@ gss_wrap_req_integ(struct rpc_cred *cred, struct gss_cl_ctx *ctx, maj_stat = gss_get_mic(ctx->gc_gss_ctx, &integ_buf, &mic); status = -EIO; /* XXX? */ if (maj_stat == GSS_S_CONTEXT_EXPIRED) - cred->cr_flags &= ~RPCAUTH_CRED_UPTODATE; + clear_bit(RPCAUTH_CRED_UPTODATE, &cred->cr_flags); else if (maj_stat) return status; q = xdr_encode_opaque(p, NULL, mic.len); @@ -1011,7 +1096,7 @@ gss_wrap_req_priv(struct rpc_cred *cred, struct gss_cl_ctx *ctx, offset = (u8 *)p - (u8 *)snd_buf->head[0].iov_base; *p++ = htonl(rqstp->rq_seqno); - status = encode(rqstp, p, obj); + status = rpc_call_xdrproc(encode, rqstp, p, obj); if (status) return status; @@ -1036,7 +1121,7 @@ gss_wrap_req_priv(struct rpc_cred *cred, struct gss_cl_ctx *ctx, /* We're assuming that when GSS_S_CONTEXT_EXPIRED, the encryption was * done anyway, so it's safe to put the request on the wire: */ if (maj_stat == GSS_S_CONTEXT_EXPIRED) - cred->cr_flags &= ~RPCAUTH_CRED_UPTODATE; + clear_bit(RPCAUTH_CRED_UPTODATE, &cred->cr_flags); else if (maj_stat) return status; @@ -1070,12 +1155,12 @@ gss_wrap_req(struct rpc_task *task, /* The spec seems a little ambiguous here, but I think that not * wrapping context destruction requests makes the most sense. */ - status = encode(rqstp, p, obj); + status = rpc_call_xdrproc(encode, rqstp, p, obj); goto out; } switch (gss_cred->gc_service) { case RPC_GSS_SVC_NONE: - status = encode(rqstp, p, obj); + status = rpc_call_xdrproc(encode, rqstp, p, obj); break; case RPC_GSS_SVC_INTEGRITY: status = gss_wrap_req_integ(cred, ctx, encode, @@ -1123,7 +1208,7 @@ gss_unwrap_resp_integ(struct rpc_cred *cred, struct gss_cl_ctx *ctx, maj_stat = gss_verify_mic(ctx->gc_gss_ctx, &integ_buf, &mic); if (maj_stat == GSS_S_CONTEXT_EXPIRED) - cred->cr_flags &= ~RPCAUTH_CRED_UPTODATE; + clear_bit(RPCAUTH_CRED_UPTODATE, &cred->cr_flags); if (maj_stat != GSS_S_COMPLETE) return status; return 0; @@ -1148,7 +1233,7 @@ gss_unwrap_resp_priv(struct rpc_cred *cred, struct gss_cl_ctx *ctx, maj_stat = gss_unwrap(ctx->gc_gss_ctx, offset, rcv_buf); if (maj_stat == GSS_S_CONTEXT_EXPIRED) - cred->cr_flags &= ~RPCAUTH_CRED_UPTODATE; + clear_bit(RPCAUTH_CRED_UPTODATE, &cred->cr_flags); if (maj_stat != GSS_S_COMPLETE) return status; if (ntohl(*(*p)++) != rqstp->rq_seqno) @@ -1188,10 +1273,10 @@ gss_unwrap_resp(struct rpc_task *task, break; } /* take into account extra slack for integrity and privacy cases: */ - task->tk_auth->au_rslack = task->tk_auth->au_verfsize + (p - savedp) + cred->cr_auth->au_rslack = cred->cr_auth->au_verfsize + (p - savedp) + (savedlen - head->iov_len); out_decode: - status = decode(rqstp, p, obj); + status = rpc_call_xdrproc(decode, rqstp, p, obj); out: gss_put_ctx(ctx); dprintk("RPC: %5u gss_unwrap_resp returning %d\n", task->tk_pid, @@ -1199,7 +1284,7 @@ out: return status; } -static struct rpc_authops authgss_ops = { +static const struct rpc_authops authgss_ops = { .owner = THIS_MODULE, .au_flavor = RPC_AUTH_GSS, #ifdef RPC_DEBUG @@ -1211,7 +1296,7 @@ static struct rpc_authops authgss_ops = { .crcreate = gss_create_cred }; -static struct rpc_credops gss_credops = { +static const struct rpc_credops gss_credops = { .cr_name = "AUTH_GSS", .crdestroy = gss_destroy_cred, .cr_init = gss_cred_init, @@ -1223,6 +1308,17 @@ static struct rpc_credops gss_credops = { .crunwrap_resp = gss_unwrap_resp, }; +static const struct rpc_credops gss_nullops = { + .cr_name = "AUTH_GSS", + .crdestroy = gss_destroy_cred, + .crmatch = gss_match, + .crmarshal = gss_marshal, + .crrefresh = gss_refresh_null, + .crvalidate = gss_validate, + .crwrap_req = gss_wrap_req, + .crunwrap_resp = gss_unwrap_resp, +}; + static struct rpc_pipe_ops gss_upcall_ops = { .upcall = gss_pipe_upcall, .downcall = gss_pipe_downcall, diff --git a/net/sunrpc/auth_gss/gss_krb5_crypto.c b/net/sunrpc/auth_gss/gss_krb5_crypto.c index f441aa0b26dc..bfb6a29633dd 100644 --- a/net/sunrpc/auth_gss/gss_krb5_crypto.c +++ b/net/sunrpc/auth_gss/gss_krb5_crypto.c @@ -67,7 +67,7 @@ krb5_encrypt( if (crypto_blkcipher_ivsize(tfm) > 16) { dprintk("RPC: gss_k5encrypt: tfm iv size to large %d\n", - crypto_blkcipher_ivsize(tfm)); + crypto_blkcipher_ivsize(tfm)); goto out; } diff --git a/net/sunrpc/auth_gss/gss_krb5_mech.c b/net/sunrpc/auth_gss/gss_krb5_mech.c index 7b1943217053..9843eacef11d 100644 --- a/net/sunrpc/auth_gss/gss_krb5_mech.c +++ b/net/sunrpc/auth_gss/gss_krb5_mech.c @@ -201,7 +201,7 @@ gss_delete_sec_context_kerberos(void *internal_ctx) { kfree(kctx); } -static struct gss_api_ops gss_kerberos_ops = { +static const struct gss_api_ops gss_kerberos_ops = { .gss_import_sec_context = gss_import_sec_context_kerberos, .gss_get_mic = gss_get_mic_kerberos, .gss_verify_mic = gss_verify_mic_kerberos, @@ -231,6 +231,7 @@ static struct pf_desc gss_kerberos_pfs[] = { static struct gss_api_mech gss_kerberos_mech = { .gm_name = "krb5", .gm_owner = THIS_MODULE, + .gm_oid = {9, (void *)"\x2a\x86\x48\x86\xf7\x12\x01\x02\x02"}, .gm_ops = &gss_kerberos_ops, .gm_pf_num = ARRAY_SIZE(gss_kerberos_pfs), .gm_pfs = gss_kerberos_pfs, diff --git a/net/sunrpc/auth_gss/gss_mech_switch.c b/net/sunrpc/auth_gss/gss_mech_switch.c index 26872517ccf3..61801a069ff0 100644 --- a/net/sunrpc/auth_gss/gss_mech_switch.c +++ b/net/sunrpc/auth_gss/gss_mech_switch.c @@ -194,6 +194,20 @@ gss_mech_get_by_pseudoflavor(u32 pseudoflavor) EXPORT_SYMBOL(gss_mech_get_by_pseudoflavor); u32 +gss_svc_to_pseudoflavor(struct gss_api_mech *gm, u32 service) +{ + int i; + + for (i = 0; i < gm->gm_pf_num; i++) { + if (gm->gm_pfs[i].service == service) { + return gm->gm_pfs[i].pseudoflavor; + } + } + return RPC_AUTH_MAXFLAVOR; /* illegal value */ +} +EXPORT_SYMBOL(gss_svc_to_pseudoflavor); + +u32 gss_pseudoflavor_to_service(struct gss_api_mech *gm, u32 pseudoflavor) { int i; diff --git a/net/sunrpc/auth_gss/gss_spkm3_mech.c b/net/sunrpc/auth_gss/gss_spkm3_mech.c index 7e15aa68ae64..5deb4b6e4514 100644 --- a/net/sunrpc/auth_gss/gss_spkm3_mech.c +++ b/net/sunrpc/auth_gss/gss_spkm3_mech.c @@ -202,7 +202,7 @@ gss_get_mic_spkm3(struct gss_ctx *ctx, return err; } -static struct gss_api_ops gss_spkm3_ops = { +static const struct gss_api_ops gss_spkm3_ops = { .gss_import_sec_context = gss_import_sec_context_spkm3, .gss_get_mic = gss_get_mic_spkm3, .gss_verify_mic = gss_verify_mic_spkm3, @@ -217,6 +217,7 @@ static struct pf_desc gss_spkm3_pfs[] = { static struct gss_api_mech gss_spkm3_mech = { .gm_name = "spkm3", .gm_owner = THIS_MODULE, + .gm_oid = {7, "\053\006\001\005\005\001\003"}, .gm_ops = &gss_spkm3_ops, .gm_pf_num = ARRAY_SIZE(gss_spkm3_pfs), .gm_pfs = gss_spkm3_pfs, diff --git a/net/sunrpc/auth_gss/svcauth_gss.c b/net/sunrpc/auth_gss/svcauth_gss.c index 805e72584af0..7da7050f06c3 100644 --- a/net/sunrpc/auth_gss/svcauth_gss.c +++ b/net/sunrpc/auth_gss/svcauth_gss.c @@ -42,7 +42,6 @@ #include <linux/pagemap.h> #include <linux/sunrpc/auth_gss.h> -#include <linux/sunrpc/svcauth.h> #include <linux/sunrpc/gss_err.h> #include <linux/sunrpc/svcauth.h> #include <linux/sunrpc/svcauth_gss.h> @@ -743,6 +742,15 @@ find_gss_auth_domain(struct gss_ctx *ctx, u32 svc) static struct auth_ops svcauthops_gss; +u32 svcauth_gss_flavor(struct auth_domain *dom) +{ + struct gss_domain *gd = container_of(dom, struct gss_domain, h); + + return gd->pseudoflavor; +} + +EXPORT_SYMBOL(svcauth_gss_flavor); + int svcauth_gss_register_pseudoflavor(u32 pseudoflavor, char * name) { @@ -854,7 +862,7 @@ unwrap_priv_data(struct svc_rqst *rqstp, struct xdr_buf *buf, u32 seq, struct gs u32 priv_len, maj_stat; int pad, saved_len, remaining_len, offset; - rqstp->rq_sendfile_ok = 0; + rqstp->rq_splice_ok = 0; priv_len = svc_getnl(&buf->head[0]); if (rqstp->rq_deferred) { @@ -914,10 +922,23 @@ svcauth_gss_set_client(struct svc_rqst *rqstp) struct gss_svc_data *svcdata = rqstp->rq_auth_data; struct rsc *rsci = svcdata->rsci; struct rpc_gss_wire_cred *gc = &svcdata->clcred; + int stat; - rqstp->rq_client = find_gss_auth_domain(rsci->mechctx, gc->gc_svc); - if (rqstp->rq_client == NULL) + /* + * A gss export can be specified either by: + * export *(sec=krb5,rw) + * or by + * export gss/krb5(rw) + * The latter is deprecated; but for backwards compatibility reasons + * the nfsd code will still fall back on trying it if the former + * doesn't work; so we try to make both available to nfsd, below. + */ + rqstp->rq_gssclient = find_gss_auth_domain(rsci->mechctx, gc->gc_svc); + if (rqstp->rq_gssclient == NULL) return SVC_DENIED; + stat = svcauth_unix_set_client(rqstp); + if (stat == SVC_DROP) + return stat; return SVC_OK; } @@ -1089,7 +1110,6 @@ svcauth_gss_accept(struct svc_rqst *rqstp, __be32 *authp) svc_putnl(resv, GSS_SEQ_WIN); if (svc_safe_putnetobj(resv, &rsip->out_token)) goto drop; - rqstp->rq_client = NULL; } goto complete; case RPC_GSS_PROC_DESTROY: @@ -1132,6 +1152,8 @@ svcauth_gss_accept(struct svc_rqst *rqstp, __be32 *authp) } svcdata->rsci = rsci; cache_get(&rsci->h); + rqstp->rq_flavor = gss_svc_to_pseudoflavor( + rsci->mechctx->mech_type, gc->gc_svc); ret = SVC_OK; goto out; } @@ -1318,6 +1340,9 @@ out_err: if (rqstp->rq_client) auth_domain_put(rqstp->rq_client); rqstp->rq_client = NULL; + if (rqstp->rq_gssclient) + auth_domain_put(rqstp->rq_gssclient); + rqstp->rq_gssclient = NULL; if (rqstp->rq_cred.cr_group_info) put_group_info(rqstp->rq_cred.cr_group_info); rqstp->rq_cred.cr_group_info = NULL; diff --git a/net/sunrpc/auth_null.c b/net/sunrpc/auth_null.c index 3df9fccab2f8..537d0e8589dd 100644 --- a/net/sunrpc/auth_null.c +++ b/net/sunrpc/auth_null.c @@ -76,7 +76,7 @@ nul_marshal(struct rpc_task *task, __be32 *p) static int nul_refresh(struct rpc_task *task) { - task->tk_msg.rpc_cred->cr_flags |= RPCAUTH_CRED_UPTODATE; + set_bit(RPCAUTH_CRED_UPTODATE, &task->tk_msg.rpc_cred->cr_flags); return 0; } @@ -101,7 +101,7 @@ nul_validate(struct rpc_task *task, __be32 *p) return p; } -struct rpc_authops authnull_ops = { +const struct rpc_authops authnull_ops = { .owner = THIS_MODULE, .au_flavor = RPC_AUTH_NULL, #ifdef RPC_DEBUG @@ -122,7 +122,7 @@ struct rpc_auth null_auth = { }; static -struct rpc_credops null_credops = { +const struct rpc_credops null_credops = { .cr_name = "AUTH_NULL", .crdestroy = nul_destroy_cred, .crmatch = nul_match, @@ -133,9 +133,11 @@ struct rpc_credops null_credops = { static struct rpc_cred null_cred = { + .cr_lru = LIST_HEAD_INIT(null_cred.cr_lru), + .cr_auth = &null_auth, .cr_ops = &null_credops, .cr_count = ATOMIC_INIT(1), - .cr_flags = RPCAUTH_CRED_UPTODATE, + .cr_flags = 1UL << RPCAUTH_CRED_UPTODATE, #ifdef RPC_DEBUG .cr_magic = RPCAUTH_CRED_MAGIC, #endif diff --git a/net/sunrpc/auth_unix.c b/net/sunrpc/auth_unix.c index 4e7733aee36e..5ed91e5bcee4 100644 --- a/net/sunrpc/auth_unix.c +++ b/net/sunrpc/auth_unix.c @@ -20,11 +20,6 @@ struct unx_cred { gid_t uc_gids[NFS_NGROUPS]; }; #define uc_uid uc_base.cr_uid -#define uc_count uc_base.cr_count -#define uc_flags uc_base.cr_flags -#define uc_expire uc_base.cr_expire - -#define UNX_CRED_EXPIRE (60 * HZ) #define UNX_WRITESLACK (21 + (UNX_MAXNODENAME >> 2)) @@ -34,15 +29,14 @@ struct unx_cred { static struct rpc_auth unix_auth; static struct rpc_cred_cache unix_cred_cache; -static struct rpc_credops unix_credops; +static const struct rpc_credops unix_credops; static struct rpc_auth * unx_create(struct rpc_clnt *clnt, rpc_authflavor_t flavor) { dprintk("RPC: creating UNIX authenticator for client %p\n", clnt); - if (atomic_inc_return(&unix_auth.au_count) == 0) - unix_cred_cache.nextgc = jiffies + (unix_cred_cache.expire >> 1); + atomic_inc(&unix_auth.au_count); return &unix_auth; } @@ -50,7 +44,7 @@ static void unx_destroy(struct rpc_auth *auth) { dprintk("RPC: destroying UNIX authenticator %p\n", auth); - rpcauth_free_credcache(auth); + rpcauth_clear_credcache(auth->au_credcache); } /* @@ -74,8 +68,8 @@ unx_create_cred(struct rpc_auth *auth, struct auth_cred *acred, int flags) if (!(cred = kmalloc(sizeof(*cred), GFP_KERNEL))) return ERR_PTR(-ENOMEM); - atomic_set(&cred->uc_count, 1); - cred->uc_flags = RPCAUTH_CRED_UPTODATE; + rpcauth_init_cred(&cred->uc_base, acred, auth, &unix_credops); + cred->uc_base.cr_flags = 1UL << RPCAUTH_CRED_UPTODATE; if (flags & RPCAUTH_LOOKUP_ROOTCREDS) { cred->uc_uid = 0; cred->uc_gid = 0; @@ -85,22 +79,34 @@ unx_create_cred(struct rpc_auth *auth, struct auth_cred *acred, int flags) if (groups > NFS_NGROUPS) groups = NFS_NGROUPS; - cred->uc_uid = acred->uid; cred->uc_gid = acred->gid; for (i = 0; i < groups; i++) cred->uc_gids[i] = GROUP_AT(acred->group_info, i); if (i < NFS_NGROUPS) cred->uc_gids[i] = NOGROUP; } - cred->uc_base.cr_ops = &unix_credops; - return (struct rpc_cred *) cred; + return &cred->uc_base; +} + +static void +unx_free_cred(struct unx_cred *unx_cred) +{ + dprintk("RPC: unx_free_cred %p\n", unx_cred); + kfree(unx_cred); +} + +static void +unx_free_cred_callback(struct rcu_head *head) +{ + struct unx_cred *unx_cred = container_of(head, struct unx_cred, uc_base.cr_rcu); + unx_free_cred(unx_cred); } static void unx_destroy_cred(struct rpc_cred *cred) { - kfree(cred); + call_rcu(&cred->cr_rcu, unx_free_cred_callback); } /* @@ -111,7 +117,7 @@ unx_destroy_cred(struct rpc_cred *cred) static int unx_match(struct auth_cred *acred, struct rpc_cred *rcred, int flags) { - struct unx_cred *cred = (struct unx_cred *) rcred; + struct unx_cred *cred = container_of(rcred, struct unx_cred, uc_base); int i; if (!(flags & RPCAUTH_LOOKUP_ROOTCREDS)) { @@ -142,7 +148,7 @@ static __be32 * unx_marshal(struct rpc_task *task, __be32 *p) { struct rpc_clnt *clnt = task->tk_client; - struct unx_cred *cred = (struct unx_cred *) task->tk_msg.rpc_cred; + struct unx_cred *cred = container_of(task->tk_msg.rpc_cred, struct unx_cred, uc_base); __be32 *base, *hold; int i; @@ -175,7 +181,7 @@ unx_marshal(struct rpc_task *task, __be32 *p) static int unx_refresh(struct rpc_task *task) { - task->tk_msg.rpc_cred->cr_flags |= RPCAUTH_CRED_UPTODATE; + set_bit(RPCAUTH_CRED_UPTODATE, &task->tk_msg.rpc_cred->cr_flags); return 0; } @@ -198,13 +204,18 @@ unx_validate(struct rpc_task *task, __be32 *p) printk("RPC: giant verf size: %u\n", size); return NULL; } - task->tk_auth->au_rslack = (size >> 2) + 2; + task->tk_msg.rpc_cred->cr_auth->au_rslack = (size >> 2) + 2; p += (size >> 2); return p; } -struct rpc_authops authunix_ops = { +void __init rpc_init_authunix(void) +{ + spin_lock_init(&unix_cred_cache.lock); +} + +const struct rpc_authops authunix_ops = { .owner = THIS_MODULE, .au_flavor = RPC_AUTH_UNIX, #ifdef RPC_DEBUG @@ -218,7 +229,6 @@ struct rpc_authops authunix_ops = { static struct rpc_cred_cache unix_cred_cache = { - .expire = UNX_CRED_EXPIRE, }; static @@ -232,7 +242,7 @@ struct rpc_auth unix_auth = { }; static -struct rpc_credops unix_credops = { +const struct rpc_credops unix_credops = { .cr_name = "AUTH_UNIX", .crdestroy = unx_destroy_cred, .crmatch = unx_match, diff --git a/net/sunrpc/cache.c b/net/sunrpc/cache.c index 543b085ae2c1..ebe344f34d1a 100644 --- a/net/sunrpc/cache.c +++ b/net/sunrpc/cache.c @@ -371,8 +371,7 @@ int cache_unregister(struct cache_detail *cd) } if (list_empty(&cache_list)) { /* module must be being unloaded so its safe to kill the worker */ - cancel_delayed_work(&cache_cleaner); - flush_scheduled_work(); + cancel_delayed_work_sync(&cache_cleaner); } return 0; } @@ -1210,7 +1209,7 @@ static int c_show(struct seq_file *m, void *p) return cd->cache_show(m, cd, cp); } -static struct seq_operations cache_content_op = { +static const struct seq_operations cache_content_op = { .start = c_start, .next = c_next, .stop = c_stop, diff --git a/net/sunrpc/clnt.c b/net/sunrpc/clnt.c index d8fbee40a19c..52429b1ffcc1 100644 --- a/net/sunrpc/clnt.c +++ b/net/sunrpc/clnt.c @@ -44,6 +44,12 @@ dprintk("RPC: %5u %s (status %d)\n", t->tk_pid, \ __FUNCTION__, t->tk_status) +/* + * All RPC clients are linked into this list + */ +static LIST_HEAD(all_clients); +static DEFINE_SPINLOCK(rpc_client_lock); + static DECLARE_WAIT_QUEUE_HEAD(destroy_wait); @@ -66,6 +72,21 @@ static void call_connect_status(struct rpc_task *task); static __be32 * call_header(struct rpc_task *task); static __be32 * call_verify(struct rpc_task *task); +static int rpc_ping(struct rpc_clnt *clnt, int flags); + +static void rpc_register_client(struct rpc_clnt *clnt) +{ + spin_lock(&rpc_client_lock); + list_add(&clnt->cl_clients, &all_clients); + spin_unlock(&rpc_client_lock); +} + +static void rpc_unregister_client(struct rpc_clnt *clnt) +{ + spin_lock(&rpc_client_lock); + list_del(&clnt->cl_clients); + spin_unlock(&rpc_client_lock); +} static int rpc_setup_pipedir(struct rpc_clnt *clnt, char *dir_name) @@ -111,6 +132,9 @@ static struct rpc_clnt * rpc_new_client(struct rpc_xprt *xprt, char *servname, s dprintk("RPC: creating %s client for %s (xprt %p)\n", program->name, servname, xprt); + err = rpciod_up(); + if (err) + goto out_no_rpciod; err = -EINVAL; if (!xprt) goto out_no_xprt; @@ -121,8 +145,6 @@ static struct rpc_clnt * rpc_new_client(struct rpc_xprt *xprt, char *servname, s clnt = kzalloc(sizeof(*clnt), GFP_KERNEL); if (!clnt) goto out_err; - atomic_set(&clnt->cl_users, 0); - atomic_set(&clnt->cl_count, 1); clnt->cl_parent = clnt; clnt->cl_server = clnt->cl_inline_name; @@ -148,6 +170,8 @@ static struct rpc_clnt * rpc_new_client(struct rpc_xprt *xprt, char *servname, s if (clnt->cl_metrics == NULL) goto out_no_stats; clnt->cl_program = program; + INIT_LIST_HEAD(&clnt->cl_tasks); + spin_lock_init(&clnt->cl_lock); if (!xprt_bound(clnt->cl_xprt)) clnt->cl_autobind = 1; @@ -155,6 +179,8 @@ static struct rpc_clnt * rpc_new_client(struct rpc_xprt *xprt, char *servname, s clnt->cl_rtt = &clnt->cl_rtt_default; rpc_init_rtt(&clnt->cl_rtt_default, xprt->timeout.to_initval); + kref_init(&clnt->cl_kref); + err = rpc_setup_pipedir(clnt, program->pipe_dir_name); if (err < 0) goto out_no_path; @@ -172,6 +198,7 @@ static struct rpc_clnt * rpc_new_client(struct rpc_xprt *xprt, char *servname, s if (clnt->cl_nodelen > UNX_MAXNODENAME) clnt->cl_nodelen = UNX_MAXNODENAME; memcpy(clnt->cl_nodename, utsname()->nodename, clnt->cl_nodelen); + rpc_register_client(clnt); return clnt; out_no_auth: @@ -188,6 +215,8 @@ out_no_stats: out_err: xprt_put(xprt); out_no_xprt: + rpciod_down(); +out_no_rpciod: return ERR_PTR(err); } @@ -205,13 +234,32 @@ struct rpc_clnt *rpc_create(struct rpc_create_args *args) { struct rpc_xprt *xprt; struct rpc_clnt *clnt; + struct rpc_xprtsock_create xprtargs = { + .proto = args->protocol, + .srcaddr = args->saddress, + .dstaddr = args->address, + .addrlen = args->addrsize, + .timeout = args->timeout + }; + char servername[20]; - xprt = xprt_create_transport(args->protocol, args->address, - args->addrsize, args->timeout); + xprt = xprt_create_transport(&xprtargs); if (IS_ERR(xprt)) return (struct rpc_clnt *)xprt; /* + * If the caller chooses not to specify a hostname, whip + * up a string representation of the passed-in address. + */ + if (args->servername == NULL) { + struct sockaddr_in *addr = + (struct sockaddr_in *) &args->address; + snprintf(servername, sizeof(servername), NIPQUAD_FMT, + NIPQUAD(addr->sin_addr.s_addr)); + args->servername = servername; + } + + /* * By default, kernel RPC client connects from a reserved port. * CAP_NET_BIND_SERVICE will not be set for unprivileged requesters, * but it is always enabled for rpciod, which handles the connect @@ -245,8 +293,6 @@ struct rpc_clnt *rpc_create(struct rpc_create_args *args) clnt->cl_intr = 1; if (args->flags & RPC_CLNT_CREATE_AUTOBIND) clnt->cl_autobind = 1; - if (args->flags & RPC_CLNT_CREATE_ONESHOT) - clnt->cl_oneshot = 1; if (args->flags & RPC_CLNT_CREATE_DISCRTRY) clnt->cl_discrtry = 1; @@ -268,24 +314,25 @@ rpc_clone_client(struct rpc_clnt *clnt) new = kmemdup(clnt, sizeof(*new), GFP_KERNEL); if (!new) goto out_no_clnt; - atomic_set(&new->cl_count, 1); - atomic_set(&new->cl_users, 0); + new->cl_parent = clnt; + /* Turn off autobind on clones */ + new->cl_autobind = 0; + INIT_LIST_HEAD(&new->cl_tasks); + spin_lock_init(&new->cl_lock); + rpc_init_rtt(&new->cl_rtt_default, clnt->cl_xprt->timeout.to_initval); new->cl_metrics = rpc_alloc_iostats(clnt); if (new->cl_metrics == NULL) goto out_no_stats; + kref_init(&new->cl_kref); err = rpc_setup_pipedir(new, clnt->cl_program->pipe_dir_name); if (err != 0) goto out_no_path; - new->cl_parent = clnt; - atomic_inc(&clnt->cl_count); - new->cl_xprt = xprt_get(clnt->cl_xprt); - /* Turn off autobind on clones */ - new->cl_autobind = 0; - new->cl_oneshot = 0; - new->cl_dead = 0; - rpc_init_rtt(&new->cl_rtt_default, clnt->cl_xprt->timeout.to_initval); if (new->cl_auth) atomic_inc(&new->cl_auth->au_count); + xprt_get(clnt->cl_xprt); + kref_get(&clnt->cl_kref); + rpc_register_client(new); + rpciod_up(); return new; out_no_path: rpc_free_iostats(new->cl_metrics); @@ -298,86 +345,86 @@ out_no_clnt: /* * Properly shut down an RPC client, terminating all outstanding - * requests. Note that we must be certain that cl_oneshot and - * cl_dead are cleared, or else the client would be destroyed - * when the last task releases it. + * requests. */ -int -rpc_shutdown_client(struct rpc_clnt *clnt) +void rpc_shutdown_client(struct rpc_clnt *clnt) { - dprintk("RPC: shutting down %s client for %s, tasks=%d\n", - clnt->cl_protname, clnt->cl_server, - atomic_read(&clnt->cl_users)); - - while (atomic_read(&clnt->cl_users) > 0) { - /* Don't let rpc_release_client destroy us */ - clnt->cl_oneshot = 0; - clnt->cl_dead = 0; + dprintk("RPC: shutting down %s client for %s\n", + clnt->cl_protname, clnt->cl_server); + + while (!list_empty(&clnt->cl_tasks)) { rpc_killall_tasks(clnt); wait_event_timeout(destroy_wait, - !atomic_read(&clnt->cl_users), 1*HZ); - } - - if (atomic_read(&clnt->cl_users) < 0) { - printk(KERN_ERR "RPC: rpc_shutdown_client clnt %p tasks=%d\n", - clnt, atomic_read(&clnt->cl_users)); -#ifdef RPC_DEBUG - rpc_show_tasks(); -#endif - BUG(); + list_empty(&clnt->cl_tasks), 1*HZ); } - return rpc_destroy_client(clnt); + rpc_release_client(clnt); } /* - * Delete an RPC client + * Free an RPC client */ -int -rpc_destroy_client(struct rpc_clnt *clnt) +static void +rpc_free_client(struct kref *kref) { - if (!atomic_dec_and_test(&clnt->cl_count)) - return 1; - BUG_ON(atomic_read(&clnt->cl_users) != 0); + struct rpc_clnt *clnt = container_of(kref, struct rpc_clnt, cl_kref); dprintk("RPC: destroying %s client for %s\n", clnt->cl_protname, clnt->cl_server); - if (clnt->cl_auth) { - rpcauth_destroy(clnt->cl_auth); - clnt->cl_auth = NULL; - } if (!IS_ERR(clnt->cl_dentry)) { rpc_rmdir(clnt->cl_dentry); rpc_put_mount(); } if (clnt->cl_parent != clnt) { - rpc_destroy_client(clnt->cl_parent); + rpc_release_client(clnt->cl_parent); goto out_free; } if (clnt->cl_server != clnt->cl_inline_name) kfree(clnt->cl_server); out_free: + rpc_unregister_client(clnt); rpc_free_iostats(clnt->cl_metrics); clnt->cl_metrics = NULL; xprt_put(clnt->cl_xprt); + rpciod_down(); kfree(clnt); - return 0; } /* - * Release an RPC client + * Free an RPC client + */ +static void +rpc_free_auth(struct kref *kref) +{ + struct rpc_clnt *clnt = container_of(kref, struct rpc_clnt, cl_kref); + + if (clnt->cl_auth == NULL) { + rpc_free_client(kref); + return; + } + + /* + * Note: RPCSEC_GSS may need to send NULL RPC calls in order to + * release remaining GSS contexts. This mechanism ensures + * that it can do so safely. + */ + kref_init(kref); + rpcauth_release(clnt->cl_auth); + clnt->cl_auth = NULL; + kref_put(kref, rpc_free_client); +} + +/* + * Release reference to the RPC client */ void rpc_release_client(struct rpc_clnt *clnt) { - dprintk("RPC: rpc_release_client(%p, %d)\n", - clnt, atomic_read(&clnt->cl_users)); + dprintk("RPC: rpc_release_client(%p)\n", clnt); - if (!atomic_dec_and_test(&clnt->cl_users)) - return; - wake_up(&destroy_wait); - if (clnt->cl_oneshot || clnt->cl_dead) - rpc_destroy_client(clnt); + if (list_empty(&clnt->cl_tasks)) + wake_up(&destroy_wait); + kref_put(&clnt->cl_kref, rpc_free_auth); } /** @@ -468,82 +515,96 @@ void rpc_clnt_sigunmask(struct rpc_clnt *clnt, sigset_t *oldset) rpc_restore_sigmask(oldset); } -/* - * New rpc_call implementation +static +struct rpc_task *rpc_do_run_task(struct rpc_clnt *clnt, + struct rpc_message *msg, + int flags, + const struct rpc_call_ops *ops, + void *data) +{ + struct rpc_task *task, *ret; + sigset_t oldset; + + task = rpc_new_task(clnt, flags, ops, data); + if (task == NULL) { + rpc_release_calldata(ops, data); + return ERR_PTR(-ENOMEM); + } + + /* Mask signals on synchronous RPC calls and RPCSEC_GSS upcalls */ + rpc_task_sigmask(task, &oldset); + if (msg != NULL) { + rpc_call_setup(task, msg, 0); + if (task->tk_status != 0) { + ret = ERR_PTR(task->tk_status); + rpc_put_task(task); + goto out; + } + } + atomic_inc(&task->tk_count); + rpc_execute(task); + ret = task; +out: + rpc_restore_sigmask(&oldset); + return ret; +} + +/** + * rpc_call_sync - Perform a synchronous RPC call + * @clnt: pointer to RPC client + * @msg: RPC call parameters + * @flags: RPC call flags */ int rpc_call_sync(struct rpc_clnt *clnt, struct rpc_message *msg, int flags) { struct rpc_task *task; - sigset_t oldset; - int status; - - /* If this client is slain all further I/O fails */ - if (clnt->cl_dead) - return -EIO; + int status; BUG_ON(flags & RPC_TASK_ASYNC); - task = rpc_new_task(clnt, flags, &rpc_default_ops, NULL); - if (task == NULL) - return -ENOMEM; - - /* Mask signals on RPC calls _and_ GSS_AUTH upcalls */ - rpc_task_sigmask(task, &oldset); - - /* Set up the call info struct and execute the task */ - rpc_call_setup(task, msg, 0); - if (task->tk_status == 0) { - atomic_inc(&task->tk_count); - rpc_execute(task); - } + task = rpc_do_run_task(clnt, msg, flags, &rpc_default_ops, NULL); + if (IS_ERR(task)) + return PTR_ERR(task); status = task->tk_status; rpc_put_task(task); - rpc_restore_sigmask(&oldset); return status; } -/* - * New rpc_call implementation +/** + * rpc_call_async - Perform an asynchronous RPC call + * @clnt: pointer to RPC client + * @msg: RPC call parameters + * @flags: RPC call flags + * @ops: RPC call ops + * @data: user call data */ int rpc_call_async(struct rpc_clnt *clnt, struct rpc_message *msg, int flags, const struct rpc_call_ops *tk_ops, void *data) { struct rpc_task *task; - sigset_t oldset; - int status; - - /* If this client is slain all further I/O fails */ - status = -EIO; - if (clnt->cl_dead) - goto out_release; - - flags |= RPC_TASK_ASYNC; - - /* Create/initialize a new RPC task */ - status = -ENOMEM; - if (!(task = rpc_new_task(clnt, flags, tk_ops, data))) - goto out_release; - - /* Mask signals on GSS_AUTH upcalls */ - rpc_task_sigmask(task, &oldset); - rpc_call_setup(task, msg, 0); - - /* Set up the call info struct and execute the task */ - status = task->tk_status; - if (status == 0) - rpc_execute(task); - else - rpc_put_task(task); - - rpc_restore_sigmask(&oldset); - return status; -out_release: - rpc_release_calldata(tk_ops, data); - return status; + task = rpc_do_run_task(clnt, msg, flags|RPC_TASK_ASYNC, tk_ops, data); + if (IS_ERR(task)) + return PTR_ERR(task); + rpc_put_task(task); + return 0; } +/** + * rpc_run_task - Allocate a new RPC task, then run rpc_execute against it + * @clnt: pointer to RPC client + * @flags: RPC flags + * @ops: RPC call ops + * @data: user call data + */ +struct rpc_task *rpc_run_task(struct rpc_clnt *clnt, int flags, + const struct rpc_call_ops *tk_ops, + void *data) +{ + return rpc_do_run_task(clnt, NULL, flags, tk_ops, data); +} +EXPORT_SYMBOL(rpc_run_task); void rpc_call_setup(struct rpc_task *task, struct rpc_message *msg, int flags) @@ -745,7 +806,7 @@ call_reserveresult(struct rpc_task *task) static void call_allocate(struct rpc_task *task) { - unsigned int slack = task->tk_auth->au_cslack; + unsigned int slack = task->tk_msg.rpc_cred->cr_auth->au_cslack; struct rpc_rqst *req = task->tk_rqstp; struct rpc_xprt *xprt = task->tk_xprt; struct rpc_procinfo *proc = task->tk_msg.rpc_proc; @@ -843,10 +904,8 @@ call_encode(struct rpc_task *task) if (encode == NULL) return; - lock_kernel(); task->tk_status = rpcauth_wrap_req(task, encode, req, p, task->tk_msg.rpc_argp); - unlock_kernel(); if (task->tk_status == -ENOMEM) { /* XXX: Is this sane? */ rpc_delay(task, 3*HZ); @@ -1177,10 +1236,8 @@ call_decode(struct rpc_task *task) task->tk_action = rpc_exit_task; if (decode) { - lock_kernel(); task->tk_status = rpcauth_unwrap_resp(task, decode, req, p, task->tk_msg.rpc_resp); - unlock_kernel(); } dprintk("RPC: %5u call_decode result %d\n", task->tk_pid, task->tk_status); @@ -1273,9 +1330,9 @@ call_verify(struct rpc_task *task) * - if it isn't pointer subtraction in the NFS client may give * undefined results */ - printk(KERN_WARNING - "call_verify: XDR representation not a multiple of" - " 4 bytes: 0x%x\n", task->tk_rqstp->rq_rcv_buf.len); + dprintk("RPC: %5u %s: XDR representation not a multiple of" + " 4 bytes: 0x%x\n", task->tk_pid, __FUNCTION__, + task->tk_rqstp->rq_rcv_buf.len); goto out_eio; } if ((len -= 3) < 0) @@ -1283,7 +1340,8 @@ call_verify(struct rpc_task *task) p += 1; /* skip XID */ if ((n = ntohl(*p++)) != RPC_REPLY) { - printk(KERN_WARNING "call_verify: not an RPC reply: %x\n", n); + dprintk("RPC: %5u %s: not an RPC reply: %x\n", + task->tk_pid, __FUNCTION__, n); goto out_garbage; } if ((n = ntohl(*p++)) != RPC_MSG_ACCEPTED) { @@ -1334,7 +1392,8 @@ call_verify(struct rpc_task *task) "authentication.\n", task->tk_client->cl_server); break; default: - printk(KERN_WARNING "call_verify: unknown auth error: %x\n", n); + dprintk("RPC: %5u %s: unknown auth error: %x\n", + task->tk_pid, __FUNCTION__, n); error = -EIO; } dprintk("RPC: %5u %s: call rejected %d\n", @@ -1342,7 +1401,8 @@ call_verify(struct rpc_task *task) goto out_err; } if (!(p = rpcauth_checkverf(task, p))) { - printk(KERN_WARNING "call_verify: auth check failed\n"); + dprintk("RPC: %5u %s: auth check failed\n", + task->tk_pid, __FUNCTION__); goto out_garbage; /* bad verifier, retry */ } len = p - (__be32 *)iov->iov_base - 1; @@ -1381,7 +1441,8 @@ call_verify(struct rpc_task *task) task->tk_pid, __FUNCTION__); break; /* retry */ default: - printk(KERN_WARNING "call_verify: server accept status: %x\n", n); + dprintk("RPC: %5u %s: server accept status: %x\n", + task->tk_pid, __FUNCTION__, n); /* Also retry */ } @@ -1395,14 +1456,16 @@ out_garbage: out_retry: return ERR_PTR(-EAGAIN); } - printk(KERN_WARNING "RPC %s: retry failed, exit EIO\n", __FUNCTION__); out_eio: error = -EIO; out_err: rpc_exit(task, error); + dprintk("RPC: %5u %s: call failed with error %d\n", task->tk_pid, + __FUNCTION__, error); return ERR_PTR(error); out_overflow: - printk(KERN_WARNING "RPC %s: server reply was truncated.\n", __FUNCTION__); + dprintk("RPC: %5u %s: server reply was truncated.\n", task->tk_pid, + __FUNCTION__); goto out_garbage; } @@ -1421,7 +1484,7 @@ static struct rpc_procinfo rpcproc_null = { .p_decode = rpcproc_decode_null, }; -int rpc_ping(struct rpc_clnt *clnt, int flags) +static int rpc_ping(struct rpc_clnt *clnt, int flags) { struct rpc_message msg = { .rpc_proc = &rpcproc_null, @@ -1432,3 +1495,51 @@ int rpc_ping(struct rpc_clnt *clnt, int flags) put_rpccred(msg.rpc_cred); return err; } + +struct rpc_task *rpc_call_null(struct rpc_clnt *clnt, struct rpc_cred *cred, int flags) +{ + struct rpc_message msg = { + .rpc_proc = &rpcproc_null, + .rpc_cred = cred, + }; + return rpc_do_run_task(clnt, &msg, flags, &rpc_default_ops, NULL); +} +EXPORT_SYMBOL(rpc_call_null); + +#ifdef RPC_DEBUG +void rpc_show_tasks(void) +{ + struct rpc_clnt *clnt; + struct rpc_task *t; + + spin_lock(&rpc_client_lock); + if (list_empty(&all_clients)) + goto out; + printk("-pid- proc flgs status -client- -prog- --rqstp- -timeout " + "-rpcwait -action- ---ops--\n"); + list_for_each_entry(clnt, &all_clients, cl_clients) { + if (list_empty(&clnt->cl_tasks)) + continue; + spin_lock(&clnt->cl_lock); + list_for_each_entry(t, &clnt->cl_tasks, tk_task) { + const char *rpc_waitq = "none"; + + if (RPC_IS_QUEUED(t)) + rpc_waitq = rpc_qname(t->u.tk_wait.rpc_waitq); + + printk("%5u %04d %04x %6d %8p %6d %8p %8ld %8s %8p %8p\n", + t->tk_pid, + (t->tk_msg.rpc_proc ? t->tk_msg.rpc_proc->p_proc : -1), + t->tk_flags, t->tk_status, + t->tk_client, + (t->tk_client ? t->tk_client->cl_prog : 0), + t->tk_rqstp, t->tk_timeout, + rpc_waitq, + t->tk_action, t->tk_ops); + } + spin_unlock(&clnt->cl_lock); + } +out: + spin_unlock(&rpc_client_lock); +} +#endif diff --git a/net/sunrpc/rpc_pipe.c b/net/sunrpc/rpc_pipe.c index 5887457dc936..669e12a4ed18 100644 --- a/net/sunrpc/rpc_pipe.c +++ b/net/sunrpc/rpc_pipe.c @@ -132,8 +132,7 @@ rpc_close_pipes(struct inode *inode) rpci->nwriters = 0; if (ops->release_pipe) ops->release_pipe(inode); - cancel_delayed_work(&rpci->queue_timeout); - flush_workqueue(rpciod_workqueue); + cancel_delayed_work_sync(&rpci->queue_timeout); } rpc_inode_setowner(inode, NULL); mutex_unlock(&inode->i_mutex); @@ -344,7 +343,7 @@ rpc_info_open(struct inode *inode, struct file *file) mutex_lock(&inode->i_mutex); clnt = RPC_I(inode)->private; if (clnt) { - atomic_inc(&clnt->cl_users); + kref_get(&clnt->cl_kref); m->private = clnt; } else { single_release(inode, file); @@ -448,24 +447,31 @@ void rpc_put_mount(void) simple_release_fs(&rpc_mount, &rpc_mount_count); } +static int rpc_delete_dentry(struct dentry *dentry) +{ + return 1; +} + +static struct dentry_operations rpc_dentry_operations = { + .d_delete = rpc_delete_dentry, +}; + static int rpc_lookup_parent(char *path, struct nameidata *nd) { + struct vfsmount *mnt; + if (path[0] == '\0') return -ENOENT; - nd->mnt = rpc_get_mount(); - if (IS_ERR(nd->mnt)) { + + mnt = rpc_get_mount(); + if (IS_ERR(mnt)) { printk(KERN_WARNING "%s: %s failed to mount " "pseudofilesystem \n", __FILE__, __FUNCTION__); - return PTR_ERR(nd->mnt); + return PTR_ERR(mnt); } - mntget(nd->mnt); - nd->dentry = dget(rpc_mount->mnt_root); - nd->last_type = LAST_ROOT; - nd->flags = LOOKUP_PARENT; - nd->depth = 0; - if (path_walk(path, nd)) { + if (vfs_path_lookup(mnt->mnt_root, mnt, path, LOOKUP_PARENT, nd)) { printk(KERN_WARNING "%s: %s failed to find path %s\n", __FILE__, __FUNCTION__, path); rpc_put_mount(); @@ -506,7 +512,7 @@ rpc_get_inode(struct super_block *sb, int mode) * FIXME: This probably has races. */ static void -rpc_depopulate(struct dentry *parent) +rpc_depopulate(struct dentry *parent, int start, int eof) { struct inode *dir = parent->d_inode; struct list_head *pos, *next; @@ -518,6 +524,10 @@ repeat: spin_lock(&dcache_lock); list_for_each_safe(pos, next, &parent->d_subdirs) { dentry = list_entry(pos, struct dentry, d_u.d_child); + if (!dentry->d_inode || + dentry->d_inode->i_ino < start || + dentry->d_inode->i_ino >= eof) + continue; spin_lock(&dentry->d_lock); if (!d_unhashed(dentry)) { dget_locked(dentry); @@ -533,11 +543,11 @@ repeat: if (n) { do { dentry = dvec[--n]; - if (dentry->d_inode) { - rpc_close_pipes(dentry->d_inode); + if (S_ISREG(dentry->d_inode->i_mode)) simple_unlink(dir, dentry); - } - inode_dir_notify(dir, DN_DELETE); + else if (S_ISDIR(dentry->d_inode->i_mode)) + simple_rmdir(dir, dentry); + d_delete(dentry); dput(dentry); } while (n); goto repeat; @@ -560,6 +570,7 @@ rpc_populate(struct dentry *parent, dentry = d_alloc_name(parent, files[i].name); if (!dentry) goto out_bad; + dentry->d_op = &rpc_dentry_operations; mode = files[i].mode; inode = rpc_get_inode(dir->i_sb, mode); if (!inode) { @@ -607,21 +618,14 @@ static int __rpc_rmdir(struct inode *dir, struct dentry *dentry) { int error; - - shrink_dcache_parent(dentry); - if (d_unhashed(dentry)) - return 0; - if ((error = simple_rmdir(dir, dentry)) != 0) - return error; - if (!error) { - inode_dir_notify(dir, DN_DELETE); - d_drop(dentry); - } - return 0; + error = simple_rmdir(dir, dentry); + if (!error) + d_delete(dentry); + return error; } static struct dentry * -rpc_lookup_create(struct dentry *parent, const char *name, int len) +rpc_lookup_create(struct dentry *parent, const char *name, int len, int exclusive) { struct inode *dir = parent->d_inode; struct dentry *dentry; @@ -630,7 +634,9 @@ rpc_lookup_create(struct dentry *parent, const char *name, int len) dentry = lookup_one_len(name, parent, len); if (IS_ERR(dentry)) goto out_err; - if (dentry->d_inode) { + if (!dentry->d_inode) + dentry->d_op = &rpc_dentry_operations; + else if (exclusive) { dput(dentry); dentry = ERR_PTR(-EEXIST); goto out_err; @@ -649,7 +655,7 @@ rpc_lookup_negative(char *path, struct nameidata *nd) if ((error = rpc_lookup_parent(path, nd)) != 0) return ERR_PTR(error); - dentry = rpc_lookup_create(nd->dentry, nd->last.name, nd->last.len); + dentry = rpc_lookup_create(nd->dentry, nd->last.name, nd->last.len, 1); if (IS_ERR(dentry)) rpc_release_path(nd); return dentry; @@ -681,7 +687,7 @@ out: rpc_release_path(&nd); return dentry; err_depopulate: - rpc_depopulate(dentry); + rpc_depopulate(dentry, RPCAUTH_info, RPCAUTH_EOF); __rpc_rmdir(dir, dentry); err_dput: dput(dentry); @@ -701,7 +707,7 @@ rpc_rmdir(struct dentry *dentry) parent = dget_parent(dentry); dir = parent->d_inode; mutex_lock_nested(&dir->i_mutex, I_MUTEX_PARENT); - rpc_depopulate(dentry); + rpc_depopulate(dentry, RPCAUTH_info, RPCAUTH_EOF); error = __rpc_rmdir(dir, dentry); dput(dentry); mutex_unlock(&dir->i_mutex); @@ -716,10 +722,21 @@ rpc_mkpipe(struct dentry *parent, const char *name, void *private, struct rpc_pi struct inode *dir, *inode; struct rpc_inode *rpci; - dentry = rpc_lookup_create(parent, name, strlen(name)); + dentry = rpc_lookup_create(parent, name, strlen(name), 0); if (IS_ERR(dentry)) return dentry; dir = parent->d_inode; + if (dentry->d_inode) { + rpci = RPC_I(dentry->d_inode); + if (rpci->private != private || + rpci->ops != ops || + rpci->flags != flags) { + dput (dentry); + dentry = ERR_PTR(-EBUSY); + } + rpci->nkern_readwriters++; + goto out; + } inode = rpc_get_inode(dir->i_sb, S_IFIFO | S_IRUSR | S_IWUSR); if (!inode) goto err_dput; @@ -730,6 +747,7 @@ rpc_mkpipe(struct dentry *parent, const char *name, void *private, struct rpc_pi rpci->private = private; rpci->flags = flags; rpci->ops = ops; + rpci->nkern_readwriters = 1; inode_dir_notify(dir, DN_CREATE); dget(dentry); out: @@ -754,13 +772,11 @@ rpc_unlink(struct dentry *dentry) parent = dget_parent(dentry); dir = parent->d_inode; mutex_lock_nested(&dir->i_mutex, I_MUTEX_PARENT); - if (!d_unhashed(dentry)) { - d_drop(dentry); - if (dentry->d_inode) { - rpc_close_pipes(dentry->d_inode); - error = simple_unlink(dir, dentry); - } - inode_dir_notify(dir, DN_DELETE); + if (--RPC_I(dentry->d_inode)->nkern_readwriters == 0) { + rpc_close_pipes(dentry->d_inode); + error = simple_unlink(dir, dentry); + if (!error) + d_delete(dentry); } dput(dentry); mutex_unlock(&dir->i_mutex); @@ -833,6 +849,7 @@ init_once(void * foo, struct kmem_cache * cachep, unsigned long flags) rpci->nreaders = 0; rpci->nwriters = 0; INIT_LIST_HEAD(&rpci->in_upcall); + INIT_LIST_HEAD(&rpci->in_downcall); INIT_LIST_HEAD(&rpci->pipe); rpci->pipelen = 0; init_waitqueue_head(&rpci->waitq); @@ -849,7 +866,7 @@ int register_rpc_pipefs(void) sizeof(struct rpc_inode), 0, (SLAB_HWCACHE_ALIGN|SLAB_RECLAIM_ACCOUNT| SLAB_MEM_SPREAD), - init_once, NULL); + init_once); if (!rpc_inode_cachep) return -ENOMEM; err = register_filesystem(&rpc_pipe_fs_type); diff --git a/net/sunrpc/rpcb_clnt.c b/net/sunrpc/rpcb_clnt.c index 6c7aa8a1f0c6..d1740dbab991 100644 --- a/net/sunrpc/rpcb_clnt.c +++ b/net/sunrpc/rpcb_clnt.c @@ -12,6 +12,8 @@ * Copyright (C) 1996, Olaf Kirch <okir@monad.swb.de> */ +#include <linux/module.h> + #include <linux/types.h> #include <linux/socket.h> #include <linux/kernel.h> @@ -184,8 +186,8 @@ static struct rpc_clnt *rpcb_create(char *hostname, struct sockaddr *srvaddr, .program = &rpcb_program, .version = version, .authflavor = RPC_AUTH_UNIX, - .flags = (RPC_CLNT_CREATE_ONESHOT | - RPC_CLNT_CREATE_NOPING), + .flags = (RPC_CLNT_CREATE_NOPING | + RPC_CLNT_CREATE_INTR), }; ((struct sockaddr_in *)srvaddr)->sin_port = htons(RPCBIND_PORT); @@ -238,6 +240,7 @@ int rpcb_register(u32 prog, u32 vers, int prot, unsigned short port, int *okay) error = rpc_call_sync(rpcb_clnt, &msg, 0); + rpc_shutdown_client(rpcb_clnt); if (error < 0) printk(KERN_WARNING "RPC: failed to contact local rpcbind " "server (errno %d).\n", -error); @@ -246,21 +249,20 @@ int rpcb_register(u32 prog, u32 vers, int prot, unsigned short port, int *okay) return error; } -#ifdef CONFIG_ROOT_NFS /** - * rpcb_getport_external - obtain the port for an RPC service on a given host + * rpcb_getport_sync - obtain the port for an RPC service on a given host * @sin: address of remote peer * @prog: RPC program number to bind * @vers: RPC version number to bind * @prot: transport protocol to use to make this request * * Called from outside the RPC client in a synchronous task context. + * Uses default timeout parameters specified by underlying transport. * - * For now, this supports only version 2 queries, but is used only by - * mount_clnt for NFS_ROOT. + * XXX: Needs to support IPv6, and rpcbind versions 3 and 4 */ -int rpcb_getport_external(struct sockaddr_in *sin, __u32 prog, - __u32 vers, int prot) +int rpcb_getport_sync(struct sockaddr_in *sin, __u32 prog, + __u32 vers, int prot) { struct rpcbind_args map = { .r_prog = prog, @@ -277,15 +279,16 @@ int rpcb_getport_external(struct sockaddr_in *sin, __u32 prog, char hostname[40]; int status; - dprintk("RPC: rpcb_getport_external(%u.%u.%u.%u, %u, %u, %d)\n", - NIPQUAD(sin->sin_addr.s_addr), prog, vers, prot); + dprintk("RPC: %s(" NIPQUAD_FMT ", %u, %u, %d)\n", + __FUNCTION__, NIPQUAD(sin->sin_addr.s_addr), prog, vers, prot); - sprintf(hostname, "%u.%u.%u.%u", NIPQUAD(sin->sin_addr.s_addr)); + sprintf(hostname, NIPQUAD_FMT, NIPQUAD(sin->sin_addr.s_addr)); rpcb_clnt = rpcb_create(hostname, (struct sockaddr *)sin, prot, 2, 0); if (IS_ERR(rpcb_clnt)) return PTR_ERR(rpcb_clnt); status = rpc_call_sync(rpcb_clnt, &msg, 0); + rpc_shutdown_client(rpcb_clnt); if (status >= 0) { if (map.r_port != 0) @@ -294,16 +297,16 @@ int rpcb_getport_external(struct sockaddr_in *sin, __u32 prog, } return status; } -#endif +EXPORT_SYMBOL_GPL(rpcb_getport_sync); /** - * rpcb_getport - obtain the port for a given RPC service on a given host + * rpcb_getport_async - obtain the port for a given RPC service on a given host * @task: task that is waiting for portmapper request * * This one can be called for an ongoing RPC request, and can be used in * an async (rpciod) context. */ -void rpcb_getport(struct rpc_task *task) +void rpcb_getport_async(struct rpc_task *task) { struct rpc_clnt *clnt = task->tk_client; int bind_version; @@ -314,17 +317,17 @@ void rpcb_getport(struct rpc_task *task) struct sockaddr addr; int status; - dprintk("RPC: %5u rpcb_getport(%s, %u, %u, %d)\n", - task->tk_pid, clnt->cl_server, - clnt->cl_prog, clnt->cl_vers, xprt->prot); + dprintk("RPC: %5u %s(%s, %u, %u, %d)\n", + task->tk_pid, __FUNCTION__, + clnt->cl_server, clnt->cl_prog, clnt->cl_vers, xprt->prot); /* Autobind on cloned rpc clients is discouraged */ BUG_ON(clnt->cl_parent != clnt); if (xprt_test_and_set_binding(xprt)) { status = -EACCES; /* tell caller to check again */ - dprintk("RPC: %5u rpcb_getport waiting for another binder\n", - task->tk_pid); + dprintk("RPC: %5u %s: waiting for another binder\n", + task->tk_pid, __FUNCTION__); goto bailout_nowake; } @@ -335,27 +338,28 @@ void rpcb_getport(struct rpc_task *task) /* Someone else may have bound if we slept */ if (xprt_bound(xprt)) { status = 0; - dprintk("RPC: %5u rpcb_getport already bound\n", task->tk_pid); + dprintk("RPC: %5u %s: already bound\n", + task->tk_pid, __FUNCTION__); goto bailout_nofree; } if (rpcb_next_version[xprt->bind_index].rpc_proc == NULL) { xprt->bind_index = 0; status = -EACCES; /* tell caller to try again later */ - dprintk("RPC: %5u rpcb_getport no more getport versions " - "available\n", task->tk_pid); + dprintk("RPC: %5u %s: no more getport versions available\n", + task->tk_pid, __FUNCTION__); goto bailout_nofree; } bind_version = rpcb_next_version[xprt->bind_index].rpc_vers; - dprintk("RPC: %5u rpcb_getport trying rpcbind version %u\n", - task->tk_pid, bind_version); + dprintk("RPC: %5u %s: trying rpcbind version %u\n", + task->tk_pid, __FUNCTION__, bind_version); map = kzalloc(sizeof(struct rpcbind_args), GFP_ATOMIC); if (!map) { status = -ENOMEM; - dprintk("RPC: %5u rpcb_getport no memory available\n", - task->tk_pid); + dprintk("RPC: %5u %s: no memory available\n", + task->tk_pid, __FUNCTION__); goto bailout_nofree; } map->r_prog = clnt->cl_prog; @@ -373,16 +377,17 @@ void rpcb_getport(struct rpc_task *task) rpcb_clnt = rpcb_create(clnt->cl_server, &addr, xprt->prot, bind_version, 0); if (IS_ERR(rpcb_clnt)) { status = PTR_ERR(rpcb_clnt); - dprintk("RPC: %5u rpcb_getport rpcb_create failed, error %ld\n", - task->tk_pid, PTR_ERR(rpcb_clnt)); + dprintk("RPC: %5u %s: rpcb_create failed, error %ld\n", + task->tk_pid, __FUNCTION__, PTR_ERR(rpcb_clnt)); goto bailout; } child = rpc_run_task(rpcb_clnt, RPC_TASK_ASYNC, &rpcb_getport_ops, map); + rpc_release_client(rpcb_clnt); if (IS_ERR(child)) { status = -EIO; - dprintk("RPC: %5u rpcb_getport rpc_run_task failed\n", - task->tk_pid); + dprintk("RPC: %5u %s: rpc_run_task failed\n", + task->tk_pid, __FUNCTION__); goto bailout_nofree; } rpc_put_task(child); diff --git a/net/sunrpc/sched.c b/net/sunrpc/sched.c index 944d75396fb3..954d7ec86c7e 100644 --- a/net/sunrpc/sched.c +++ b/net/sunrpc/sched.c @@ -25,7 +25,6 @@ #ifdef RPC_DEBUG #define RPCDBG_FACILITY RPCDBG_SCHED #define RPC_TASK_MAGIC_ID 0xf00baa -static int rpc_task_id; #endif /* @@ -40,7 +39,6 @@ static mempool_t *rpc_task_mempool __read_mostly; static mempool_t *rpc_buffer_mempool __read_mostly; static void __rpc_default_timer(struct rpc_task *task); -static void rpciod_killall(void); static void rpc_async_schedule(struct work_struct *); static void rpc_release_task(struct rpc_task *task); @@ -50,23 +48,11 @@ static void rpc_release_task(struct rpc_task *task); static RPC_WAITQ(delay_queue, "delayq"); /* - * All RPC tasks are linked into this list - */ -static LIST_HEAD(all_tasks); - -/* * rpciod-related stuff */ -static DEFINE_MUTEX(rpciod_mutex); -static unsigned int rpciod_users; struct workqueue_struct *rpciod_workqueue; /* - * Spinlock for other critical sections of code. - */ -static DEFINE_SPINLOCK(rpc_sched_lock); - -/* * Disable the timer for a given RPC task. Should be called with * queue->lock and bh_disabled in order to avoid races within * rpc_run_timer(). @@ -267,18 +253,33 @@ static int rpc_wait_bit_interruptible(void *word) return 0; } +#ifdef RPC_DEBUG +static void rpc_task_set_debuginfo(struct rpc_task *task) +{ + static atomic_t rpc_pid; + + task->tk_magic = RPC_TASK_MAGIC_ID; + task->tk_pid = atomic_inc_return(&rpc_pid); +} +#else +static inline void rpc_task_set_debuginfo(struct rpc_task *task) +{ +} +#endif + static void rpc_set_active(struct rpc_task *task) { + struct rpc_clnt *clnt; if (test_and_set_bit(RPC_TASK_ACTIVE, &task->tk_runstate) != 0) return; - spin_lock(&rpc_sched_lock); -#ifdef RPC_DEBUG - task->tk_magic = RPC_TASK_MAGIC_ID; - task->tk_pid = rpc_task_id++; -#endif + rpc_task_set_debuginfo(task); /* Add to global list of all tasks */ - list_add_tail(&task->tk_task, &all_tasks); - spin_unlock(&rpc_sched_lock); + clnt = task->tk_client; + if (clnt != NULL) { + spin_lock(&clnt->cl_lock); + list_add_tail(&task->tk_task, &clnt->cl_tasks); + spin_unlock(&clnt->cl_lock); + } } /* @@ -818,6 +819,7 @@ void rpc_init_task(struct rpc_task *task, struct rpc_clnt *clnt, int flags, cons if (tk_ops->rpc_call_prepare != NULL) task->tk_action = rpc_prepare_task; task->tk_calldata = calldata; + INIT_LIST_HEAD(&task->tk_task); /* Initialize retry counters */ task->tk_garb_retry = 2; @@ -830,7 +832,7 @@ void rpc_init_task(struct rpc_task *task, struct rpc_clnt *clnt, int flags, cons task->tk_workqueue = rpciod_workqueue; if (clnt) { - atomic_inc(&clnt->cl_users); + kref_get(&clnt->cl_kref); if (clnt->cl_softrtry) task->tk_flags |= RPC_TASK_SOFT; if (!clnt->cl_intr) @@ -860,9 +862,7 @@ static void rpc_free_task(struct rcu_head *rcu) } /* - * Create a new task for the specified client. We have to - * clean up after an allocation failure, as the client may - * have specified "oneshot". + * Create a new task for the specified client. */ struct rpc_task *rpc_new_task(struct rpc_clnt *clnt, int flags, const struct rpc_call_ops *tk_ops, void *calldata) { @@ -870,7 +870,7 @@ struct rpc_task *rpc_new_task(struct rpc_clnt *clnt, int flags, const struct rpc task = rpc_alloc_task(); if (!task) - goto cleanup; + goto out; rpc_init_task(task, clnt, flags, tk_ops, calldata); @@ -878,16 +878,6 @@ struct rpc_task *rpc_new_task(struct rpc_clnt *clnt, int flags, const struct rpc task->tk_flags |= RPC_TASK_DYNAMIC; out: return task; - -cleanup: - /* Check whether to release the client */ - if (clnt) { - printk("rpc_new_task: failed, users=%d, oneshot=%d\n", - atomic_read(&clnt->cl_users), clnt->cl_oneshot); - atomic_inc(&clnt->cl_users); /* pretend we were used ... */ - rpc_release_client(clnt); - } - goto out; } @@ -920,11 +910,13 @@ static void rpc_release_task(struct rpc_task *task) #endif dprintk("RPC: %5u release task\n", task->tk_pid); - /* Remove from global task list */ - spin_lock(&rpc_sched_lock); - list_del(&task->tk_task); - spin_unlock(&rpc_sched_lock); - + if (!list_empty(&task->tk_task)) { + struct rpc_clnt *clnt = task->tk_client; + /* Remove from client task list */ + spin_lock(&clnt->cl_lock); + list_del(&task->tk_task); + spin_unlock(&clnt->cl_lock); + } BUG_ON (RPC_IS_QUEUED(task)); /* Synchronously delete any running timer */ @@ -939,29 +931,6 @@ static void rpc_release_task(struct rpc_task *task) rpc_put_task(task); } -/** - * rpc_run_task - Allocate a new RPC task, then run rpc_execute against it - * @clnt: pointer to RPC client - * @flags: RPC flags - * @ops: RPC call ops - * @data: user call data - */ -struct rpc_task *rpc_run_task(struct rpc_clnt *clnt, int flags, - const struct rpc_call_ops *ops, - void *data) -{ - struct rpc_task *task; - task = rpc_new_task(clnt, flags, ops, data); - if (task == NULL) { - rpc_release_calldata(ops, data); - return ERR_PTR(-ENOMEM); - } - atomic_inc(&task->tk_count); - rpc_execute(task); - return task; -} -EXPORT_SYMBOL(rpc_run_task); - /* * Kill all tasks for the given client. * XXX: kill their descendants as well? @@ -969,141 +938,70 @@ EXPORT_SYMBOL(rpc_run_task); void rpc_killall_tasks(struct rpc_clnt *clnt) { struct rpc_task *rovr; - struct list_head *le; - dprintk("RPC: killing all tasks for client %p\n", clnt); + if (list_empty(&clnt->cl_tasks)) + return; + dprintk("RPC: killing all tasks for client %p\n", clnt); /* * Spin lock all_tasks to prevent changes... */ - spin_lock(&rpc_sched_lock); - alltask_for_each(rovr, le, &all_tasks) { + spin_lock(&clnt->cl_lock); + list_for_each_entry(rovr, &clnt->cl_tasks, tk_task) { if (! RPC_IS_ACTIVATED(rovr)) continue; - if (!clnt || rovr->tk_client == clnt) { + if (!(rovr->tk_flags & RPC_TASK_KILLED)) { rovr->tk_flags |= RPC_TASK_KILLED; rpc_exit(rovr, -EIO); rpc_wake_up_task(rovr); } } - spin_unlock(&rpc_sched_lock); + spin_unlock(&clnt->cl_lock); } -static void rpciod_killall(void) +int rpciod_up(void) { - unsigned long flags; - - while (!list_empty(&all_tasks)) { - clear_thread_flag(TIF_SIGPENDING); - rpc_killall_tasks(NULL); - flush_workqueue(rpciod_workqueue); - if (!list_empty(&all_tasks)) { - dprintk("RPC: rpciod_killall: waiting for tasks " - "to exit\n"); - yield(); - } - } + return try_module_get(THIS_MODULE) ? 0 : -EINVAL; +} - spin_lock_irqsave(¤t->sighand->siglock, flags); - recalc_sigpending(); - spin_unlock_irqrestore(¤t->sighand->siglock, flags); +void rpciod_down(void) +{ + module_put(THIS_MODULE); } /* - * Start up the rpciod process if it's not already running. + * Start up the rpciod workqueue. */ -int -rpciod_up(void) +static int rpciod_start(void) { struct workqueue_struct *wq; - int error = 0; - mutex_lock(&rpciod_mutex); - dprintk("RPC: rpciod_up: users %u\n", rpciod_users); - rpciod_users++; - if (rpciod_workqueue) - goto out; - /* - * If there's no pid, we should be the first user. - */ - if (rpciod_users > 1) - printk(KERN_WARNING "rpciod_up: no workqueue, %u users??\n", rpciod_users); /* * Create the rpciod thread and wait for it to start. */ - error = -ENOMEM; + dprintk("RPC: creating workqueue rpciod\n"); wq = create_workqueue("rpciod"); - if (wq == NULL) { - printk(KERN_WARNING "rpciod_up: create workqueue failed, error=%d\n", error); - rpciod_users--; - goto out; - } rpciod_workqueue = wq; - error = 0; -out: - mutex_unlock(&rpciod_mutex); - return error; + return rpciod_workqueue != NULL; } -void -rpciod_down(void) +static void rpciod_stop(void) { - mutex_lock(&rpciod_mutex); - dprintk("RPC: rpciod_down sema %u\n", rpciod_users); - if (rpciod_users) { - if (--rpciod_users) - goto out; - } else - printk(KERN_WARNING "rpciod_down: no users??\n"); + struct workqueue_struct *wq = NULL; - if (!rpciod_workqueue) { - dprintk("RPC: rpciod_down: Nothing to do!\n"); - goto out; - } - rpciod_killall(); + if (rpciod_workqueue == NULL) + return; + dprintk("RPC: destroying workqueue rpciod\n"); - destroy_workqueue(rpciod_workqueue); + wq = rpciod_workqueue; rpciod_workqueue = NULL; - out: - mutex_unlock(&rpciod_mutex); + destroy_workqueue(wq); } -#ifdef RPC_DEBUG -void rpc_show_tasks(void) -{ - struct list_head *le; - struct rpc_task *t; - - spin_lock(&rpc_sched_lock); - if (list_empty(&all_tasks)) { - spin_unlock(&rpc_sched_lock); - return; - } - printk("-pid- proc flgs status -client- -prog- --rqstp- -timeout " - "-rpcwait -action- ---ops--\n"); - alltask_for_each(t, le, &all_tasks) { - const char *rpc_waitq = "none"; - - if (RPC_IS_QUEUED(t)) - rpc_waitq = rpc_qname(t->u.tk_wait.rpc_waitq); - - printk("%5u %04d %04x %6d %8p %6d %8p %8ld %8s %8p %8p\n", - t->tk_pid, - (t->tk_msg.rpc_proc ? t->tk_msg.rpc_proc->p_proc : -1), - t->tk_flags, t->tk_status, - t->tk_client, - (t->tk_client ? t->tk_client->cl_prog : 0), - t->tk_rqstp, t->tk_timeout, - rpc_waitq, - t->tk_action, t->tk_ops); - } - spin_unlock(&rpc_sched_lock); -} -#endif - void rpc_destroy_mempool(void) { + rpciod_stop(); if (rpc_buffer_mempool) mempool_destroy(rpc_buffer_mempool); if (rpc_task_mempool) @@ -1120,13 +1018,13 @@ rpc_init_mempool(void) rpc_task_slabp = kmem_cache_create("rpc_tasks", sizeof(struct rpc_task), 0, SLAB_HWCACHE_ALIGN, - NULL, NULL); + NULL); if (!rpc_task_slabp) goto err_nomem; rpc_buffer_slabp = kmem_cache_create("rpc_buffers", RPC_BUFFER_MAXSIZE, 0, SLAB_HWCACHE_ALIGN, - NULL, NULL); + NULL); if (!rpc_buffer_slabp) goto err_nomem; rpc_task_mempool = mempool_create_slab_pool(RPC_TASK_POOLSIZE, @@ -1137,6 +1035,8 @@ rpc_init_mempool(void) rpc_buffer_slabp); if (!rpc_buffer_mempool) goto err_nomem; + if (!rpciod_start()) + goto err_nomem; return 0; err_nomem: rpc_destroy_mempool(); diff --git a/net/sunrpc/sunrpc_syms.c b/net/sunrpc/sunrpc_syms.c index 73075dec83c0..384c4ad5ab86 100644 --- a/net/sunrpc/sunrpc_syms.c +++ b/net/sunrpc/sunrpc_syms.c @@ -28,15 +28,11 @@ EXPORT_SYMBOL(rpc_init_task); EXPORT_SYMBOL(rpc_sleep_on); EXPORT_SYMBOL(rpc_wake_up_next); EXPORT_SYMBOL(rpc_wake_up_task); -EXPORT_SYMBOL(rpciod_down); -EXPORT_SYMBOL(rpciod_up); -EXPORT_SYMBOL(rpc_new_task); EXPORT_SYMBOL(rpc_wake_up_status); /* RPC client functions */ EXPORT_SYMBOL(rpc_clone_client); EXPORT_SYMBOL(rpc_bind_new_program); -EXPORT_SYMBOL(rpc_destroy_client); EXPORT_SYMBOL(rpc_shutdown_client); EXPORT_SYMBOL(rpc_killall_tasks); EXPORT_SYMBOL(rpc_call_sync); @@ -61,7 +57,7 @@ EXPORT_SYMBOL(rpcauth_unregister); EXPORT_SYMBOL(rpcauth_create); EXPORT_SYMBOL(rpcauth_lookupcred); EXPORT_SYMBOL(rpcauth_lookup_credcache); -EXPORT_SYMBOL(rpcauth_free_credcache); +EXPORT_SYMBOL(rpcauth_destroy_credcache); EXPORT_SYMBOL(rpcauth_init_credcache); EXPORT_SYMBOL(put_rpccred); @@ -156,6 +152,7 @@ init_sunrpc(void) cache_register(&ip_map_cache); cache_register(&unix_gid_cache); init_socket_xprt(); + rpcauth_init_module(); out: return err; } @@ -163,6 +160,7 @@ out: static void __exit cleanup_sunrpc(void) { + rpcauth_remove_module(); cleanup_socket_xprt(); unregister_rpc_pipefs(); rpc_destroy_mempool(); diff --git a/net/sunrpc/svc.c b/net/sunrpc/svc.c index e673ef993904..55ea6df069de 100644 --- a/net/sunrpc/svc.c +++ b/net/sunrpc/svc.c @@ -814,7 +814,7 @@ svc_process(struct svc_rqst *rqstp) rqstp->rq_res.tail[0].iov_base = NULL; rqstp->rq_res.tail[0].iov_len = 0; /* Will be turned off only in gss privacy case: */ - rqstp->rq_sendfile_ok = 1; + rqstp->rq_splice_ok = 1; /* tcp needs a space for the record length... */ if (rqstp->rq_prot == IPPROTO_TCP) svc_putnl(resv, 0); diff --git a/net/sunrpc/svcauth_unix.c b/net/sunrpc/svcauth_unix.c index 07dcd20cbee4..411479411b21 100644 --- a/net/sunrpc/svcauth_unix.c +++ b/net/sunrpc/svcauth_unix.c @@ -5,6 +5,7 @@ #include <linux/sunrpc/xdr.h> #include <linux/sunrpc/svcsock.h> #include <linux/sunrpc/svcauth.h> +#include <linux/sunrpc/gss_api.h> #include <linux/err.h> #include <linux/seq_file.h> #include <linux/hash.h> @@ -637,7 +638,7 @@ static int unix_gid_find(uid_t uid, struct group_info **gip, } } -static int +int svcauth_unix_set_client(struct svc_rqst *rqstp) { struct sockaddr_in *sin = svc_addr_in(rqstp); @@ -672,6 +673,8 @@ svcauth_unix_set_client(struct svc_rqst *rqstp) return SVC_OK; } +EXPORT_SYMBOL(svcauth_unix_set_client); + static int svcauth_null_accept(struct svc_rqst *rqstp, __be32 *authp) { @@ -707,6 +710,7 @@ svcauth_null_accept(struct svc_rqst *rqstp, __be32 *authp) svc_putnl(resv, RPC_AUTH_NULL); svc_putnl(resv, 0); + rqstp->rq_flavor = RPC_AUTH_NULL; return SVC_OK; } @@ -784,6 +788,7 @@ svcauth_unix_accept(struct svc_rqst *rqstp, __be32 *authp) svc_putnl(resv, RPC_AUTH_NULL); svc_putnl(resv, 0); + rqstp->rq_flavor = RPC_AUTH_UNIX; return SVC_OK; badcred: diff --git a/net/sunrpc/svcsock.c b/net/sunrpc/svcsock.c index 5baf48de2558..036ab520df21 100644 --- a/net/sunrpc/svcsock.c +++ b/net/sunrpc/svcsock.c @@ -131,13 +131,13 @@ static char *__svc_print_addr(struct sockaddr *addr, char *buf, size_t len) case AF_INET: snprintf(buf, len, "%u.%u.%u.%u, port=%u", NIPQUAD(((struct sockaddr_in *) addr)->sin_addr), - htons(((struct sockaddr_in *) addr)->sin_port)); + ntohs(((struct sockaddr_in *) addr)->sin_port)); break; case AF_INET6: snprintf(buf, len, "%x:%x:%x:%x:%x:%x:%x:%x, port=%u", NIP6(((struct sockaddr_in6 *) addr)->sin6_addr), - htons(((struct sockaddr_in6 *) addr)->sin6_port)); + ntohs(((struct sockaddr_in6 *) addr)->sin6_port)); break; default: @@ -644,6 +644,7 @@ svc_recvfrom(struct svc_rqst *rqstp, struct kvec *iov, int nr, int buflen) struct msghdr msg = { .msg_flags = MSG_DONTWAIT, }; + struct sockaddr *sin; int len; len = kernel_recvmsg(svsk->sk_sock, &msg, iov, nr, buflen, @@ -654,6 +655,19 @@ svc_recvfrom(struct svc_rqst *rqstp, struct kvec *iov, int nr, int buflen) memcpy(&rqstp->rq_addr, &svsk->sk_remote, svsk->sk_remotelen); rqstp->rq_addrlen = svsk->sk_remotelen; + /* Destination address in request is needed for binding the + * source address in RPC callbacks later. + */ + sin = (struct sockaddr *)&svsk->sk_local; + switch (sin->sa_family) { + case AF_INET: + rqstp->rq_daddr.addr = ((struct sockaddr_in *)sin)->sin_addr; + break; + case AF_INET6: + rqstp->rq_daddr.addr6 = ((struct sockaddr_in6 *)sin)->sin6_addr; + break; + } + dprintk("svc: socket %p recvfrom(%p, %Zu) = %d\n", svsk, iov[0].iov_base, iov[0].iov_len, len); @@ -1064,6 +1078,12 @@ svc_tcp_accept(struct svc_sock *svsk) goto failed; memcpy(&newsvsk->sk_remote, sin, slen); newsvsk->sk_remotelen = slen; + err = kernel_getsockname(newsock, sin, &slen); + if (unlikely(err < 0)) { + dprintk("svc_tcp_accept: kernel_getsockname error %d\n", -err); + slen = offsetof(struct sockaddr, sa_data); + } + memcpy(&newsvsk->sk_local, sin, slen); svc_sock_received(newsvsk); @@ -1090,7 +1110,8 @@ svc_tcp_accept(struct svc_sock *svsk) serv->sv_name); printk(KERN_NOTICE "%s: last TCP connect from %s\n", - serv->sv_name, buf); + serv->sv_name, __svc_print_addr(sin, + buf, sizeof(buf))); } /* * Always select the oldest socket. It's not fair, @@ -1572,7 +1593,7 @@ svc_age_temp_sockets(unsigned long closure) if (!test_and_set_bit(SK_OLD, &svsk->sk_flags)) continue; - if (atomic_read(&svsk->sk_inuse) || test_bit(SK_BUSY, &svsk->sk_flags)) + if (atomic_read(&svsk->sk_inuse) > 1 || test_bit(SK_BUSY, &svsk->sk_flags)) continue; atomic_inc(&svsk->sk_inuse); list_move(le, &to_be_aged); diff --git a/net/sunrpc/xprt.c b/net/sunrpc/xprt.c index 5b05b73e4c1d..c8c2edccad7e 100644 --- a/net/sunrpc/xprt.c +++ b/net/sunrpc/xprt.c @@ -127,7 +127,7 @@ static void xprt_clear_locked(struct rpc_xprt *xprt) clear_bit(XPRT_LOCKED, &xprt->state); smp_mb__after_clear_bit(); } else - schedule_work(&xprt->task_cleanup); + queue_work(rpciod_workqueue, &xprt->task_cleanup); } /* @@ -515,7 +515,7 @@ xprt_init_autodisconnect(unsigned long data) if (xprt_connecting(xprt)) xprt_release_write(xprt, NULL); else - schedule_work(&xprt->task_cleanup); + queue_work(rpciod_workqueue, &xprt->task_cleanup); return; out_abort: spin_unlock(&xprt->transport_lock); @@ -886,27 +886,24 @@ void xprt_set_timeout(struct rpc_timeout *to, unsigned int retr, unsigned long i /** * xprt_create_transport - create an RPC transport - * @proto: requested transport protocol - * @ap: remote peer address - * @size: length of address - * @to: timeout parameters + * @args: rpc transport creation arguments * */ -struct rpc_xprt *xprt_create_transport(int proto, struct sockaddr *ap, size_t size, struct rpc_timeout *to) +struct rpc_xprt *xprt_create_transport(struct rpc_xprtsock_create *args) { struct rpc_xprt *xprt; struct rpc_rqst *req; - switch (proto) { + switch (args->proto) { case IPPROTO_UDP: - xprt = xs_setup_udp(ap, size, to); + xprt = xs_setup_udp(args); break; case IPPROTO_TCP: - xprt = xs_setup_tcp(ap, size, to); + xprt = xs_setup_tcp(args); break; default: printk(KERN_ERR "RPC: unrecognized transport protocol: %d\n", - proto); + args->proto); return ERR_PTR(-EIO); } if (IS_ERR(xprt)) { diff --git a/net/sunrpc/xprtsock.c b/net/sunrpc/xprtsock.c index cc33c5880abb..4ae7eed7f617 100644 --- a/net/sunrpc/xprtsock.c +++ b/net/sunrpc/xprtsock.c @@ -235,6 +235,7 @@ struct sock_xprt { * Connection of transports */ struct delayed_work connect_worker; + struct sockaddr_storage addr; unsigned short port; /* @@ -653,8 +654,7 @@ static void xs_destroy(struct rpc_xprt *xprt) dprintk("RPC: xs_destroy xprt %p\n", xprt); - cancel_delayed_work(&transport->connect_worker); - flush_scheduled_work(); + cancel_rearming_delayed_work(&transport->connect_worker); xprt_disconnect(xprt); xs_close(xprt); @@ -1001,7 +1001,7 @@ static void xs_tcp_state_change(struct sock *sk) /* Try to schedule an autoclose RPC calls */ set_bit(XPRT_CLOSE_WAIT, &xprt->state); if (test_and_set_bit(XPRT_LOCKED, &xprt->state) == 0) - schedule_work(&xprt->task_cleanup); + queue_work(rpciod_workqueue, &xprt->task_cleanup); default: xprt_disconnect(xprt); } @@ -1146,31 +1146,36 @@ static void xs_set_port(struct rpc_xprt *xprt, unsigned short port) sap->sin_port = htons(port); } -static int xs_bindresvport(struct sock_xprt *transport, struct socket *sock) +static int xs_bind(struct sock_xprt *transport, struct socket *sock) { struct sockaddr_in myaddr = { .sin_family = AF_INET, }; + struct sockaddr_in *sa; int err; unsigned short port = transport->port; + if (!transport->xprt.resvport) + port = 0; + sa = (struct sockaddr_in *)&transport->addr; + myaddr.sin_addr = sa->sin_addr; do { myaddr.sin_port = htons(port); err = kernel_bind(sock, (struct sockaddr *) &myaddr, sizeof(myaddr)); + if (!transport->xprt.resvport) + break; if (err == 0) { transport->port = port; - dprintk("RPC: xs_bindresvport bound to port %u\n", - port); - return 0; + break; } if (port <= xprt_min_resvport) port = xprt_max_resvport; else port--; } while (err == -EADDRINUSE && port != transport->port); - - dprintk("RPC: can't bind to reserved port (%d).\n", -err); + dprintk("RPC: xs_bind "NIPQUAD_FMT":%u: %s (%d)\n", + NIPQUAD(myaddr.sin_addr), port, err ? "failed" : "ok", err); return err; } @@ -1229,7 +1234,7 @@ static void xs_udp_connect_worker(struct work_struct *work) } xs_reclassify_socket(sock); - if (xprt->resvport && xs_bindresvport(transport, sock) < 0) { + if (xs_bind(transport, sock)) { sock_release(sock); goto out; } @@ -1316,7 +1321,7 @@ static void xs_tcp_connect_worker(struct work_struct *work) } xs_reclassify_socket(sock); - if (xprt->resvport && xs_bindresvport(transport, sock) < 0) { + if (xs_bind(transport, sock)) { sock_release(sock); goto out; } @@ -1410,18 +1415,16 @@ static void xs_connect(struct rpc_task *task) dprintk("RPC: xs_connect delayed xprt %p for %lu " "seconds\n", xprt, xprt->reestablish_timeout / HZ); - schedule_delayed_work(&transport->connect_worker, - xprt->reestablish_timeout); + queue_delayed_work(rpciod_workqueue, + &transport->connect_worker, + xprt->reestablish_timeout); xprt->reestablish_timeout <<= 1; if (xprt->reestablish_timeout > XS_TCP_MAX_REEST_TO) xprt->reestablish_timeout = XS_TCP_MAX_REEST_TO; } else { dprintk("RPC: xs_connect scheduled xprt %p\n", xprt); - schedule_delayed_work(&transport->connect_worker, 0); - - /* flush_scheduled_work can sleep... */ - if (!RPC_IS_ASYNC(task)) - flush_scheduled_work(); + queue_delayed_work(rpciod_workqueue, + &transport->connect_worker, 0); } } @@ -1476,7 +1479,7 @@ static struct rpc_xprt_ops xs_udp_ops = { .set_buffer_size = xs_udp_set_buffer_size, .reserve_xprt = xprt_reserve_xprt_cong, .release_xprt = xprt_release_xprt_cong, - .rpcbind = rpcb_getport, + .rpcbind = rpcb_getport_async, .set_port = xs_set_port, .connect = xs_connect, .buf_alloc = rpc_malloc, @@ -1493,7 +1496,7 @@ static struct rpc_xprt_ops xs_udp_ops = { static struct rpc_xprt_ops xs_tcp_ops = { .reserve_xprt = xprt_reserve_xprt, .release_xprt = xs_tcp_release_xprt, - .rpcbind = rpcb_getport, + .rpcbind = rpcb_getport_async, .set_port = xs_set_port, .connect = xs_connect, .buf_alloc = rpc_malloc, @@ -1505,12 +1508,12 @@ static struct rpc_xprt_ops xs_tcp_ops = { .print_stats = xs_tcp_print_stats, }; -static struct rpc_xprt *xs_setup_xprt(struct sockaddr *addr, size_t addrlen, unsigned int slot_table_size) +static struct rpc_xprt *xs_setup_xprt(struct rpc_xprtsock_create *args, unsigned int slot_table_size) { struct rpc_xprt *xprt; struct sock_xprt *new; - if (addrlen > sizeof(xprt->addr)) { + if (args->addrlen > sizeof(xprt->addr)) { dprintk("RPC: xs_setup_xprt: address too large\n"); return ERR_PTR(-EBADF); } @@ -1532,8 +1535,10 @@ static struct rpc_xprt *xs_setup_xprt(struct sockaddr *addr, size_t addrlen, uns return ERR_PTR(-ENOMEM); } - memcpy(&xprt->addr, addr, addrlen); - xprt->addrlen = addrlen; + memcpy(&xprt->addr, args->dstaddr, args->addrlen); + xprt->addrlen = args->addrlen; + if (args->srcaddr) + memcpy(&new->addr, args->srcaddr, args->addrlen); new->port = xs_get_random_port(); return xprt; @@ -1541,22 +1546,20 @@ static struct rpc_xprt *xs_setup_xprt(struct sockaddr *addr, size_t addrlen, uns /** * xs_setup_udp - Set up transport to use a UDP socket - * @addr: address of remote server - * @addrlen: length of address in bytes - * @to: timeout parameters + * @args: rpc transport creation arguments * */ -struct rpc_xprt *xs_setup_udp(struct sockaddr *addr, size_t addrlen, struct rpc_timeout *to) +struct rpc_xprt *xs_setup_udp(struct rpc_xprtsock_create *args) { struct rpc_xprt *xprt; struct sock_xprt *transport; - xprt = xs_setup_xprt(addr, addrlen, xprt_udp_slot_table_entries); + xprt = xs_setup_xprt(args, xprt_udp_slot_table_entries); if (IS_ERR(xprt)) return xprt; transport = container_of(xprt, struct sock_xprt, xprt); - if (ntohs(((struct sockaddr_in *)addr)->sin_port) != 0) + if (ntohs(((struct sockaddr_in *)args->dstaddr)->sin_port) != 0) xprt_set_bound(xprt); xprt->prot = IPPROTO_UDP; @@ -1572,8 +1575,8 @@ struct rpc_xprt *xs_setup_udp(struct sockaddr *addr, size_t addrlen, struct rpc_ xprt->ops = &xs_udp_ops; - if (to) - xprt->timeout = *to; + if (args->timeout) + xprt->timeout = *args->timeout; else xprt_set_timeout(&xprt->timeout, 5, 5 * HZ); @@ -1586,22 +1589,20 @@ struct rpc_xprt *xs_setup_udp(struct sockaddr *addr, size_t addrlen, struct rpc_ /** * xs_setup_tcp - Set up transport to use a TCP socket - * @addr: address of remote server - * @addrlen: length of address in bytes - * @to: timeout parameters + * @args: rpc transport creation arguments * */ -struct rpc_xprt *xs_setup_tcp(struct sockaddr *addr, size_t addrlen, struct rpc_timeout *to) +struct rpc_xprt *xs_setup_tcp(struct rpc_xprtsock_create *args) { struct rpc_xprt *xprt; struct sock_xprt *transport; - xprt = xs_setup_xprt(addr, addrlen, xprt_tcp_slot_table_entries); + xprt = xs_setup_xprt(args, xprt_tcp_slot_table_entries); if (IS_ERR(xprt)) return xprt; transport = container_of(xprt, struct sock_xprt, xprt); - if (ntohs(((struct sockaddr_in *)addr)->sin_port) != 0) + if (ntohs(((struct sockaddr_in *)args->dstaddr)->sin_port) != 0) xprt_set_bound(xprt); xprt->prot = IPPROTO_TCP; @@ -1616,8 +1617,8 @@ struct rpc_xprt *xs_setup_tcp(struct sockaddr *addr, size_t addrlen, struct rpc_ xprt->ops = &xs_tcp_ops; - if (to) - xprt->timeout = *to; + if (args->timeout) + xprt->timeout = *args->timeout; else xprt_set_timeout(&xprt->timeout, 2, 60 * HZ); diff --git a/net/tipc/eth_media.c b/net/tipc/eth_media.c index 77d2d9ce8962..711ca4b1f051 100644 --- a/net/tipc/eth_media.c +++ b/net/tipc/eth_media.c @@ -1,8 +1,8 @@ /* * net/tipc/eth_media.c: Ethernet bearer support for TIPC * - * Copyright (c) 2001-2006, Ericsson AB - * Copyright (c) 2005-2006, Wind River Systems + * Copyright (c) 2001-2007, Ericsson AB + * Copyright (c) 2005-2007, Wind River Systems * All rights reserved. * * Redistribution and use in source and binary forms, with or without @@ -87,6 +87,9 @@ static int send_msg(struct sk_buff *buf, struct tipc_bearer *tb_ptr, /** * recv_msg - handle incoming TIPC message from an Ethernet interface * + * Accept only packets explicitly sent to this node, or broadcast packets; + * ignores packets sent using Ethernet multicast, and traffic sent to other + * nodes (which can happen if interface is running in promiscuous mode). * Routine truncates any Ethernet padding/CRC appended to the message, * and ensures message size matches actual length */ @@ -98,9 +101,7 @@ static int recv_msg(struct sk_buff *buf, struct net_device *dev, u32 size; if (likely(eb_ptr->bearer)) { - if (likely(!dev->promiscuity) || - !memcmp(skb_mac_header(buf), dev->dev_addr, ETH_ALEN) || - !memcmp(skb_mac_header(buf), dev->broadcast, ETH_ALEN)) { + if (likely(buf->pkt_type <= PACKET_BROADCAST)) { size = msg_size((struct tipc_msg *)buf->data); skb_trim(buf, size); if (likely(buf->len == size)) { diff --git a/net/tipc/handler.c b/net/tipc/handler.c index e1dcf663f8a6..0c70010a7dfe 100644 --- a/net/tipc/handler.c +++ b/net/tipc/handler.c @@ -97,7 +97,7 @@ int tipc_handler_start(void) { tipc_queue_item_cache = kmem_cache_create("tipc_queue_items", sizeof(struct queue_item), - 0, SLAB_HWCACHE_ALIGN, NULL, NULL); + 0, SLAB_HWCACHE_ALIGN, NULL); if (!tipc_queue_item_cache) return -ENOMEM; diff --git a/net/tipc/link.c b/net/tipc/link.c index 2124f32ef29f..1b17fecee747 100644 --- a/net/tipc/link.c +++ b/net/tipc/link.c @@ -1,8 +1,8 @@ /* * net/tipc/link.c: TIPC link code * - * Copyright (c) 1996-2006, Ericsson AB - * Copyright (c) 2004-2006, Wind River Systems + * Copyright (c) 1996-2007, Ericsson AB + * Copyright (c) 2004-2007, Wind River Systems * All rights reserved. * * Redistribution and use in source and binary forms, with or without @@ -423,6 +423,17 @@ struct link *tipc_link_create(struct bearer *b_ptr, const u32 peer, return NULL; } + if (LINK_LOG_BUF_SIZE) { + char *pb = kmalloc(LINK_LOG_BUF_SIZE, GFP_ATOMIC); + + if (!pb) { + kfree(l_ptr); + warn("Link creation failed, no memory for print buffer\n"); + return NULL; + } + tipc_printbuf_init(&l_ptr->print_buf, pb, LINK_LOG_BUF_SIZE); + } + l_ptr->addr = peer; if_name = strchr(b_ptr->publ.name, ':') + 1; sprintf(l_ptr->name, "%u.%u.%u:%s-%u.%u.%u:", @@ -432,8 +443,6 @@ struct link *tipc_link_create(struct bearer *b_ptr, const u32 peer, tipc_zone(peer), tipc_cluster(peer), tipc_node(peer)); /* note: peer i/f is appended to link name by reset/activate */ memcpy(&l_ptr->media_addr, media_addr, sizeof(*media_addr)); - k_init_timer(&l_ptr->timer, (Handler)link_timeout, (unsigned long)l_ptr); - list_add_tail(&l_ptr->link_list, &b_ptr->links); l_ptr->checkpoint = 1; l_ptr->b_ptr = b_ptr; link_set_supervision_props(l_ptr, b_ptr->media->tolerance); @@ -459,21 +468,14 @@ struct link *tipc_link_create(struct bearer *b_ptr, const u32 peer, l_ptr->owner = tipc_node_attach_link(l_ptr); if (!l_ptr->owner) { + if (LINK_LOG_BUF_SIZE) + kfree(l_ptr->print_buf.buf); kfree(l_ptr); return NULL; } - if (LINK_LOG_BUF_SIZE) { - char *pb = kmalloc(LINK_LOG_BUF_SIZE, GFP_ATOMIC); - - if (!pb) { - kfree(l_ptr); - warn("Link creation failed, no memory for print buffer\n"); - return NULL; - } - tipc_printbuf_init(&l_ptr->print_buf, pb, LINK_LOG_BUF_SIZE); - } - + k_init_timer(&l_ptr->timer, (Handler)link_timeout, (unsigned long)l_ptr); + list_add_tail(&l_ptr->link_list, &b_ptr->links); tipc_k_signal((Handler)tipc_link_start, (unsigned long)l_ptr); dbg("tipc_link_create(): tolerance = %u,cont intv = %u, abort_limit = %u\n", @@ -1260,7 +1262,7 @@ again: * (Must not hold any locks while building message.) */ - res = msg_build(hdr, msg_sect, num_sect, sender->max_pkt, + res = msg_build(hdr, msg_sect, num_sect, sender->publ.max_pkt, !sender->user_port, &buf); read_lock_bh(&tipc_net_lock); @@ -1271,7 +1273,7 @@ again: if (likely(l_ptr)) { if (likely(buf)) { res = link_send_buf_fast(l_ptr, buf, - &sender->max_pkt); + &sender->publ.max_pkt); if (unlikely(res < 0)) buf_discard(buf); exit: @@ -1299,12 +1301,12 @@ exit: * then re-try fast path or fragment the message */ - sender->max_pkt = link_max_pkt(l_ptr); + sender->publ.max_pkt = link_max_pkt(l_ptr); tipc_node_unlock(node); read_unlock_bh(&tipc_net_lock); - if ((msg_hdr_sz(hdr) + res) <= sender->max_pkt) + if ((msg_hdr_sz(hdr) + res) <= sender->publ.max_pkt) goto again; return link_send_sections_long(sender, msg_sect, @@ -1357,7 +1359,7 @@ static int link_send_sections_long(struct port *sender, again: fragm_no = 1; - max_pkt = sender->max_pkt - INT_H_SIZE; + max_pkt = sender->publ.max_pkt - INT_H_SIZE; /* leave room for tunnel header in case of link changeover */ fragm_sz = max_pkt - INT_H_SIZE; /* leave room for fragmentation header in each fragment */ @@ -1463,7 +1465,7 @@ error: goto reject; } if (link_max_pkt(l_ptr) < max_pkt) { - sender->max_pkt = link_max_pkt(l_ptr); + sender->publ.max_pkt = link_max_pkt(l_ptr); tipc_node_unlock(node); for (; buf_chain; buf_chain = buf) { buf = buf_chain->next; @@ -2381,10 +2383,10 @@ void tipc_link_changeover(struct link *l_ptr) struct tipc_msg *msg = buf_msg(crs); if ((msg_user(msg) == MSG_BUNDLER) && split_bundles) { - u32 msgcount = msg_msgcnt(msg); struct tipc_msg *m = msg_get_wrapped(msg); unchar* pos = (unchar*)m; + msgcount = msg_msgcnt(msg); while (msgcount--) { msg_set_seqno(m,msg_seqno(msg)); tipc_link_tunnel(l_ptr, &tunnel_hdr, m, diff --git a/net/tipc/msg.h b/net/tipc/msg.h index 35d5ba1d4f42..ce2659836374 100644 --- a/net/tipc/msg.h +++ b/net/tipc/msg.h @@ -72,10 +72,8 @@ static inline void msg_set_bits(struct tipc_msg *m, u32 w, u32 pos, u32 mask, u32 val) { val = (val & mask) << pos; - val = htonl(val); - mask = htonl(mask << pos); - m->hdr[w] &= ~mask; - m->hdr[w] |= val; + m->hdr[w] &= ~htonl(mask << pos); + m->hdr[w] |= htonl(val); } /* diff --git a/net/tipc/name_table.c b/net/tipc/name_table.c index 9dfc9127acdd..ac7dfdda7973 100644 --- a/net/tipc/name_table.c +++ b/net/tipc/name_table.c @@ -501,7 +501,7 @@ end_node: * sequence overlapping with the requested sequence */ -void tipc_nameseq_subscribe(struct name_seq *nseq, struct subscription *s) +static void tipc_nameseq_subscribe(struct name_seq *nseq, struct subscription *s) { struct sub_seq *sseq = nseq->sseqs; @@ -1052,12 +1052,11 @@ int tipc_nametbl_init(void) { int array_size = sizeof(struct hlist_head) * tipc_nametbl_size; - table.types = kmalloc(array_size, GFP_ATOMIC); + table.types = kzalloc(array_size, GFP_ATOMIC); if (!table.types) return -ENOMEM; write_lock_bh(&tipc_nametbl_lock); - memset(table.types, 0, array_size); table.local_publ_count = 0; write_unlock_bh(&tipc_nametbl_lock); return 0; diff --git a/net/tipc/node.c b/net/tipc/node.c index e2e452a62ba1..598f4d3a0098 100644 --- a/net/tipc/node.c +++ b/net/tipc/node.c @@ -241,8 +241,6 @@ struct node *tipc_node_attach_link(struct link *l_ptr) char addr_string[16]; if (n_ptr->link_cnt >= 2) { - char addr_string[16]; - err("Attempt to create third link to %s\n", addr_string_fill(addr_string, n_ptr->addr)); return NULL; diff --git a/net/tipc/port.c b/net/tipc/port.c index bcd5da00737b..76088153524c 100644 --- a/net/tipc/port.c +++ b/net/tipc/port.c @@ -1,8 +1,8 @@ /* * net/tipc/port.c: TIPC port code * - * Copyright (c) 1992-2006, Ericsson AB - * Copyright (c) 2004-2005, Wind River Systems + * Copyright (c) 1992-2007, Ericsson AB + * Copyright (c) 2004-2007, Wind River Systems * All rights reserved. * * Redistribution and use in source and binary forms, with or without @@ -41,7 +41,6 @@ #include "addr.h" #include "link.h" #include "node.h" -#include "port.h" #include "name_table.h" #include "user_reg.h" #include "msg.h" @@ -239,6 +238,8 @@ u32 tipc_createport_raw(void *usr_handle, } tipc_port_lock(ref); + p_ptr->publ.usr_handle = usr_handle; + p_ptr->publ.max_pkt = MAX_PKT_DEFAULT; p_ptr->publ.ref = ref; msg = &p_ptr->publ.phdr; msg_init(msg, DATA_LOW, TIPC_NAMED_MSG, TIPC_OK, LONG_H_SIZE, 0); @@ -248,11 +249,9 @@ u32 tipc_createport_raw(void *usr_handle, msg_set_importance(msg,importance); p_ptr->last_in_seqno = 41; p_ptr->sent = 1; - p_ptr->publ.usr_handle = usr_handle; INIT_LIST_HEAD(&p_ptr->wait_list); INIT_LIST_HEAD(&p_ptr->subscription.nodesub_list); p_ptr->congested_link = NULL; - p_ptr->max_pkt = MAX_PKT_DEFAULT; p_ptr->dispatcher = dispatcher; p_ptr->wakeup = wakeup; p_ptr->user_port = NULL; @@ -1243,7 +1242,7 @@ int tipc_connect2port(u32 ref, struct tipc_portid const *peer) res = TIPC_OK; exit: tipc_port_unlock(p_ptr); - p_ptr->max_pkt = tipc_link_get_max_pkt(peer->node, ref); + p_ptr->publ.max_pkt = tipc_link_get_max_pkt(peer->node, ref); return res; } diff --git a/net/tipc/port.h b/net/tipc/port.h index 7ef4d64b32f7..e5f8c16429bd 100644 --- a/net/tipc/port.h +++ b/net/tipc/port.h @@ -1,8 +1,8 @@ /* * net/tipc/port.h: Include file for TIPC port code * - * Copyright (c) 1994-2006, Ericsson AB - * Copyright (c) 2004-2005, Wind River Systems + * Copyright (c) 1994-2007, Ericsson AB + * Copyright (c) 2004-2007, Wind River Systems * All rights reserved. * * Redistribution and use in source and binary forms, with or without @@ -81,7 +81,6 @@ struct user_port { * @acked: * @publications: list of publications for port * @pub_count: total # of publications port has made during its lifetime - * @max_pkt: maximum packet size "hint" used when building messages sent by port * @probing_state: * @probing_interval: * @last_in_seqno: @@ -102,7 +101,6 @@ struct port { u32 acked; struct list_head publications; u32 pub_count; - u32 max_pkt; u32 probing_state; u32 probing_interval; u32 last_in_seqno; diff --git a/net/tipc/socket.c b/net/tipc/socket.c index 45832fb75ea4..84110172031e 100644 --- a/net/tipc/socket.c +++ b/net/tipc/socket.c @@ -1,8 +1,8 @@ /* * net/tipc/socket.c: TIPC socket API * - * Copyright (c) 2001-2006, Ericsson AB - * Copyright (c) 2004-2006, Wind River Systems + * Copyright (c) 2001-2007, Ericsson AB + * Copyright (c) 2004-2007, Wind River Systems * All rights reserved. * * Redistribution and use in source and binary forms, with or without @@ -607,23 +607,24 @@ exit: static int send_stream(struct kiocb *iocb, struct socket *sock, struct msghdr *m, size_t total_len) { + struct tipc_port *tport; struct msghdr my_msg; struct iovec my_iov; struct iovec *curr_iov; int curr_iovlen; char __user *curr_start; + u32 hdr_size; int curr_left; int bytes_to_send; int bytes_sent; int res; - if (likely(total_len <= TIPC_MAX_USER_MSG_SIZE)) - return send_packet(iocb, sock, m, total_len); - - /* Can only send large data streams if already connected */ + /* Handle special cases where there is no connection */ if (unlikely(sock->state != SS_CONNECTED)) { - if (sock->state == SS_DISCONNECTING) + if (sock->state == SS_UNCONNECTED) + return send_packet(iocb, sock, m, total_len); + else if (sock->state == SS_DISCONNECTING) return -EPIPE; else return -ENOTCONN; @@ -648,17 +649,25 @@ static int send_stream(struct kiocb *iocb, struct socket *sock, my_msg.msg_name = NULL; bytes_sent = 0; + tport = tipc_sk(sock->sk)->p; + hdr_size = msg_hdr_sz(&tport->phdr); + while (curr_iovlen--) { curr_start = curr_iov->iov_base; curr_left = curr_iov->iov_len; while (curr_left) { - bytes_to_send = (curr_left < TIPC_MAX_USER_MSG_SIZE) - ? curr_left : TIPC_MAX_USER_MSG_SIZE; + bytes_to_send = tport->max_pkt - hdr_size; + if (bytes_to_send > TIPC_MAX_USER_MSG_SIZE) + bytes_to_send = TIPC_MAX_USER_MSG_SIZE; + if (curr_left < bytes_to_send) + bytes_to_send = curr_left; my_iov.iov_base = curr_start; my_iov.iov_len = bytes_to_send; if ((res = send_packet(iocb, sock, &my_msg, 0)) < 0) { - return bytes_sent ? bytes_sent : res; + if (bytes_sent != 0) + res = bytes_sent; + return res; } curr_left -= bytes_to_send; curr_start += bytes_to_send; @@ -1600,33 +1609,6 @@ static int getsockopt(struct socket *sock, } /** - * Placeholders for non-implemented functionality - * - * Returns error code (POSIX-compliant where defined) - */ - -static int ioctl(struct socket *s, u32 cmd, unsigned long arg) -{ - return -EINVAL; -} - -static int no_mmap(struct file *file, struct socket *sock, - struct vm_area_struct *vma) -{ - return -EINVAL; -} -static ssize_t no_sendpage(struct socket *sock, struct page *page, - int offset, size_t size, int flags) -{ - return -EINVAL; -} - -static int no_skpair(struct socket *s1, struct socket *s2) -{ - return -EOPNOTSUPP; -} - -/** * Protocol switches for the various types of TIPC sockets */ @@ -1636,19 +1618,19 @@ static struct proto_ops msg_ops = { .release = release, .bind = bind, .connect = connect, - .socketpair = no_skpair, + .socketpair = sock_no_socketpair, .accept = accept, .getname = get_name, .poll = poll, - .ioctl = ioctl, + .ioctl = sock_no_ioctl, .listen = listen, .shutdown = shutdown, .setsockopt = setsockopt, .getsockopt = getsockopt, .sendmsg = send_msg, .recvmsg = recv_msg, - .mmap = no_mmap, - .sendpage = no_sendpage + .mmap = sock_no_mmap, + .sendpage = sock_no_sendpage }; static struct proto_ops packet_ops = { @@ -1657,19 +1639,19 @@ static struct proto_ops packet_ops = { .release = release, .bind = bind, .connect = connect, - .socketpair = no_skpair, + .socketpair = sock_no_socketpair, .accept = accept, .getname = get_name, .poll = poll, - .ioctl = ioctl, + .ioctl = sock_no_ioctl, .listen = listen, .shutdown = shutdown, .setsockopt = setsockopt, .getsockopt = getsockopt, .sendmsg = send_packet, .recvmsg = recv_msg, - .mmap = no_mmap, - .sendpage = no_sendpage + .mmap = sock_no_mmap, + .sendpage = sock_no_sendpage }; static struct proto_ops stream_ops = { @@ -1678,19 +1660,19 @@ static struct proto_ops stream_ops = { .release = release, .bind = bind, .connect = connect, - .socketpair = no_skpair, + .socketpair = sock_no_socketpair, .accept = accept, .getname = get_name, .poll = poll, - .ioctl = ioctl, + .ioctl = sock_no_ioctl, .listen = listen, .shutdown = shutdown, .setsockopt = setsockopt, .getsockopt = getsockopt, .sendmsg = send_stream, .recvmsg = recv_stream, - .mmap = no_mmap, - .sendpage = no_sendpage + .mmap = sock_no_mmap, + .sendpage = sock_no_sendpage }; static struct net_proto_family tipc_family_ops = { diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c index d70fa30d4294..a05c34260e70 100644 --- a/net/unix/af_unix.c +++ b/net/unix/af_unix.c @@ -118,14 +118,40 @@ int sysctl_unix_max_dgram_qlen __read_mostly = 10; -struct hlist_head unix_socket_table[UNIX_HASH_SIZE + 1]; -DEFINE_SPINLOCK(unix_table_lock); +static struct hlist_head unix_socket_table[UNIX_HASH_SIZE + 1]; +static DEFINE_SPINLOCK(unix_table_lock); static atomic_t unix_nr_socks = ATOMIC_INIT(0); #define unix_sockets_unbound (&unix_socket_table[UNIX_HASH_SIZE]) #define UNIX_ABSTRACT(sk) (unix_sk(sk)->addr->hash != UNIX_HASH_SIZE) +static struct sock *first_unix_socket(int *i) +{ + for (*i = 0; *i <= UNIX_HASH_SIZE; (*i)++) { + if (!hlist_empty(&unix_socket_table[*i])) + return __sk_head(&unix_socket_table[*i]); + } + return NULL; +} + +static struct sock *next_unix_socket(int *i, struct sock *s) +{ + struct sock *next = sk_next(s); + /* More in this chain? */ + if (next) + return next; + /* Look for next non-empty chain. */ + for ((*i)++; *i <= UNIX_HASH_SIZE; (*i)++) { + if (!hlist_empty(&unix_socket_table[*i])) + return __sk_head(&unix_socket_table[*i]); + } + return NULL; +} + +#define forall_unix_sockets(i, s) \ + for (s = first_unix_socket(&(i)); s; s = next_unix_socket(&(i),(s))) + #ifdef CONFIG_SECURITY_NETWORK static void unix_get_secdata(struct scm_cookie *scm, struct sk_buff *skb) { @@ -592,7 +618,8 @@ static struct sock * unix_create1(struct socket *sock) u->dentry = NULL; u->mnt = NULL; spin_lock_init(&u->lock); - atomic_set(&u->inflight, sock ? 0 : -1); + atomic_set(&u->inflight, 0); + INIT_LIST_HEAD(&u->link); mutex_init(&u->readlock); /* single task reading lock */ init_waitqueue_head(&u->peer_wait); unix_insert_socket(unix_sockets_unbound, sk); @@ -1134,9 +1161,6 @@ restart: /* take ten and and send info to listening sock */ spin_lock(&other->sk_receive_queue.lock); __skb_queue_tail(&other->sk_receive_queue, skb); - /* Undo artificially decreased inflight after embrion - * is installed to listening socket. */ - atomic_inc(&newu->inflight); spin_unlock(&other->sk_receive_queue.lock); unix_state_unlock(other); other->sk_data_ready(other, 0); @@ -2048,7 +2072,7 @@ static int unix_seq_show(struct seq_file *seq, void *v) return 0; } -static struct seq_operations unix_seq_ops = { +static const struct seq_operations unix_seq_ops = { .start = unix_seq_start, .next = unix_seq_next, .stop = unix_seq_stop, diff --git a/net/unix/garbage.c b/net/unix/garbage.c index f20b7ea7c555..406b6433e467 100644 --- a/net/unix/garbage.c +++ b/net/unix/garbage.c @@ -62,6 +62,10 @@ * AV 1 Mar 1999 * Damn. Added missing check for ->dead in listen queues scanning. * + * Miklos Szeredi 25 Jun 2007 + * Reimplement with a cycle collecting algorithm. This should + * solve several problems with the previous code, like being racy + * wrt receive and holding up unrelated socket operations. */ #include <linux/kernel.h> @@ -84,10 +88,9 @@ /* Internal data structures and random procedures: */ -#define GC_HEAD ((struct sock *)(-1)) -#define GC_ORPHAN ((struct sock *)(-3)) - -static struct sock *gc_current = GC_HEAD; /* stack of objects to mark */ +static LIST_HEAD(gc_inflight_list); +static LIST_HEAD(gc_candidates); +static DEFINE_SPINLOCK(unix_gc_lock); atomic_t unix_tot_inflight = ATOMIC_INIT(0); @@ -122,8 +125,16 @@ void unix_inflight(struct file *fp) { struct sock *s = unix_get_socket(fp); if(s) { - atomic_inc(&unix_sk(s)->inflight); + struct unix_sock *u = unix_sk(s); + spin_lock(&unix_gc_lock); + if (atomic_inc_return(&u->inflight) == 1) { + BUG_ON(!list_empty(&u->link)); + list_add_tail(&u->link, &gc_inflight_list); + } else { + BUG_ON(list_empty(&u->link)); + } atomic_inc(&unix_tot_inflight); + spin_unlock(&unix_gc_lock); } } @@ -131,182 +142,218 @@ void unix_notinflight(struct file *fp) { struct sock *s = unix_get_socket(fp); if(s) { - atomic_dec(&unix_sk(s)->inflight); + struct unix_sock *u = unix_sk(s); + spin_lock(&unix_gc_lock); + BUG_ON(list_empty(&u->link)); + if (atomic_dec_and_test(&u->inflight)) + list_del_init(&u->link); atomic_dec(&unix_tot_inflight); + spin_unlock(&unix_gc_lock); } } +static inline struct sk_buff *sock_queue_head(struct sock *sk) +{ + return (struct sk_buff *) &sk->sk_receive_queue; +} -/* - * Garbage Collector Support Functions - */ +#define receive_queue_for_each_skb(sk, next, skb) \ + for (skb = sock_queue_head(sk)->next, next = skb->next; \ + skb != sock_queue_head(sk); skb = next, next = skb->next) -static inline struct sock *pop_stack(void) +static void scan_inflight(struct sock *x, void (*func)(struct sock *), + struct sk_buff_head *hitlist) { - struct sock *p = gc_current; - gc_current = unix_sk(p)->gc_tree; - return p; + struct sk_buff *skb; + struct sk_buff *next; + + spin_lock(&x->sk_receive_queue.lock); + receive_queue_for_each_skb(x, next, skb) { + /* + * Do we have file descriptors ? + */ + if (UNIXCB(skb).fp) { + bool hit = false; + /* + * Process the descriptors of this socket + */ + int nfd = UNIXCB(skb).fp->count; + struct file **fp = UNIXCB(skb).fp->fp; + while (nfd--) { + /* + * Get the socket the fd matches + * if it indeed does so + */ + struct sock *sk = unix_get_socket(*fp++); + if(sk) { + hit = true; + func(sk); + } + } + if (hit && hitlist != NULL) { + __skb_unlink(skb, &x->sk_receive_queue); + __skb_queue_tail(hitlist, skb); + } + } + } + spin_unlock(&x->sk_receive_queue.lock); } -static inline int empty_stack(void) +static void scan_children(struct sock *x, void (*func)(struct sock *), + struct sk_buff_head *hitlist) { - return gc_current == GC_HEAD; + if (x->sk_state != TCP_LISTEN) + scan_inflight(x, func, hitlist); + else { + struct sk_buff *skb; + struct sk_buff *next; + struct unix_sock *u; + LIST_HEAD(embryos); + + /* + * For a listening socket collect the queued embryos + * and perform a scan on them as well. + */ + spin_lock(&x->sk_receive_queue.lock); + receive_queue_for_each_skb(x, next, skb) { + u = unix_sk(skb->sk); + + /* + * An embryo cannot be in-flight, so it's safe + * to use the list link. + */ + BUG_ON(!list_empty(&u->link)); + list_add_tail(&u->link, &embryos); + } + spin_unlock(&x->sk_receive_queue.lock); + + while (!list_empty(&embryos)) { + u = list_entry(embryos.next, struct unix_sock, link); + scan_inflight(&u->sk, func, hitlist); + list_del_init(&u->link); + } + } } -static void maybe_unmark_and_push(struct sock *x) +static void dec_inflight(struct sock *sk) { - struct unix_sock *u = unix_sk(x); + atomic_dec(&unix_sk(sk)->inflight); +} - if (u->gc_tree != GC_ORPHAN) - return; - sock_hold(x); - u->gc_tree = gc_current; - gc_current = x; +static void inc_inflight(struct sock *sk) +{ + atomic_inc(&unix_sk(sk)->inflight); } +static void inc_inflight_move_tail(struct sock *sk) +{ + struct unix_sock *u = unix_sk(sk); + + atomic_inc(&u->inflight); + /* + * If this is still a candidate, move it to the end of the + * list, so that it's checked even if it was already passed + * over + */ + if (u->gc_candidate) + list_move_tail(&u->link, &gc_candidates); +} /* The external entry point: unix_gc() */ void unix_gc(void) { - static DEFINE_MUTEX(unix_gc_sem); - int i; - struct sock *s; - struct sk_buff_head hitlist; - struct sk_buff *skb; + static bool gc_in_progress = false; - /* - * Avoid a recursive GC. - */ + struct unix_sock *u; + struct unix_sock *next; + struct sk_buff_head hitlist; + struct list_head cursor; - if (!mutex_trylock(&unix_gc_sem)) - return; + spin_lock(&unix_gc_lock); - spin_lock(&unix_table_lock); + /* Avoid a recursive GC. */ + if (gc_in_progress) + goto out; - forall_unix_sockets(i, s) - { - unix_sk(s)->gc_tree = GC_ORPHAN; - } + gc_in_progress = true; /* - * Everything is now marked - */ - - /* Invariant to be maintained: - - everything unmarked is either: - -- (a) on the stack, or - -- (b) has all of its children unmarked - - everything on the stack is always unmarked - - nothing is ever pushed onto the stack twice, because: - -- nothing previously unmarked is ever pushed on the stack + * First, select candidates for garbage collection. Only + * in-flight sockets are considered, and from those only ones + * which don't have any external reference. + * + * Holding unix_gc_lock will protect these candidates from + * being detached, and hence from gaining an external + * reference. This also means, that since there are no + * possible receivers, the receive queues of these sockets are + * static during the GC, even though the dequeue is done + * before the detach without atomicity guarantees. */ + list_for_each_entry_safe(u, next, &gc_inflight_list, link) { + int total_refs; + int inflight_refs; + + total_refs = file_count(u->sk.sk_socket->file); + inflight_refs = atomic_read(&u->inflight); + + BUG_ON(inflight_refs < 1); + BUG_ON(total_refs < inflight_refs); + if (total_refs == inflight_refs) { + list_move_tail(&u->link, &gc_candidates); + u->gc_candidate = 1; + } + } /* - * Push root set + * Now remove all internal in-flight reference to children of + * the candidates. */ - - forall_unix_sockets(i, s) - { - int open_count = 0; - - /* - * If all instances of the descriptor are not - * in flight we are in use. - * - * Special case: when socket s is embrion, it may be - * hashed but still not in queue of listening socket. - * In this case (see unix_create1()) we set artificial - * negative inflight counter to close race window. - * It is trick of course and dirty one. - */ - if (s->sk_socket && s->sk_socket->file) - open_count = file_count(s->sk_socket->file); - if (open_count > atomic_read(&unix_sk(s)->inflight)) - maybe_unmark_and_push(s); - } + list_for_each_entry(u, &gc_candidates, link) + scan_children(&u->sk, dec_inflight, NULL); /* - * Mark phase + * Restore the references for children of all candidates, + * which have remaining references. Do this recursively, so + * only those remain, which form cyclic references. + * + * Use a "cursor" link, to make the list traversal safe, even + * though elements might be moved about. */ + list_add(&cursor, &gc_candidates); + while (cursor.next != &gc_candidates) { + u = list_entry(cursor.next, struct unix_sock, link); - while (!empty_stack()) - { - struct sock *x = pop_stack(); - struct sock *sk; - - spin_lock(&x->sk_receive_queue.lock); - skb = skb_peek(&x->sk_receive_queue); - - /* - * Loop through all but first born - */ + /* Move cursor to after the current position. */ + list_move(&cursor, &u->link); - while (skb && skb != (struct sk_buff *)&x->sk_receive_queue) { - /* - * Do we have file descriptors ? - */ - if(UNIXCB(skb).fp) - { - /* - * Process the descriptors of this socket - */ - int nfd=UNIXCB(skb).fp->count; - struct file **fp = UNIXCB(skb).fp->fp; - while(nfd--) - { - /* - * Get the socket the fd matches if - * it indeed does so - */ - if((sk=unix_get_socket(*fp++))!=NULL) - { - maybe_unmark_and_push(sk); - } - } - } - /* We have to scan not-yet-accepted ones too */ - if (x->sk_state == TCP_LISTEN) - maybe_unmark_and_push(skb->sk); - skb=skb->next; + if (atomic_read(&u->inflight) > 0) { + list_move_tail(&u->link, &gc_inflight_list); + u->gc_candidate = 0; + scan_children(&u->sk, inc_inflight_move_tail, NULL); } - spin_unlock(&x->sk_receive_queue.lock); - sock_put(x); } + list_del(&cursor); + /* + * Now gc_candidates contains only garbage. Restore original + * inflight counters for these as well, and remove the skbuffs + * which are creating the cycle(s). + */ skb_queue_head_init(&hitlist); + list_for_each_entry(u, &gc_candidates, link) + scan_children(&u->sk, inc_inflight, &hitlist); - forall_unix_sockets(i, s) - { - struct unix_sock *u = unix_sk(s); + spin_unlock(&unix_gc_lock); - if (u->gc_tree == GC_ORPHAN) { - struct sk_buff *nextsk; + /* Here we are. Hitlist is filled. Die. */ + __skb_queue_purge(&hitlist); - spin_lock(&s->sk_receive_queue.lock); - skb = skb_peek(&s->sk_receive_queue); - while (skb && - skb != (struct sk_buff *)&s->sk_receive_queue) { - nextsk = skb->next; - /* - * Do we have file descriptors ? - */ - if (UNIXCB(skb).fp) { - __skb_unlink(skb, - &s->sk_receive_queue); - __skb_queue_tail(&hitlist, skb); - } - skb = nextsk; - } - spin_unlock(&s->sk_receive_queue.lock); - } - u->gc_tree = GC_ORPHAN; - } - spin_unlock(&unix_table_lock); + spin_lock(&unix_gc_lock); - /* - * Here we are. Hitlist is filled. Die. - */ + /* All candidates should have been detached by now. */ + BUG_ON(!list_empty(&gc_candidates)); + gc_in_progress = false; - __skb_queue_purge(&hitlist); - mutex_unlock(&unix_gc_sem); + out: + spin_unlock(&unix_gc_lock); } diff --git a/net/wanrouter/wanmain.c b/net/wanrouter/wanmain.c index 849cc06bd914..9ab31a3ce3ad 100644 --- a/net/wanrouter/wanmain.c +++ b/net/wanrouter/wanmain.c @@ -46,7 +46,6 @@ #include <linux/capability.h> #include <linux/errno.h> /* return codes */ #include <linux/kernel.h> -#include <linux/init.h> #include <linux/module.h> /* support for loadable modules */ #include <linux/slab.h> /* kmalloc(), kfree() */ #include <linux/mm.h> diff --git a/net/wanrouter/wanproc.c b/net/wanrouter/wanproc.c index 205106521ecb..236e7eaf1b7f 100644 --- a/net/wanrouter/wanproc.c +++ b/net/wanrouter/wanproc.c @@ -164,14 +164,14 @@ static int status_show(struct seq_file *m, void *v) return 0; } -static struct seq_operations config_op = { +static const struct seq_operations config_op = { .start = r_start, .next = r_next, .stop = r_stop, .show = config_show, }; -static struct seq_operations status_op = { +static const struct seq_operations status_op = { .start = r_start, .next = r_next, .stop = r_stop, diff --git a/net/wireless/Makefile b/net/wireless/Makefile index 3a96ae60271c..092116e390b6 100644 --- a/net/wireless/Makefile +++ b/net/wireless/Makefile @@ -1,4 +1,4 @@ obj-$(CONFIG_WIRELESS_EXT) += wext.o obj-$(CONFIG_CFG80211) += cfg80211.o -cfg80211-y += core.o sysfs.o +cfg80211-y += core.o sysfs.o radiotap.o diff --git a/net/wireless/core.c b/net/wireless/core.c index 7eabd55417a5..9771451eae21 100644 --- a/net/wireless/core.c +++ b/net/wireless/core.c @@ -213,7 +213,7 @@ out_fail_notifier: out_fail_sysfs: return err; } -module_init(cfg80211_init); +subsys_initcall(cfg80211_init); static void cfg80211_exit(void) { diff --git a/net/wireless/sysfs.c b/net/wireless/sysfs.c index 88aaacd9f822..2d5d2255a27c 100644 --- a/net/wireless/sysfs.c +++ b/net/wireless/sysfs.c @@ -52,12 +52,14 @@ static void wiphy_dev_release(struct device *dev) cfg80211_dev_free(rdev); } +#ifdef CONFIG_HOTPLUG static int wiphy_uevent(struct device *dev, char **envp, int num_envp, char *buf, int size) { /* TODO, we probably need stuff here */ return 0; } +#endif struct class ieee80211_class = { .name = "ieee80211", diff --git a/net/x25/x25_proc.c b/net/x25/x25_proc.c index 96001f0c64fc..7405b9c5b7f2 100644 --- a/net/x25/x25_proc.c +++ b/net/x25/x25_proc.c @@ -234,21 +234,21 @@ out: return 0; } -static struct seq_operations x25_seq_route_ops = { +static const struct seq_operations x25_seq_route_ops = { .start = x25_seq_route_start, .next = x25_seq_route_next, .stop = x25_seq_route_stop, .show = x25_seq_route_show, }; -static struct seq_operations x25_seq_socket_ops = { +static const struct seq_operations x25_seq_socket_ops = { .start = x25_seq_socket_start, .next = x25_seq_socket_next, .stop = x25_seq_socket_stop, .show = x25_seq_socket_show, }; -static struct seq_operations x25_seq_forward_ops = { +static const struct seq_operations x25_seq_forward_ops = { .start = x25_seq_forward_start, .next = x25_seq_forward_next, .stop = x25_seq_forward_stop, diff --git a/net/xfrm/xfrm_input.c b/net/xfrm/xfrm_input.c index 5c4695840c58..113f44429982 100644 --- a/net/xfrm/xfrm_input.c +++ b/net/xfrm/xfrm_input.c @@ -83,5 +83,5 @@ void __init xfrm_input_init(void) secpath_cachep = kmem_cache_create("secpath_cache", sizeof(struct sec_path), 0, SLAB_HWCACHE_ALIGN|SLAB_PANIC, - NULL, NULL); + NULL); } diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c index b48f06fc9fd9..7012891d39f2 100644 --- a/net/xfrm/xfrm_policy.c +++ b/net/xfrm/xfrm_policy.c @@ -23,10 +23,9 @@ #include <linux/netfilter.h> #include <linux/module.h> #include <linux/cache.h> +#include <linux/audit.h> #include <net/xfrm.h> #include <net/ip.h> -#include <linux/audit.h> -#include <linux/cache.h> #include "xfrm_hash.h" @@ -857,7 +856,7 @@ xfrm_policy_flush_secctx_check(u8 type, struct xfrm_audit *audit_info) pol, NULL); return err; } - } + } for (i = xfrm_policy_bydst[dir].hmask; i >= 0; i--) { hlist_for_each_entry(pol, entry, xfrm_policy_bydst[dir].table + i, @@ -1299,7 +1298,8 @@ xfrm_tmpl_resolve_one(struct xfrm_policy *policy, struct flowi *fl, xfrm_address_t *local = saddr; struct xfrm_tmpl *tmpl = &policy->xfrm_vec[i]; - if (tmpl->mode == XFRM_MODE_TUNNEL) { + if (tmpl->mode == XFRM_MODE_TUNNEL || + tmpl->mode == XFRM_MODE_BEET) { remote = &tmpl->id.daddr; local = &tmpl->saddr; family = tmpl->encap_family; @@ -2194,9 +2194,10 @@ void xfrm_audit_log(uid_t auid, u32 sid, int type, int result, } if (sid != 0 && - security_secid_to_secctx(sid, &secctx, &secctx_len) == 0) + security_secid_to_secctx(sid, &secctx, &secctx_len) == 0) { audit_log_format(audit_buf, " subj=%s", secctx); - else + security_release_secctx(secctx, secctx_len); + } else audit_log_task_context(audit_buf); if (xp) { @@ -2378,7 +2379,7 @@ static void __init xfrm_policy_init(void) xfrm_dst_cache = kmem_cache_create("xfrm_dst_cache", sizeof(struct xfrm_dst), 0, SLAB_HWCACHE_ALIGN|SLAB_PANIC, - NULL, NULL); + NULL); hmask = 8 - 1; sz = (hmask+1) * sizeof(struct hlist_head); diff --git a/net/xfrm/xfrm_state.c b/net/xfrm/xfrm_state.c index dfacb9c2a6e3..d4356e6f7f9b 100644 --- a/net/xfrm/xfrm_state.c +++ b/net/xfrm/xfrm_state.c @@ -19,9 +19,8 @@ #include <linux/ipsec.h> #include <linux/module.h> #include <linux/cache.h> -#include <asm/uaccess.h> #include <linux/audit.h> -#include <linux/cache.h> +#include <asm/uaccess.h> #include "xfrm_hash.h" @@ -407,7 +406,7 @@ xfrm_state_flush_secctx_check(u8 proto, struct xfrm_audit *audit_info) xfrm_audit_log(audit_info->loginuid, audit_info->secid, AUDIT_MAC_IPSEC_DELSA, - 0, NULL, x); + 0, NULL, x); return err; } @@ -611,7 +610,7 @@ xfrm_state_find(xfrm_address_t *daddr, xfrm_address_t *saddr, selector. */ if (x->km.state == XFRM_STATE_VALID) { - if (!xfrm_selector_match(&x->sel, fl, family) || + if (!xfrm_selector_match(&x->sel, fl, x->sel.family) || !security_xfrm_state_pol_flow_match(x, pol, fl)) continue; if (!best || @@ -623,7 +622,7 @@ xfrm_state_find(xfrm_address_t *daddr, xfrm_address_t *saddr, acquire_in_progress = 1; } else if (x->km.state == XFRM_STATE_ERROR || x->km.state == XFRM_STATE_EXPIRED) { - if (xfrm_selector_match(&x->sel, fl, family) && + if (xfrm_selector_match(&x->sel, fl, x->sel.family) && security_xfrm_state_pol_flow_match(x, pol, fl)) error = -ESRCH; } @@ -686,6 +685,37 @@ out: return x; } +struct xfrm_state * +xfrm_stateonly_find(xfrm_address_t *daddr, xfrm_address_t *saddr, + unsigned short family, u8 mode, u8 proto, u32 reqid) +{ + unsigned int h = xfrm_dst_hash(daddr, saddr, reqid, family); + struct xfrm_state *rx = NULL, *x = NULL; + struct hlist_node *entry; + + spin_lock(&xfrm_state_lock); + hlist_for_each_entry(x, entry, xfrm_state_bydst+h, bydst) { + if (x->props.family == family && + x->props.reqid == reqid && + !(x->props.flags & XFRM_STATE_WILDRECV) && + xfrm_state_addr_check(x, daddr, saddr, family) && + mode == x->props.mode && + proto == x->id.proto && + x->km.state == XFRM_STATE_VALID) { + rx = x; + break; + } + } + + if (rx) + xfrm_state_hold(rx); + spin_unlock(&xfrm_state_lock); + + + return rx; +} +EXPORT_SYMBOL(xfrm_stateonly_find); + static void __xfrm_state_insert(struct xfrm_state *x) { unsigned int h; diff --git a/net/xfrm/xfrm_user.c b/net/xfrm/xfrm_user.c index c06883bf620e..61339e17a0f5 100644 --- a/net/xfrm/xfrm_user.c +++ b/net/xfrm/xfrm_user.c @@ -322,6 +322,13 @@ static void copy_from_user_state(struct xfrm_state *x, struct xfrm_usersa_info * x->props.family = p->family; memcpy(&x->props.saddr, &p->saddr, sizeof(x->props.saddr)); x->props.flags = p->flags; + + /* + * Set inner address family if the KM left it as zero. + * See comment in validate_tmpl. + */ + if (!x->sel.family) + x->sel.family = p->family; } /* |